diff --git a/docs/library.md b/docs/library.md index a5a9e23..a56c0d6 100644 --- a/docs/library.md +++ b/docs/library.md @@ -148,7 +148,7 @@ the following fields: |-------|------|-------------| | `user_login` | `str` | GitHub username that was scored | | `context_repo` | `str` | Repository used as scoring context | -| `raw_score` | `float` | Raw graph score before normalization | +| `raw_score` | `float` | Pre-normalization score: graph score (v1) or logit (v2) | | `normalized_score` | `float` | Normalized score (0.0 - 1.0) | | `trust_level` | `TrustLevel` | HIGH, MEDIUM, LOW, UNKNOWN, or BOT | | `percentile` | `float` | Percentile rank (0.0 - 1.0) | diff --git a/docs/mcp-server.md b/docs/mcp-server.md index 5c7b8f1..caf0fd3 100644 --- a/docs/mcp-server.md +++ b/docs/mcp-server.md @@ -180,7 +180,7 @@ Returns an expanded breakdown with contributions, flags, and metadata. "context_repo": "octocat/Hello-World", "trust_level": "HIGH", "normalized_score": 0.82, - "raw_score": 0.0045, + "raw_score": 1.5163, "account_age_days": 3650, "total_merged_prs": 47, "unique_repos_contributed": 12, diff --git a/docs/methodology.md b/docs/methodology.md index b5c36a0..6a7f4e9 100644 --- a/docs/methodology.md +++ b/docs/methodology.md @@ -101,7 +101,7 @@ These raw weights are normalized to sum to 1.0, so actual values in the random w The directed graph is scored using personalized graph-based ranking with a damping factor (alpha) of 0.85. This produces a raw score for the user node. -Normalization converts the raw score to a 0-1 range: +**v1:** Normalization converts the raw graph score to a 0-1 range: ``` baseline = 1 / num_nodes @@ -111,6 +111,18 @@ normalized = ratio / (ratio + 1) This sigmoid-like mapping means a score equal to the uniform baseline maps to 0.5, with diminishing returns above. 
+**v2:** The normalized graph score is combined with merge rate and account age into a logit, then passed through a sigmoid: + +``` +logit = intercept + graph_score_weight * graph_score + + merge_rate_weight * merge_rate + + account_age_weight * log(account_age_days + 1) + +normalized = 1 / (1 + e^(-logit)) +``` + +`raw_score` in the v2 output contains the pre-sigmoid logit value. + ### Classification | Level | Threshold | Meaning | diff --git a/src/good_egg/scorer.py b/src/good_egg/scorer.py index 162af21..f89bbdb 100644 --- a/src/good_egg/scorer.py +++ b/src/good_egg/scorer.py @@ -206,7 +206,7 @@ def _score_v2( return TrustScore( user_login=login, context_repo=context_repo, - raw_score=raw_score, + raw_score=logit, normalized_score=normalized, trust_level=trust_level, account_age_days=user_data.profile.account_age_days, diff --git a/tests/test_scorer.py b/tests/test_scorer.py index 9bb33fb..69dd81a 100644 --- a/tests/test_scorer.py +++ b/tests/test_scorer.py @@ -2,6 +2,7 @@ from __future__ import annotations +import math from datetime import UTC, datetime, timedelta from good_egg.config import GoodEggConfig @@ -426,8 +427,8 @@ def test_v2_scoring_produces_nonzero_score(self) -> None: data = _make_contribution_data(merged_prs=prs, repos=repos, closed_pr_count=5) result = scorer.score(data, "my-org/my-elixir-app") - assert result.raw_score > 0.0 - assert 0.0 <= result.normalized_score <= 1.0 + expected = 1.0 / (1.0 + math.exp(-result.raw_score)) + assert abs(result.normalized_score - expected) < 1e-9 assert result.scoring_model == "v2" def test_v2_component_scores_populated(self) -> None: