khezen · khezen · Feb 28, 2026 · Feb 28, 2026
diff --git a/.env.example b/.env.example
@@ -96,16 +96,15 @@ DEFAULT_MODEL=anthropic/claude-opus-4-6
 #     supply chain, scope identification, doc review. Requires strong reasoning.
 #     Recommended: anthropic/claude-opus-4-6 or equivalent.
 #
-#   Mid-tier (EXTRACTION_MODEL + DEDUPLICATION_MODEL): Structured field
-#     extraction and issue deduplication. Needs accuracy but not deep
-#     reasoning. Recommended: anthropic/claude-sonnet-4-5-20250929 or equivalent.
+#   Mid-tier (EXTRACTION_MODEL): Structured field extraction. Needs accuracy
+#     but not deep reasoning. Recommended: anthropic/claude-sonnet-4-5-20250929
+#     or equivalent.
 #
 #   Cheap (SUMMARIZATION_MODEL): PR summary generation. Simple synthesis.
 #     Recommended: anthropic/claude-haiku-4-5-20251001 or equivalent.
 #
 # By default, all models fall back to DEFAULT_MODEL. To optimize costs:
 # EXTRACTION_MODEL=anthropic/claude-sonnet-4-5-20250929
-# DEDUPLICATION_MODEL=anthropic/claude-sonnet-4-5-20250929
 # SUMMARIZATION_MODEL=anthropic/claude-haiku-4-5-20251001
 
 # =============================================================================
@@ -160,7 +159,6 @@ DEFAULT_MODEL=anthropic/claude-opus-4-6
 #   - DOC            (compares patches against documentation)
 #   - SCOPE          (code scope detection)
 #   - SUPPLY_CHAIN   (supply chain security analysis)
-#   - DEDUPLICATION  (issue deduplication)
 #   - SUMMARIZATION  (PR summary generation)
 #
 # Available settings per signature:
@@ -187,8 +185,5 @@ DEFAULT_MODEL=anthropic/claude-opus-4-6
 # SCOPE_MAX_ITERS=10
 # SCOPE_MAX_REASONING_TOKENS=1024
 
-# DEDUPLICATION_ENABLED=true
-# DEDUPLICATION_MODEL=anthropic/claude-sonnet-4-5-20250929
-
 # SUMMARIZATION_ENABLED=true
 # SUMMARIZATION_MODEL=anthropic/claude-haiku-4-5-20251001
diff --git a/.github/workflows/codespy-review.yml.example b/.github/workflows/codespy-review.yml.example
@@ -36,11 +36,10 @@ jobs:
         with:
           # Recommended model strategy (see README):
           #   🧠 Smart tier  — core analysis & reasoning
-          #   ⚡ Mid-tier    — field extraction & deduplication
+          #   ⚡ Mid-tier    — field extraction
           #   💰 Cheap tier  — summarization
           model: 'anthropic/claude-opus-4-6'
           extraction-model: 'anthropic/claude-sonnet-4-5-20250929'
-          deduplication-model: 'anthropic/claude-sonnet-4-5-20250929'
           summarization-model: 'anthropic/claude-haiku-4-5-20251001'
 
           # Required: Provide your LLM API key (use GitHub Secrets!)
@@ -72,7 +71,6 @@ jobs:
         with:
           model: 'anthropic/claude-opus-4-6'
           extraction-model: 'anthropic/claude-sonnet-4-5-20250929'
-          deduplication-model: 'anthropic/claude-sonnet-4-5-20250929'
           summarization-model: 'anthropic/claude-haiku-4-5-20251001'
           anthropic-api-key: ${{ secrets.ANTHROPIC_API_KEY }}
           pr-url: ${{ github.event.issue.pull_request.html_url }}
@@ -95,7 +93,6 @@ jobs:
 #       with:
 #         model: 'bedrock/us.anthropic.claude-opus-4-6-v1'
 #         extraction-model: 'bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0'
-#         deduplication-model: 'bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0'
 #         summarization-model: 'bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0'
 #         aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
 #         aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

diff --git a/README.md b/README.md
@@ -89,7 +89,6 @@ Built for **engineering teams that care about correctness, security, and control
 - 🐛 **Bug Detection** - Identifies logic errors, null references, resource leaks, edge cases
 - 📝 **Documentation Review** - Checks for missing docstrings, outdated comments, incomplete docs
 - 🔍 **Intelligent Scope Detection** - Automatically identifies code scopes (frontend, backend, infra, microservice in mono repo, etc...)
-- 🔄 **Smart Deduplication** - LLM-powered issue deduplication across reviewers
 - 💰 **Cost Tracking** - Track LLM calls, tokens, and costs per review
 - 🤖 **Model Agnostic** - Works with OpenAI, AWS Bedrock, Anthropic, Ollama, and more via LiteLLM
 - 🐳 **Docker Ready** - Run locally or in the cloud with Docker
@@ -502,7 +501,7 @@ codespy uses a tiered model approach to balance review quality and cost:
 | Tier | Role | Default | Recommended Model | Used By |
 |------|------|---------|-------------------|---------|
 | 🧠 **Smart** | Core analysis & reasoning | `DEFAULT_MODEL` | `anthropic/claude-opus-4-6` | Code & doc review, supply chain, scope identification |
-| ⚡ **Mid-tier** | Extraction & deduplication | Falls back to `DEFAULT_MODEL` | `anthropic/claude-sonnet-4-5-20250929` | TwoStepAdapter field extraction, issue deduplication |
+| ⚡ **Mid-tier** | Field extraction | Falls back to `DEFAULT_MODEL` | `anthropic/claude-sonnet-4-5-20250929` | TwoStepAdapter field extraction |
 | 💰 **Cheap** | Summarization | Falls back to `DEFAULT_MODEL` | `anthropic/claude-haiku-4-5-20251001` | PR summary generation |
 
 By default, **all models use `DEFAULT_MODEL`** (`anthropic/claude-opus-4-6`). This works out of the box — just set your API credentials and go.
@@ -513,7 +512,6 @@ To optimize costs, override the mid-tier and cheap models:
 # .env or environment variables
 DEFAULT_MODEL=anthropic/claude-opus-4-6                    # Smart tier (default)
 EXTRACTION_MODEL=anthropic/claude-sonnet-4-5-20250929      # Mid-tier: field extraction
-DEDUPLICATION_MODEL=anthropic/claude-sonnet-4-5-20250929   # Mid-tier: issue deduplication
 SUMMARIZATION_MODEL=anthropic/claude-haiku-4-5-20251001    # Cheap tier: PR summary
 ```
 
@@ -523,8 +521,6 @@ Or in `codespy.yaml`:
 default_model: anthropic/claude-opus-4-6
 extraction_model: anthropic/claude-sonnet-4-5-20250929
 signatures:
-  deduplication:
-    model: anthropic/claude-sonnet-4-5-20250929
   summarization:
     model: anthropic/claude-haiku-4-5-20251001
 ```
@@ -651,11 +647,6 @@ output_git: true
 │  └──────────────────────────┬─────────────────────────────────┘     │
 │                             │                                       │
 │  ┌──────────────────────────▼─────────────────────────────────┐     │
-│  │                 Issue Deduplicator                         │     │
-│  │  (LLM-powered deduplication across reviewers)              │     │
-│  └──────────────────────────┬─────────────────────────────────┘     │
-│                             │                                       │
-│  ┌──────────────────────────▼─────────────────────────────────┐     │
 │  │                   PR Summarizer                            │     │
 │  │  (generates summary, quality assessment, recommendation)   │     │
 │  └────────────────────────────────────────────────────────────┘     │
@@ -694,7 +685,6 @@ The review is powered by DSPy signatures that structure the LLM's analysis:
 | **CodeReviewSignature** | `code_review` | Detects verified bugs, security vulnerabilities, removed defensive code, and code smells |
 | **DocReviewSignature** | `doc` | Detects stale or wrong documentation caused by code changes |
 | **SupplyChainSecuritySignature** | `supply_chain` | Analyzes artifacts (Dockerfiles) and dependencies for supply chain security |
-| **IssueDeduplicationSignature** | `deduplication` | LLM-powered deduplication of issues across reviewers |
 | **MRSummarySignature** | `summarization` | Generates summary, quality assessment, and recommendation |
 
 ## Supported Languages

diff --git a/action.yml b/action.yml
@@ -215,34 +215,6 @@ inputs:
     required: false
     default: 'false'
 
-  # ==========================================
-  # SIGNATURE: deduplication
-  # ==========================================
-  deduplication-enabled:
-    description: 'Enable issue deduplication signature'
-    required: false
-    default: 'true'
-
-  deduplication-model:
-    description: 'Model for deduplication (empty = use default)'
-    required: false
-
-  deduplication-max-iters:
-    description: 'Max iterations for deduplication'
-    required: false
-
-  deduplication-max-context-size:
-    description: 'Max context size for deduplication'
-    required: false
-
-  deduplication-max-reasoning-tokens:
-    description: 'Max reasoning tokens for deduplication'
-    required: false
-
-  deduplication-temperature:
-    description: 'Temperature for deduplication'
-    required: false
-
   # ==========================================
   # SIGNATURE: summarization
   # ==========================================
@@ -377,14 +349,6 @@ runs:
         SUPPLY_CHAIN_TEMPERATURE: ${{ inputs.supply-chain-temperature }}
         SUPPLY_CHAIN_SCAN_UNCHANGED: ${{ inputs.supply-chain-scan-unchanged }}
 
-        # Deduplication signature
-        DEDUPLICATION_ENABLED: ${{ inputs.deduplication-enabled }}
-        DEDUPLICATION_MODEL: ${{ inputs.deduplication-model }}
-        DEDUPLICATION_MAX_ITERS: ${{ inputs.deduplication-max-iters }}
-        DEDUPLICATION_MAX_CONTEXT_SIZE: ${{ inputs.deduplication-max-context-size }}
-        DEDUPLICATION_MAX_REASONING_TOKENS: ${{ inputs.deduplication-max-reasoning-tokens }}
-        DEDUPLICATION_TEMPERATURE: ${{ inputs.deduplication-temperature }}
-
         # Summarization signature
         SUMMARIZATION_ENABLED: ${{ inputs.summarization-enabled }}
         SUMMARIZATION_MODEL: ${{ inputs.summarization-model }}
@@ -453,14 +417,6 @@ runs:
         [ -n "$SUPPLY_CHAIN_TEMPERATURE" ] && DOCKER_ARGS="$DOCKER_ARGS -e SUPPLY_CHAIN_TEMPERATURE"
         [ -n "$SUPPLY_CHAIN_SCAN_UNCHANGED" ] && DOCKER_ARGS="$DOCKER_ARGS -e SUPPLY_CHAIN_SCAN_UNCHANGED"
 
-        # Deduplication
-        [ -n "$DEDUPLICATION_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e DEDUPLICATION_ENABLED"
-        [ -n "$DEDUPLICATION_MODEL" ] && DOCKER_ARGS="$DOCKER_ARGS -e DEDUPLICATION_MODEL"
-        [ -n "$DEDUPLICATION_MAX_ITERS" ] && DOCKER_ARGS="$DOCKER_ARGS -e DEDUPLICATION_MAX_ITERS"
-        [ -n "$DEDUPLICATION_MAX_CONTEXT_SIZE" ] && DOCKER_ARGS="$DOCKER_ARGS -e DEDUPLICATION_MAX_CONTEXT_SIZE"
-        [ -n "$DEDUPLICATION_MAX_REASONING_TOKENS" ] && DOCKER_ARGS="$DOCKER_ARGS -e DEDUPLICATION_MAX_REASONING_TOKENS"
-        [ -n "$DEDUPLICATION_TEMPERATURE" ] && DOCKER_ARGS="$DOCKER_ARGS -e DEDUPLICATION_TEMPERATURE"
-
         # Summarization
         [ -n "$SUMMARIZATION_ENABLED" ] && DOCKER_ARGS="$DOCKER_ARGS -e SUMMARIZATION_ENABLED"
         [ -n "$SUMMARIZATION_MODEL" ] && DOCKER_ARGS="$DOCKER_ARGS -e SUMMARIZATION_MODEL"

diff --git a/codespy.yaml b/codespy.yaml
@@ -71,21 +71,19 @@ gitlab:
 #     smells), supply chain, scope identification, doc review. Requires strong reasoning.
 #     Recommended: anthropic/claude-opus-4-6 or equivalent.
 #
-#   Mid-tier (extraction_model + deduplication): Used for structured field
-#     extraction (TwoStepAdapter) and issue deduplication. Needs accuracy but
-#     not deep reasoning. Recommended: anthropic/claude-sonnet-4-5-20250929 or equivalent.
+#   Mid-tier (extraction_model): Used for structured field
+#     extraction (TwoStepAdapter). Needs accuracy but not deep reasoning.
+#     Recommended: anthropic/claude-sonnet-4-5-20250929 or equivalent.
 #
 #   Cheap (summarization): Used for PR summary generation. Simple synthesis
 #     task. Recommended: anthropic/claude-haiku-4-5-20251001 or equivalent.
 #
-# By default, all models fall back to default_model. Override extraction_model,
-# deduplication model, and summarization model for cost optimization:
+# By default, all models fall back to default_model. Override extraction_model
+# and signatures.summarization.model for cost optimization:
 #
 #   default_model: anthropic/claude-opus-4-6
 #   extraction_model: anthropic/claude-sonnet-4-5-20250929
 #   signatures:
-#     deduplication:
-#       model: anthropic/claude-sonnet-4-5-20250929
 #     summarization:
 #       model: anthropic/claude-haiku-4-5-20251001
 # ============================================================================
@@ -143,14 +141,6 @@ signatures:
     max_reasoning_tokens: null           # SCOPE_MAX_REASONING_TOKENS
     temperature: null                    # SCOPE_TEMPERATURE
 
-  # Deduplicator signature
-  deduplication:
-    enabled: true                        # DEDUPLICATION_ENABLED
-    model: null                          # DEDUPLICATION_MODEL (falls back to default_model)
-    max_context_size: null               # DEDUPLICATION_MAX_CONTEXT_SIZE
-    max_reasoning_tokens: null           # DEDUPLICATION_MAX_REASONING_TOKENS
-    temperature: null                    # DEDUPLICATION_TEMPERATURE
-
   # Summarizer signature
   summarization:
     enabled: true                        # SUMMARIZATION_ENABLED

diff --git a/src/codespy/agents/reviewer/models.py b/src/codespy/agents/reviewer/models.py
@@ -108,7 +108,7 @@ def location(self) -> str:
 class SignatureStatsResult(BaseModel):
     """Statistics for a single signature's execution during review."""
 
-    name: str = Field(description="Signature name (e.g., code_review, doc, scope, supply_chain, deduplication, summarization)")
+    name: str = Field(description="Signature name (e.g., code_review, doc, scope, supply_chain, summarization)")
     cost: float = Field(default=0.0, description="Cost in USD for this signature")
     tokens: int = Field(default=0, description="Tokens used by this signature")
     call_count: int = Field(default=0, description="Number of LLM calls made by this signature")

diff --git a/src/codespy/agents/reviewer/modules/__init__.py b/src/codespy/agents/reviewer/modules/__init__.py
@@ -1,15 +1,13 @@
 """DSPy modules for code review."""
 
 from codespy.agents.reviewer.modules.code_reviewer import CodeReviewer
-from codespy.agents.reviewer.modules.deduplicator import IssueDeduplicator
 from codespy.agents.reviewer.modules.doc_reviewer import DocReviewer
 from codespy.agents.reviewer.modules.scope_identifier import ScopeIdentifier
 from codespy.agents.reviewer.modules.supply_chain_auditor import SupplyChainAuditor
 
 __all__ = [
     "CodeReviewer",
     "DocReviewer",
-    "IssueDeduplicator",
     "ScopeIdentifier",
     "SupplyChainAuditor",
 ]
diff --git a/src/codespy/agents/reviewer/modules/deduplicator.py b/src/codespy/agents/reviewer/modules/deduplicator.py
diff --git a/src/codespy/agents/reviewer/reviewer.py b/src/codespy/agents/reviewer/reviewer.py
@@ -21,7 +21,6 @@
 from codespy.agents.reviewer.modules import (
     CodeReviewer,
     DocReviewer,
-    IssueDeduplicator,
     ScopeIdentifier,
     SupplyChainAuditor,
 )
@@ -79,7 +78,6 @@ def __init__(self, settings: Settings | None = None) -> None:
         self.code_reviewer = CodeReviewer()
         self.doc_reviewer = DocReviewer()
         self.supply_chain_auditor = SupplyChainAuditor()
-        self.deduplicator = IssueDeduplicator()
 
     def _verify_model_access(self) -> None:
         """Verify LLM model access."""
@@ -211,13 +209,8 @@ def forward(self, config: ReviewConfig) -> ReviewResult:
         all_issues = asyncio.run(
             self._run_review_modules(scopes, repo_path, module_names)
         )
-        logger.info(f"Found {len(all_issues)} issues before deduplication")
+        logger.info(f"Found {len(all_issues)} issues")
 
-        # Deduplicate issues across reviewers (deduplicator checks if enabled internally)
-        if len(all_issues) > 1:
-            logger.info("Deduplicating issues...")
-            all_issues = self.deduplicator(all_issues)
-            logger.info(f"After deduplication: {len(all_issues)} unique issues")
         # Collect in-scope files from identified scopes (excludes binaries, vendor, lock files, etc.)
         scoped_files = self._collect_scoped_files(scopes)
         logger.info(

diff --git a/src/codespy/agents/reviewer/server.py b/src/codespy/agents/reviewer/server.py
@@ -104,7 +104,7 @@ async def review_local_changes(
     No PR or remote platform required — works with any local git repository.
     Diffs the current HEAD against the base_ref to find changed files, then runs
     the full codespy review pipeline (scope identification, code & doc review,
-    supply chain audit, deduplication, and summarization).
+    supply chain audit, and summarization).
 
     Args:
         repo_path: Absolute path to the local git repository to review