cipher813 · cipher813 · May 22, 2026 · May 22, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -24,8 +24,13 @@ jobs:
       - name: Install dependencies
         run: pip install -e ".[dev]"
 
-      - name: Run tests
-        run: pytest tests/ -v
+      - name: Run tests with coverage gate
+        # fail_under = 80 in pyproject.toml [tool.coverage.report] sets
+        # the project's 80% coverage floor; pytest-cov applies it when
+        # --cov-fail-under is not passed, so the build fails if a PR
+        # drops below the gate. Source paths, omits and exclude_lines
+        # also live in pyproject.toml, keeping this a one-line invocation.
+        run: pytest tests/ -v --cov --cov-report=term-missing
 
   secrets:
     runs-on: ubuntu-latest

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,27 @@
 
 ## [0.7.0] - Unreleased
 
+### Test coverage
+
+- **CI now enforces ≥80% test coverage.** `pyproject.toml` gains
+  `[tool.coverage.run]` + `[tool.coverage.report]` config with
+  `fail_under = 80`; `ci.yml` runs `pytest --cov` so a PR that drops
+  coverage below the floor fails the build. Excluded modules
+  (`dashboard/*`, `__main__.py`, `upgrade.py`, `downgrade.py`,
+  `llm.py`) are under-testable-by-design and documented in the
+  config — Streamlit UI / entry-point shim / release-engineering
+  scripts requiring real Fly+AWS / deprecated optional-LLM module
+  the deployed product doesn't use.
+- **Current coverage: 86%** (suite 850 → 855 passing).
+- **README coverage badge** added: `coverage-86%-brightgreen`.
+  Static, manually updated on each release (matches the existing
+  static-badge pattern for Status / Python / License / MCP).
+- New `tests/test_nli.py` additions cover: `_ensure_loaded` HF
+  download failure → `NLIUnavailableError`; `_ensure_loaded`
+  unexpected label-set rejection; `prewarm()` swallows
+  unavailability per acceptable-secondary-observability category;
+  `classify_pair` softmax + input-building path with stubbed session.
+
 ### CI / release tooling
 
 - **New `.github/workflows/ci-server-extras.yml` workflow.** Installs

diff --git a/README.md b/README.md
@@ -5,6 +5,7 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
 [![MCP](https://img.shields.io/badge/MCP-compatible-blueviolet.svg)](https://modelcontextprotocol.io)
 [![PyPI](https://img.shields.io/pypi/v/mnemon-memory.svg)](https://pypi.org/project/mnemon-memory/)
+![Coverage](https://img.shields.io/badge/coverage-86%25-brightgreen.svg)
 
 > One memory vault. Every MCP client. Self-hosted.
 >

diff --git a/pyproject.toml b/pyproject.toml
@@ -77,6 +77,46 @@ markers = [
     "integration: end-to-end tests that spawn a real mnemon serve-remote subprocess (skip with -m 'not integration')",
 ]
 
+# Coverage configuration (used when pytest is invoked with --cov).
+# CI runs `pytest --cov`; `fail_under = 80` below makes a regression
+# below the 80% floor fail the build. Excluded modules below are
+# under-testable-by-design — keeping them in the coverage denominator
+# would force fake/superficial tests that don't actually exercise the
+# code (Streamlit dashboard, entry-point shim, release-engineering
+# scripts that require real Fly/AWS).
+[tool.coverage.run]
+source = ["src/mnemon"]
+omit = [
+    # Streamlit dashboard — UI code that's only meaningfully testable
+    # by running Streamlit. Operator runs this; CI doesn't.
+    "src/mnemon/dashboard/*",
+    # __main__ entry-point shim — 4 lines, no logic
+    "src/mnemon/__main__.py",
+    # Release-engineering operations on Fly + AWS — exercised by the
+    # Layer-3 web test ritual (scripts/promote_stable.sh), not unit
+    # tests. Mocking the full upgrade/downgrade path adds maintenance
+    # cost without surfacing real bugs (those surface in Layer-3).
+    "src/mnemon/upgrade.py",
+    "src/mnemon/downgrade.py",
+    # Deprecated / optional LLM path — kept for local-mode operators
+    # who explicitly install [llm] extras. The deployed product is
+    # LLM-free by design (2026-05-22 decision); NLI replaced the only
+    # production use of this module (mnemon.contradiction). Tests
+    # cover the surface that's actually called.
+    "src/mnemon/llm.py",
+]
+
+[tool.coverage.report]
+fail_under = 80
+show_missing = true
+skip_empty = true
+exclude_lines = [
+    "pragma: no cover",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
+
 [tool.ruff.lint.per-file-ignores]
 # Streamlit convention: st.set_page_config(...) must precede any other
 # Streamlit command, so page modules import library code after the

diff --git a/tests/test_nli.py b/tests/test_nli.py
@@ -8,7 +8,7 @@
 
 from __future__ import annotations
 
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -121,3 +121,124 @@ def test_unavailable_error_has_descriptive_message(self):
         # The message survives string conversion (needed for the MCP
         # tool's clear-error path).
         assert "model load failed" in str(e)
+
+    def test_ensure_loaded_raises_on_hub_download_failure(self):
+        """Network / 403 / 404 errors during model download must
+        surface as NLIUnavailableError, not bubble unchanged."""
+        import mnemon.nli
+        original_session = mnemon.nli._session
+        mnemon.nli._session = None
+        try:
+            with patch(
+                "huggingface_hub.hf_hub_download",
+                side_effect=ConnectionError("simulated network failure"),
+            ):
+                with pytest.raises(NLIUnavailableError) as exc:
+                    mnemon.nli._ensure_loaded()
+            assert "simulated network failure" in str(exc.value)
+        finally:
+            mnemon.nli._session = original_session
+
+    def test_ensure_loaded_raises_on_unexpected_label_set(self, tmp_path):
+        """A model with a different label space than the expected
+        contradiction/entailment/neutral triple must fail fast at
+        load time, not produce mis-classifications downstream."""
+        import json as json_mod
+        import mnemon.nli
+
+        # Fake config + tokenizer files
+        config_path = tmp_path / "config.json"
+        config_path.write_text(json_mod.dumps({"id2label": {"0": "happy", "1": "sad"}}))
+        tokenizer_path = tmp_path / "tokenizer.json"
+        tokenizer_path.write_text('{"version":"1.0"}')  # minimal stub
+        onnx_path = tmp_path / "model.onnx"
+        onnx_path.write_bytes(b"")  # not actually loaded; rejected earlier
+
+        original_session = mnemon.nli._session
+        mnemon.nli._session = None
+        try:
+            def fake_download(repo_id, filename):
+                if filename == "config.json":
+                    return str(config_path)
+                if filename == "tokenizer.json":
+                    return str(tokenizer_path)
+                return str(onnx_path)
+
+            with patch("huggingface_hub.hf_hub_download", side_effect=fake_download):
+                with pytest.raises(NLIUnavailableError) as exc:
+                    mnemon.nli._ensure_loaded()
+            assert "unexpected label set" in str(exc.value)
+        finally:
+            mnemon.nli._session = original_session
+
+
+class TestPrewarm:
+    def test_prewarm_swallows_unavailability(self):
+        """prewarm() is best-effort observability — must NOT raise
+        even when the underlying model can't load. (Acceptable
+        swallow per feedback_no_silent_fails — pre-warm is secondary;
+        the first real call surfaces the named error.)"""
+        from mnemon.nli import prewarm
+        with patch("mnemon.nli._ensure_loaded",
+                   side_effect=NLIUnavailableError("simulated")):
+            prewarm()  # must not raise
+
+    def test_prewarm_calls_ensure_loaded(self):
+        from mnemon.nli import prewarm
+        with patch("mnemon.nli._ensure_loaded") as ensure:
+            prewarm()
+            assert ensure.called
+
+
+class TestClassifyPairTokenization:
+    """Smoke-test the input-building path with a stubbed session +
+    tokenizer. Exercises lines 164-189 (the input dict construction
+    + softmax over logits) without paying the real model load cost."""
+
+    def test_classify_pair_returns_argmax_label(self):
+        """Given fake logits that favor 'contradiction', the result
+        label is 'contradiction' and probs sum to ~1.0."""
+        import numpy as np
+        import mnemon.nli
+
+        # Stub the session: return fixed logits favoring contradiction (idx 0)
+        fake_session = MagicMock()
+        fake_session.get_inputs.return_value = [
+            MagicMock(name="input_ids"),
+            MagicMock(name="attention_mask"),
+        ]
+        # Configure .name attrs explicitly (MagicMock(name=...) doesn't
+        # set the attribute, just the repr)
+        for input_mock, real_name in zip(
+            fake_session.get_inputs.return_value, ["input_ids", "attention_mask"]
+        ):
+            input_mock.name = real_name
+        fake_session.run.return_value = [np.array([[5.0, -2.0, 0.0]], dtype=np.float32)]
+
+        fake_tokenizer = MagicMock()
+        fake_enc = MagicMock()
+        fake_enc.ids = [101, 1000, 102, 2000, 102]
+        fake_enc.attention_mask = [1, 1, 1, 1, 1]
+        fake_enc.type_ids = [0, 0, 0, 1, 1]
+        fake_tokenizer.encode.return_value = fake_enc
+
+        original_session = mnemon.nli._session
+        original_tokenizer = mnemon.nli._tokenizer
+        original_id2label = mnemon.nli._id2label
+        mnemon.nli._session = fake_session
+        mnemon.nli._tokenizer = fake_tokenizer
+        mnemon.nli._id2label = {0: "contradiction", 1: "entailment", 2: "neutral"}
+        try:
+            result = mnemon.nli.classify_pair("premise", "hypothesis")
+            assert result.label == "contradiction"
+            # Softmax probs sum to 1
+            assert abs(sum(result.probs.values()) - 1.0) < 1e-5
+            # Contradiction has highest prob
+            assert result.probs["contradiction"] > result.probs["entailment"]
+            assert result.probs["contradiction"] > result.probs["neutral"]
+        finally:
+            mnemon.nli._session = original_session
+            mnemon.nli._tokenizer = original_tokenizer
+            mnemon.nli._id2label = original_id2label
+
+