Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
name: CI

# Triggered by pushes to main and by pull requests that target main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  test:
    name: Run all test suites (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest

    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix:
        python-version:
          - "3.10"
          - "3.11"
          - "3.12"

    steps:
      # Step 1: fetch the repository contents.
      - name: Checkout repository
        uses: actions/checkout@v4

      # Step 2: install the interpreter selected by the matrix, with pip caching.
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip

      # Step 3: runtime dependencies come from requirements.txt.
      - name: Install runtime dependencies
        run: pip install -r requirements.txt

      # Step 4: test-only dependencies, deliberately kept out of requirements.txt.
      #   pytest: the test runner
      #   httpx: needed internally by starlette.testclient.TestClient
      - name: Install test dependencies
        run: pip install pytest httpx

      # Step 5: unit tests.
      # Covers the tokenizer/slang_serializer.py round-trip; needs no network
      # and no solver.
      - name: Run unit tests
        env:
          GROQ_API_KEY: ""
        run: pytest tests/unit/ -v --tb=short

      # Step 6: integration tests.
      # Exercises POST /solve on the Starlette app through TestClient.
      # GROQ_API_KEY is blank so the Groq-backed solver is not selected.
      - name: Run integration tests
        env:
          GROQ_API_KEY: ""
        run: pytest tests/integration/ -v --tb=short

      # Step 7: regression tests.
      # Every .json fixture under tests/regression/fixtures/ is fed to
      # solver.solve() and the result is compared with the golden output.
      # GROQ_API_KEY is blank so the Groq-backed solver is not selected.
      - name: Run regression tests
        env:
          GROQ_API_KEY: ""
        run: pytest tests/regression/ -v --tb=short

      # Step 8: all three suites in a single pytest process.
      # This is meant to surface solver-singleton leakage across suites that
      # the per-suite runs above cannot reveal; the autouse fixture in
      # conftest.py resets the singleton before each test.
      - name: Run full test suite
        env:
          GROQ_API_KEY: ""
        run: pytest tests/ -v --tb=short
43 changes: 28 additions & 15 deletions api/_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,18 +136,31 @@ def get_solver():
_solver_error = str(exc)
print(f"[CalculusSolver] Groq load failed: {exc}", flush=True)

# 2. Fall back to deterministic solver (pure Python, no model)
from inference.fallback_solver import FallbackSolver

_solver = FallbackSolver()
_solver_mode = "fallback"
if not _solver_error:
_solver_error = "No GROQ_API_KEY provided. Falling back to deterministic solver."
print(
"[CalculusSolver] Running in FALLBACK mode — "
"supports diff, partial, integrate, gradient, tangent_line.",
flush=True,
)
# 2. Load B's real solver
try:
from inference.solve import CalculusSolverInference
_solver = CalculusSolverInference()
_solver_mode = "inference"
_solver_error = None
print(
"[CalculusSolver] B's solver loaded successfully.",
flush=True,
)
except Exception as exc:
_solver_error = str(exc)
print(f"[CalculusSolver] B's solver load failed: {exc}", flush=True)
# Final fallback — always available
from inference.fallback_solver import FallbackSolver

_solver = FallbackSolver()
_solver_mode = "fallback"
if not _solver_error:
_solver_error = "B's solver failed to load. Using deterministic fallback."
print(
"[CalculusSolver] Running in FALLBACK mode — "
"supports diff, partial, integrate, gradient, tangent_line.",
flush=True,
)
return _solver, _solver_mode


Expand Down Expand Up @@ -220,7 +233,7 @@ def fraction_to_latex(expr: dict) -> str:

def normalize_solver_result(result: dict, mode: str) -> dict:
"""Normalize/unwrap solver output into the standard API response format."""
if mode == "neural":
if mode in ("neural", "inference"):
output = result.get("output") or {}
if isinstance(output, dict) and "expr" in output:
expr = output["expr"]
Expand All @@ -235,10 +248,10 @@ def normalize_solver_result(result: dict, mode: str) -> dict:
"steps": steps,
"latex": latex,
"confidence": float(result.get("confidence", 0.0)),
"verified": result.get("verified"),
"verified": bool(result.get("verified")),
"warning": result.get("warning"),
"rule": result.get("rule"),
"mode": "neural",
"mode": mode,
}
else:
# Fallback and Groq solver results already have the correct structure
Expand Down
45 changes: 45 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
Root conftest.py — shared fixtures for all test suites.

Provides:
reset_solver_singleton — autouse fixture that clears the api._shared
solver singleton before every test. This prevents solver state from
leaking between test modules when pytest runs all suites in one process.

Explicitly removes GROQ_API_KEY from the environment so that
get_solver() never selects the Groq-backed solver in CI or locally.
get_solver() then tries the inference solver and, if that fails to
load, uses FallbackSolver.
"""

import os
import pytest


@pytest.fixture(autouse=True)
def reset_solver_singleton():
    """
    Reset the cached solver state ahead of each test.

    Because the fixture is autouse, pytest applies it to every test in
    every test file without the test having to request it by name.

    Before the test body runs, GROQ_API_KEY is dropped from the process
    environment and the cached solver attributes on api._shared are put
    back to their unloaded values. Nothing happens after the ``yield``:
    the next test's invocation of this fixture performs the reset again.
    """
    # Drop the Groq key before touching api._shared, so that importing the
    # module below does not accidentally instantiate GroqSolver.
    os.environ.pop("GROQ_API_KEY", None)

    # Deferred import: avoids circular-import trouble if conftest is loaded
    # before the package is fully on sys.path.
    try:
        import api._shared as shared_state
    except ImportError:
        # api._shared is not importable (e.g. unit-only runs that do not
        # need the api package), so there is no singleton to reset.
        pass
    else:
        for attr_name, cleared_value in (
            ("_solver", None),
            ("_solver_mode", "unloaded"),
            ("_solver_error", None),
        ):
            setattr(shared_state, attr_name, cleared_value)

    yield
Empty file added tests/__init__.py
Empty file.
Empty file added tests/integration/__init__.py
Empty file.
Loading