diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..192b05d --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,76 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test: + name: Run all test suites (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + # ── 1. Checkout ──────────────────────────────────────────────────────── + - name: Checkout repository + uses: actions/checkout@v4 + + # ── 2. Python setup ──────────────────────────────────────────────────── + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + # ── 3. Install runtime dependencies ─────────────────────────────────── + - name: Install runtime dependencies + run: pip install -r requirements.txt + + # ── 4. Install test dependencies ─────────────────────────────────────── + # pytest — test runner + # httpx — required internally by starlette.testclient.TestClient + # These are not in requirements.txt (they are dev/test only). + - name: Install test dependencies + run: pip install pytest httpx + + # ── 5. Unit tests ────────────────────────────────────────────────────── + # Tests tokenizer/slang_serializer.py round-trip. + # No network, no solver, no env vars needed. + - name: Run unit tests + run: pytest tests/unit/ -v --tb=short + env: + GROQ_API_KEY: "" + + # ── 6. Integration tests ─────────────────────────────────────────────── + # Tests POST /solve against the live Starlette app using TestClient. + # GROQ_API_KEY must be empty so FallbackSolver is always selected. + - name: Run integration tests + run: pytest tests/integration/ -v --tb=short + env: + GROQ_API_KEY: "" + + # ── 7. 
Regression tests ──────────────────────────────────────────────── + # Loads every .json fixture from tests/regression/fixtures/ and + # runs solver.solve() against it, comparing to the golden output. + # GROQ_API_KEY must be empty so FallbackSolver is always selected. + - name: Run regression tests + run: pytest tests/regression/ -v --tb=short + env: + GROQ_API_KEY: "" + + # ── 8. Full suite run (all three together) ───────────────────────────── + # Runs all suites in one pytest process to catch any cross-suite + # singleton leakage that the per-suite runs would not detect. + # conftest.py's autouse fixture handles singleton reset between tests. + - name: Run full test suite + run: pytest tests/ -v --tb=short + env: + GROQ_API_KEY: "" diff --git a/api/_shared.py b/api/_shared.py index 3dad29f..5d7edf1 100644 --- a/api/_shared.py +++ b/api/_shared.py @@ -136,18 +136,31 @@ def get_solver(): _solver_error = str(exc) print(f"[CalculusSolver] Groq load failed: {exc}", flush=True) - # 2. Fall back to deterministic solver (pure Python, no model) - from inference.fallback_solver import FallbackSolver - - _solver = FallbackSolver() - _solver_mode = "fallback" - if not _solver_error: - _solver_error = "No GROQ_API_KEY provided. Falling back to deterministic solver." - print( - "[CalculusSolver] Running in FALLBACK mode — " - "supports diff, partial, integrate, gradient, tangent_line.", - flush=True, - ) + # 2. 
Load B's real solver + try: + from inference.solve import CalculusSolverInference + _solver = CalculusSolverInference() + _solver_mode = "inference" + _solver_error = None + print( + "[CalculusSolver] B's solver loaded successfully.", + flush=True, + ) + except Exception as exc: + _solver_error = str(exc) + print(f"[CalculusSolver] B's solver load failed: {exc}", flush=True) + # Final fallback — always available + from inference.fallback_solver import FallbackSolver + + _solver = FallbackSolver() + _solver_mode = "fallback" + if not _solver_error: + _solver_error = "B's solver failed to load. Using deterministic fallback." + print( + "[CalculusSolver] Running in FALLBACK mode — " + "supports diff, partial, integrate, gradient, tangent_line.", + flush=True, + ) return _solver, _solver_mode @@ -220,7 +233,7 @@ def fraction_to_latex(expr: dict) -> str: def normalize_solver_result(result: dict, mode: str) -> dict: """Normalize/unwrap solver output into the standard API response format.""" - if mode == "neural": + if mode in ("neural", "inference"): output = result.get("output") or {} if isinstance(output, dict) and "expr" in output: expr = output["expr"] @@ -235,10 +248,10 @@ def normalize_solver_result(result: dict, mode: str) -> dict: "steps": steps, "latex": latex, "confidence": float(result.get("confidence", 0.0)), - "verified": result.get("verified"), + "verified": bool(result.get("verified")), "warning": result.get("warning"), "rule": result.get("rule"), - "mode": "neural", + "mode": mode, } else: # Fallback and Groq solver results already have the correct structure diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..3bdb68a --- /dev/null +++ b/conftest.py @@ -0,0 +1,45 @@ +""" +Root conftest.py — shared fixtures for all test suites. + +Provides: + reset_solver_singleton — autouse fixture that clears the api._shared + solver singleton before every test. 
This prevents solver state from + leaking between test modules when pytest runs all suites in one process. + + Explicitly removes GROQ_API_KEY from the environment so that + get_solver() always falls through to FallbackSolver in CI and locally. +""" + +import os +import pytest + + +@pytest.fixture(autouse=True) +def reset_solver_singleton(): + """ + Clear the solver singleton and remove GROQ_API_KEY before every test. + + autouse=True means this runs automatically for every test in every + test file, with no need to import or reference it explicitly. + + Yields control to the test, then does nothing on teardown — the + singleton stays reset for the next test because the next invocation + of this fixture will reset it again before that test runs. + """ + # Remove Groq key first so that any import of api._shared triggered + # below does not accidentally instantiate GroqSolver + os.environ.pop("GROQ_API_KEY", None) + + # Import here (not at module level) to avoid circular import issues + # if conftest is loaded before the package is fully on sys.path + try: + import api._shared as _shared + _shared._solver = None + _shared._solver_mode = "unloaded" + _shared._solver_error = None + except ImportError: + # api._shared not importable yet (e.g. during unit-only runs + # where the api package is not needed). Safe to ignore. + pass + + yield diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/integration/test_solve_endpoint.py b/tests/integration/test_solve_endpoint.py new file mode 100644 index 0000000..14d9af1 --- /dev/null +++ b/tests/integration/test_solve_endpoint.py @@ -0,0 +1,506 @@ +""" +Integration tests for POST /solve against the FallbackSolver. + +Uses Starlette's TestClient (synchronous, no running server needed). 
+The FallbackSolver is the "stub solver" — it runs when no GROQ_API_KEY +is set and no neural checkpoint exists, which is always true in CI. + +Covers: + - Health check + - Happy-path solve for every supported operation + - Response shape invariants + - Step object structure + - LaTeX field presence and content + - Error handling (400, 422, 503 paths) +""" + +import os +import pytest + +# ── Reset solver singleton BEFORE importing the app ─────────────────────────── +# _shared uses module-level globals to cache the solver after first load. +# We clear them here so tests always start with FallbackSolver, never Groq. +os.environ.pop("GROQ_API_KEY", None) + +import api._shared as _shared +_shared._solver = None +_shared._solver_mode = "unloaded" +_shared._solver_error = None + +import warnings +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + from starlette.testclient import TestClient +from api.app import app + +client = TestClient(app, raise_server_exceptions=True) + + +# ── Helper ───────────────────────────────────────────────────────────────────── + +def make_envelope(op: str, var: str, terms: list, deno=1, point: dict = None) -> dict: + """Build a standard SLaNg input envelope for POST /solve.""" + inner = { + "op": op, + "var": var, + "expr": { + "numi": {"terms": terms}, + "deno": deno + } + } + if point is not None: + inner["point"] = point + return {"input": inner} + + +# ── Health check ─────────────────────────────────────────────────────────────── + +def test_health_returns_200(): + r = client.get("/api/health") + assert r.status_code == 200 + + +def test_health_status_is_ok(): + r = client.get("/api/health") + assert r.json()["status"] == "ok" + + +def test_health_solver_mode_is_fallback(): + r = client.get("/api/health") + assert r.json()["solver_mode"] == "fallback" + + +def test_health_solver_loaded_is_true(): + r = client.get("/api/health") + assert r.json()["solver_loaded"] is True + + +def 
test_health_also_reachable_without_api_prefix(): + r = client.get("/health") + assert r.status_code == 200 + assert r.json()["status"] == "ok" + + +# ── POST /solve — diff ───────────────────────────────────────────────────────── + +def test_diff_single_term_status_200(): + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + + +def test_diff_single_term_status_field(): + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.json()["status"] == "solved" + + +def test_diff_single_term_mode_is_fallback(): + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.json()["mode"] == "fallback" + + +def test_diff_single_term_verified_true(): + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.json()["verified"] is True + + +def test_diff_single_term_correct_coefficient(): + # d/dx (3x²) = 6x → coeff=6 + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + terms = r.json()["expr"]["numi"]["terms"] + assert any(t["coeff"] == 6 for t in terms) + + +def test_diff_single_term_correct_exponent(): + # d/dx (3x²) = 6x → x^1 + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + terms = r.json()["expr"]["numi"]["terms"] + assert any(t.get("var", {}).get("x") == 1 for t in terms) + + +def test_diff_single_term_latex_contains_x(): + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert "x" in r.json()["latex"] + + +def test_diff_constant_term_gives_zero(): + # d/dx (7) = 0 + payload = make_envelope("diff", "x", [{"coeff": 7}]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + 
terms = r.json()["expr"]["numi"]["terms"] + assert all(t["coeff"] == 0 for t in terms) + + +def test_diff_multi_term_polynomial(): + # d/dx (3x² - x + 5) = 6x - 1 + payload = make_envelope("diff", "x", [ + {"coeff": 3, "var": {"x": 2}}, + {"coeff": -1, "var": {"x": 1}}, + {"coeff": 5} + ]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + terms = r.json()["expr"]["numi"]["terms"] + coeff_by_power = {t.get("var", {}).get("x", 0): t["coeff"] for t in terms} + assert coeff_by_power.get(1) == 6 # 6x term + assert coeff_by_power.get(0) == -1 # -1 constant term + + +def test_diff_confidence_is_one(): + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.json()["confidence"] == 1.0 + + +def test_diff_also_reachable_without_api_prefix(): + payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + + +# ── POST /solve — integrate ──────────────────────────────────────────────────── + +def test_integrate_single_term_status_200(): + payload = make_envelope("integrate", "x", [{"coeff": 6, "var": {"x": 1}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + + +def test_integrate_single_term_correct_coefficient(): + # ∫ 6x dx = 3x² → coeff=3.0 + payload = make_envelope("integrate", "x", [{"coeff": 6, "var": {"x": 1}}]) + r = client.post("/solve", json=payload) + terms = r.json()["expr"]["numi"]["terms"] + assert any(t["coeff"] == 3.0 for t in terms) + + +def test_integrate_single_term_correct_exponent(): + # ∫ 6x dx = 3x² → x^2 + payload = make_envelope("integrate", "x", [{"coeff": 6, "var": {"x": 1}}]) + r = client.post("/solve", json=payload) + terms = r.json()["expr"]["numi"]["terms"] + assert any(t.get("var", {}).get("x") == 2 for t in terms) + + +def test_integrate_verified_true(): + payload = make_envelope("integrate", "x", [{"coeff": 6, "var": {"x": 1}}]) + r = 
client.post("/solve", json=payload) + assert r.json()["verified"] is True + + +def test_integrate_step_rule_is_power_rule_integral(): + payload = make_envelope("integrate", "x", [{"coeff": 6, "var": {"x": 1}}]) + r = client.post("/solve", json=payload) + steps = r.json()["steps"] + assert any(s["rule"] == "power_rule_integral" for s in steps) + + +def test_integrate_constant_term(): + # ∫ 4 dx = 4x + payload = make_envelope("integrate", "x", [{"coeff": 4}]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + terms = r.json()["expr"]["numi"]["terms"] + assert any(t.get("var", {}).get("x") == 1 for t in terms) + + +# ── POST /solve — partial ────────────────────────────────────────────────────── + +def test_partial_status_200(): + payload = make_envelope("partial", "x", [{"coeff": 5, "var": {"x": 3}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + + +def test_partial_status_field_is_solved(): + payload = make_envelope("partial", "x", [{"coeff": 5, "var": {"x": 3}}]) + r = client.post("/solve", json=payload) + assert r.json()["status"] == "solved" + + +def test_partial_treats_other_var_as_constant(): + # ∂/∂x (3x²y) — y is treated as a constant by fallback + payload = make_envelope("partial", "x", [{"coeff": 3, "var": {"x": 2, "y": 1}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + + +def test_partial_step_rule_is_power_rule(): + payload = make_envelope("partial", "x", [{"coeff": 5, "var": {"x": 3}}]) + r = client.post("/solve", json=payload) + steps = r.json()["steps"] + assert any(s["rule"] == "power_rule" for s in steps) + + +# ── POST /solve — gradient ───────────────────────────────────────────────────── + +def test_gradient_status_200(): + payload = make_envelope("gradient", "x", [ + {"coeff": 1, "var": {"x": 2}}, + {"coeff": 1, "var": {"y": 2}} + ]) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + + +def test_gradient_expr_contains_gradient_key(): + payload = 
make_envelope("gradient", "x", [ + {"coeff": 1, "var": {"x": 2}}, + {"coeff": 1, "var": {"y": 2}} + ]) + r = client.post("/solve", json=payload) + assert "gradient" in r.json()["expr"] + + +def test_gradient_contains_partial_for_x(): + payload = make_envelope("gradient", "x", [ + {"coeff": 1, "var": {"x": 2}}, + {"coeff": 1, "var": {"y": 2}} + ]) + r = client.post("/solve", json=payload) + assert "x" in r.json()["expr"]["gradient"] + + +def test_gradient_contains_partial_for_y(): + payload = make_envelope("gradient", "x", [ + {"coeff": 1, "var": {"x": 2}}, + {"coeff": 1, "var": {"y": 2}} + ]) + r = client.post("/solve", json=payload) + assert "y" in r.json()["expr"]["gradient"] + + +def test_gradient_latex_contains_nabla(): + payload = make_envelope("gradient", "x", [ + {"coeff": 1, "var": {"x": 2}}, + {"coeff": 1, "var": {"y": 2}} + ]) + r = client.post("/solve", json=payload) + assert "nabla" in r.json()["latex"] or "∇" in r.json()["latex"] + + +# ── POST /solve — tangent_line ───────────────────────────────────────────────── + +def test_tangent_line_status_200(): + payload = make_envelope( + "tangent_line", "x", + [{"coeff": 1, "var": {"x": 2}}], + point={"x": 2} + ) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + + +def test_tangent_line_latex_contains_y_equals(): + # Tangent line is always formatted as "y = ..." 
+ payload = make_envelope( + "tangent_line", "x", + [{"coeff": 1, "var": {"x": 2}}], + point={"x": 2} + ) + r = client.post("/solve", json=payload) + assert "y" in r.json()["latex"] + + +def test_tangent_line_at_x2_slope_is_4(): + # f(x) = x² → f'(x) = 2x → slope at x=2 is 4 + payload = make_envelope( + "tangent_line", "x", + [{"coeff": 1, "var": {"x": 2}}], + point={"x": 2} + ) + r = client.post("/solve", json=payload) + terms = r.json()["expr"]["numi"]["terms"] + slope_term = next( + (t for t in terms if t.get("var", {}).get("x") == 1), None + ) + assert slope_term is not None + assert slope_term["coeff"] == 4.0 + + +def test_tangent_line_missing_point_returns_422(): + # tangent_line without a point must raise ValueError → 422 + payload = make_envelope("tangent_line", "x", [{"coeff": 1, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 422 + + +# ── Response shape invariants ────────────────────────────────────────────────── + +REQUIRED_RESPONSE_KEYS = {"status", "expr", "steps", "latex", "confidence", "verified", "mode"} + +@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), + ("partial", [{"coeff": 5, "var": {"x": 3}}]), +]) +def test_response_has_all_required_keys(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + assert r.status_code == 200 + body = r.json() + missing = REQUIRED_RESPONSE_KEYS - body.keys() + assert not missing, f"Response missing keys: {missing}" + + +@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), + ("partial", [{"coeff": 5, "var": {"x": 3}}]), +]) +def test_expr_has_numi_and_deno(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + expr = r.json()["expr"] + assert "numi" in expr or "gradient" in expr # gradient op has different expr shape + + 
+@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), +]) +def test_latex_is_nonempty_string(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + latex = r.json()["latex"] + assert isinstance(latex, str) + assert len(latex) > 0 + + +@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), +]) +def test_confidence_is_float(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + assert isinstance(r.json()["confidence"], float) + + +# ── Step object structure ────────────────────────────────────────────────────── + +REQUIRED_STEP_KEYS = {"rule", "description", "before", "after"} + +@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), + ("partial", [{"coeff": 5, "var": {"x": 3}}]), +]) +def test_steps_is_list(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + assert isinstance(r.json()["steps"], list) + + +@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), + ("partial", [{"coeff": 5, "var": {"x": 3}}]), +]) +def test_steps_has_at_least_one_entry(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + assert len(r.json()["steps"]) >= 1 + + +@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), +]) +def test_each_step_has_required_keys(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + for step in r.json()["steps"]: + missing = REQUIRED_STEP_KEYS - step.keys() + assert not missing, f"Step missing keys: {missing}. 
Step was: {step}" + + +@pytest.mark.parametrize("op,terms", [ + ("diff", [{"coeff": 3, "var": {"x": 2}}]), + ("integrate", [{"coeff": 2, "var": {"x": 1}}]), +]) +def test_each_step_fields_are_strings(op, terms): + payload = make_envelope(op, "x", terms) + r = client.post("/solve", json=payload) + for step in r.json()["steps"]: + for key in ("rule", "description", "before", "after"): + assert isinstance(step[key], str), ( + f"Step field '{key}' is not a string: {step[key]!r}" + ) + + +# ── Error handling ───────────────────────────────────────────────────────────── + +def test_invalid_json_body_returns_400(): + r = client.post( + "/solve", + content=b"this is not json", + headers={"Content-Type": "application/json"} + ) + assert r.status_code == 400 + + +def test_input_not_a_dict_returns_422(): + r = client.post("/solve", json={"input": "not-an-object"}) + assert r.status_code == 422 + + +def test_missing_expr_field_returns_422(): + # op and var present but no expr — FallbackSolver raises ValueError + r = client.post("/solve", json={"input": {"op": "diff", "var": "x"}}) + assert r.status_code == 422 + + +def test_unsupported_op_returns_422(): + # taylor is not supported by FallbackSolver → raises ValueError → 422 + payload = make_envelope("taylor", "x", [{"coeff": 1, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 422 + + +def test_unsupported_op_hessian_returns_422(): + payload = make_envelope("hessian", "x", [{"coeff": 1, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 422 + + +def test_unsupported_op_lagrange_returns_422(): + payload = make_envelope("lagrange", "x", [{"coeff": 1, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert r.status_code == 422 + + +def test_empty_json_object_returns_422(): + # {} has no expr field → solver raises ValueError + r = client.post("/solve", json={}) + assert r.status_code == 422 + + +def test_response_content_type_is_json(): + 
payload = make_envelope("diff", "x", [{"coeff": 3, "var": {"x": 2}}]) + r = client.post("/solve", json=payload) + assert "application/json" in r.headers.get("content-type", "") + + +# ── Direct envelope (no "input" wrapper) ────────────────────────────────────── + +def test_direct_envelope_without_input_wrapper_accepted(): + # The API does body.get("input", body) — so a direct envelope also works + direct = { + "op": "diff", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 3, "var": {"x": 2}}]}, + "deno": 1 + } + } + r = client.post("/solve", json=direct) + assert r.status_code == 200 + assert r.json()["status"] == "solved" diff --git a/tests/regression/__init__.py b/tests/regression/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/regression/fixtures/diff_3x2.json b/tests/regression/fixtures/diff_3x2.json new file mode 100644 index 0000000..bfcfd52 --- /dev/null +++ b/tests/regression/fixtures/diff_3x2.json @@ -0,0 +1,20 @@ +{ + "input": { + "op": "diff", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 3, "var": {"x": 2}}]}, + "deno": 1 + } + }, + "expected": { + "status": "solved", + "verified": true, + "latex": "6x", + "rule": "power_rule", + "expr": { + "numi": {"terms": [{"coeff": 6, "var": {"x": 1}}]}, + "deno": 1 + } + } +} diff --git a/tests/regression/fixtures/diff_constant.json b/tests/regression/fixtures/diff_constant.json new file mode 100644 index 0000000..8b4a97d --- /dev/null +++ b/tests/regression/fixtures/diff_constant.json @@ -0,0 +1,18 @@ +{ + "input": { + "op": "diff", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 7}]}, + "deno": 1 + } + }, + "expected": { + "status": "solved", + "verified": true, + "expr": { + "numi": {"terms": [{"coeff": 0}]}, + "deno": 1 + } + } +} diff --git a/tests/regression/fixtures/diff_multi_term.json b/tests/regression/fixtures/diff_multi_term.json new file mode 100644 index 0000000..b11760b --- /dev/null +++ b/tests/regression/fixtures/diff_multi_term.json @@ -0,0 
+1,26 @@ +{ + "input": { + "op": "diff", + "var": "x", + "expr": { + "numi": {"terms": [ + {"coeff": 3, "var": {"x": 2}}, + {"coeff": -1, "var": {"x": 1}}, + {"coeff": 5} + ]}, + "deno": 1 + } + }, + "expected": { + "status": "solved", + "verified": true, + "rule": "power_rule", + "expr": { + "numi": {"terms": [ + {"coeff": 6, "var": {"x": 1}}, + {"coeff": -1} + ]}, + "deno": 1 + } + } +} diff --git a/tests/regression/fixtures/gradient_x2_y2.json b/tests/regression/fixtures/gradient_x2_y2.json new file mode 100644 index 0000000..6598b56 --- /dev/null +++ b/tests/regression/fixtures/gradient_x2_y2.json @@ -0,0 +1,28 @@ +{ + "input": { + "op": "gradient", + "var": "x", + "expr": { + "numi": {"terms": [ + {"coeff": 1, "var": {"x": 2}}, + {"coeff": 1, "var": {"y": 2}} + ]}, + "deno": 1 + } + }, + "expected": { + "status": "solved", + "verified": true, + "rule": "gradient", + "gradient_partials": { + "x": { + "numi": {"terms": [{"coeff": 2, "var": {"x": 1}}]}, + "deno": 1 + }, + "y": { + "numi": {"terms": [{"coeff": 2, "var": {"y": 1}}]}, + "deno": 1 + } + } + } +} diff --git a/tests/regression/fixtures/integrate_6x.json b/tests/regression/fixtures/integrate_6x.json new file mode 100644 index 0000000..dfb56c7 --- /dev/null +++ b/tests/regression/fixtures/integrate_6x.json @@ -0,0 +1,19 @@ +{ + "input": { + "op": "integrate", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 6, "var": {"x": 1}}]}, + "deno": 1 + } + }, + "expected": { + "status": "solved", + "verified": true, + "rule": "power_rule_integral", + "expr": { + "numi": {"terms": [{"coeff": 3.0, "var": {"x": 2}}]}, + "deno": 1 + } + } +} diff --git a/tests/regression/fixtures/integrate_constant.json b/tests/regression/fixtures/integrate_constant.json new file mode 100644 index 0000000..78febc9 --- /dev/null +++ b/tests/regression/fixtures/integrate_constant.json @@ -0,0 +1,19 @@ +{ + "input": { + "op": "integrate", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 4}]}, + "deno": 1 + } + }, + 
"expected": { + "status": "solved", + "verified": true, + "rule": "power_rule_integral", + "expr": { + "numi": {"terms": [{"coeff": 4.0, "var": {"x": 1}}]}, + "deno": 1 + } + } +} diff --git a/tests/regression/fixtures/partial_5x3.json b/tests/regression/fixtures/partial_5x3.json new file mode 100644 index 0000000..f1e5e2f --- /dev/null +++ b/tests/regression/fixtures/partial_5x3.json @@ -0,0 +1,19 @@ +{ + "input": { + "op": "partial", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 5, "var": {"x": 3}}]}, + "deno": 1 + } + }, + "expected": { + "status": "solved", + "verified": true, + "rule": "power_rule", + "expr": { + "numi": {"terms": [{"coeff": 15, "var": {"x": 2}}]}, + "deno": 1 + } + } +} diff --git a/tests/regression/fixtures/tangent_line_x2_at_2.json b/tests/regression/fixtures/tangent_line_x2_at_2.json new file mode 100644 index 0000000..9b4b725 --- /dev/null +++ b/tests/regression/fixtures/tangent_line_x2_at_2.json @@ -0,0 +1,17 @@ +{ + "input": { + "op": "tangent_line", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 1, "var": {"x": 2}}]}, + "deno": 1 + }, + "point": {"x": 2} + }, + "expected": { + "status": "solved", + "verified": true, + "rule": "tangent_line", + "latex": "y = 4.0x - 4.0" + } +} diff --git a/tests/regression/test_regression.py b/tests/regression/test_regression.py new file mode 100644 index 0000000..9a66dc6 --- /dev/null +++ b/tests/regression/test_regression.py @@ -0,0 +1,261 @@ +""" +Regression test scaffolding for CalculusSolver FallbackSolver. + +Design: + - Every fixture in tests/regression/fixtures/*.json is auto-discovered. + - Each fixture contains an "input" envelope and an "expected" subset. + - The test runs solver.solve(input) and asserts each field in "expected". + - Adding a new regression case = drop a new .json file. No code changes needed. + +Comparison strategy for "expr": + - Never compare expr dicts with == (int/float coeff differences cause false failures). 
+ - Instead, re-serialize both sides with serialize_slang_math and compare token lists. + +Gradient fixtures use "gradient_partials" instead of "expr" because the gradient +result has a different shape: {"gradient": {"x": ..., "y": ...}}. +""" + +import json +import os + +import pytest + +# ── Force FallbackSolver — no Groq, no neural ───────────────────────────────── +os.environ.pop("GROQ_API_KEY", None) + +from inference.fallback_solver import FallbackSolver +from tokenizer.slang_serializer import serialize_slang_math + +FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures") + +solver = FallbackSolver() + + +# ── Fixture loader ───────────────────────────────────────────────────────────── + +def load_fixtures(): + """ + Auto-discover every .json file in the fixtures directory. + Returns a list of pytest.param objects, one per fixture file. + Each param is labelled with the filename (minus .json) for clear test IDs. + """ + cases = [] + for fname in sorted(os.listdir(FIXTURES_DIR)): + if not fname.endswith(".json"): + continue + fpath = os.path.join(FIXTURES_DIR, fname) + with open(fpath, encoding="utf-8") as f: + data = json.load(f) + cases.append(pytest.param(data, id=fname.replace(".json", ""))) + return cases + + +# ── Helpers ──────────────────────────────────────────────────────────────────── + +def _assert_expr(result_expr: dict, expected_expr: dict, fixture_id: str) -> None: + """ + Compare two SLaNg expr dicts by re-serializing both to token lists. + This avoids int vs float coeff false failures (e.g. 3 vs 3.0). 
+ """ + try: + result_tokens = serialize_slang_math(result_expr) + expected_tokens = serialize_slang_math(expected_expr) + except Exception as exc: + raise AssertionError( + f"[{fixture_id}] serialize_slang_math failed during expr comparison: {exc}\n" + f" result_expr: {result_expr}\n" + f" expected_expr: {expected_expr}" + ) from exc + assert result_tokens == expected_tokens, ( + f"[{fixture_id}] expr mismatch after token comparison.\n" + f" result tokens: {result_tokens}\n" + f" expected tokens: {expected_tokens}\n" + f" result expr: {result_expr}\n" + f" expected expr: {expected_expr}" + ) + + +def _assert_gradient_partials( + result_expr: dict, + gradient_partials: dict, + fixture_id: str +) -> None: + """ + For gradient results, result_expr has shape {"gradient": {"x": ..., "y": ...}}. + Compare each partial derivative separately using token-list comparison. + """ + assert "gradient" in result_expr, ( + f"[{fixture_id}] Expected 'gradient' key in expr, got: {result_expr}" + ) + result_gradient = result_expr["gradient"] + for var_name, expected_partial in gradient_partials.items(): + assert var_name in result_gradient, ( + f"[{fixture_id}] Expected partial for variable '{var_name}' " + f"in gradient result. Found keys: {list(result_gradient.keys())}" + ) + _assert_expr(result_gradient[var_name], expected_partial, fixture_id) + + +# ── Main regression test ─────────────────────────────────────────────────────── + +@pytest.mark.parametrize("case", load_fixtures()) +def test_regression(case: dict, request) -> None: + """ + For every fixture file, run the solver and assert all expected fields. 
+ + Fields checked (when present in "expected"): + status — always checked + verified — always checked + latex — checked when present (exact string match) + rule — checked when present (exact string match) + confidence — checked when present (exact value match) + expr — checked when present (token-list comparison) + gradient_partials — checked when present (per-variable token-list comparison) + """ + fixture_id = request.node.callspec.id + + input_env = case.get("input") + expected = case.get("expected") + + assert input_env is not None, ( + f"[{fixture_id}] Fixture is missing 'input' key." + ) + assert expected is not None, ( + f"[{fixture_id}] Fixture is missing 'expected' key." + ) + + # ── Run solver ──────────────────────────────────────────────────────────── + try: + result = solver.solve(input_env) + except Exception as exc: + raise AssertionError( + f"[{fixture_id}] solver.solve() raised an unexpected exception: {exc}\n" + f" input: {input_env}" + ) from exc + + # ── Assert status (always required) ─────────────────────────────────────── + assert "status" in expected, ( + f"[{fixture_id}] Fixture 'expected' must contain 'status'." + ) + assert result["status"] == expected["status"], ( + f"[{fixture_id}] status mismatch.\n" + f" got: {result['status']}\n" + f" expected: {expected['status']}" + ) + + # ── Assert verified (always required) ───────────────────────────────────── + assert "verified" in expected, ( + f"[{fixture_id}] Fixture 'expected' must contain 'verified'." 
+ ) + assert result["verified"] == expected["verified"], ( + f"[{fixture_id}] verified mismatch.\n" + f" got: {result['verified']}\n" + f" expected: {expected['verified']}" + ) + + # ── Assert latex (optional) ──────────────────────────────────────────────── + if "latex" in expected: + assert result.get("latex") == expected["latex"], ( + f"[{fixture_id}] latex mismatch.\n" + f" got: {result.get('latex')!r}\n" + f" expected: {expected['latex']!r}" + ) + + # ── Assert rule (optional) ──────────────────────────────────────────────── + if "rule" in expected: + assert result.get("rule") == expected["rule"], ( + f"[{fixture_id}] rule mismatch.\n" + f" got: {result.get('rule')!r}\n" + f" expected: {expected['rule']!r}" + ) + + # ── Assert confidence (optional) ────────────────────────────────────────── + if "confidence" in expected: + assert result.get("confidence") == expected["confidence"], ( + f"[{fixture_id}] confidence mismatch.\n" + f" got: {result.get('confidence')}\n" + f" expected: {expected['confidence']}" + ) + + # ── Assert expr (optional, token-list comparison) ───────────────────────── + if "expr" in expected: + assert "expr" in result, ( + f"[{fixture_id}] Expected 'expr' in result but it was missing.\n" + f" result keys: {list(result.keys())}" + ) + _assert_expr(result["expr"], expected["expr"], fixture_id) + + # ── Assert gradient_partials (optional, gradient-specific) ──────────────── + if "gradient_partials" in expected: + assert "expr" in result, ( + f"[{fixture_id}] Expected 'expr' in result for gradient check, " + f"but it was missing.\n result keys: {list(result.keys())}" + ) + _assert_gradient_partials( + result["expr"], + expected["gradient_partials"], + fixture_id + ) + + +# ── Scaffolding sanity checks ────────────────────────────────────────────────── + +def test_fixtures_directory_exists(): + """The fixtures directory must exist and be a directory.""" + assert os.path.isdir(FIXTURES_DIR), ( + f"Fixtures directory not found: {FIXTURES_DIR}" + ) 
+ + +def test_at_least_one_fixture_exists(): + """There must be at least one .json fixture file present.""" + fixtures = [f for f in os.listdir(FIXTURES_DIR) if f.endswith(".json")] + assert len(fixtures) >= 1, ( + f"No .json fixture files found in {FIXTURES_DIR}" + ) + + +def test_all_fixtures_are_valid_json(): + """Every .json file in the fixtures directory must be valid JSON.""" + for fname in os.listdir(FIXTURES_DIR): + if not fname.endswith(".json"): + continue + fpath = os.path.join(FIXTURES_DIR, fname) + try: + with open(fpath, encoding="utf-8") as f: + json.load(f) + except json.JSONDecodeError as exc: + pytest.fail(f"Fixture file {fname} contains invalid JSON: {exc}") + + +def test_all_fixtures_have_input_and_expected_keys(): + """Every fixture must have both 'input' and 'expected' top-level keys.""" + for fname in sorted(os.listdir(FIXTURES_DIR)): + if not fname.endswith(".json"): + continue + fpath = os.path.join(FIXTURES_DIR, fname) + with open(fpath, encoding="utf-8") as f: + data = json.load(f) + assert "input" in data, ( + f"Fixture {fname} is missing the 'input' key." + ) + assert "expected" in data, ( + f"Fixture {fname} is missing the 'expected' key." + ) + + +def test_all_fixtures_expected_has_status_and_verified(): + """Every fixture's 'expected' block must contain 'status' and 'verified'.""" + for fname in sorted(os.listdir(FIXTURES_DIR)): + if not fname.endswith(".json"): + continue + fpath = os.path.join(FIXTURES_DIR, fname) + with open(fpath, encoding="utf-8") as f: + data = json.load(f) + expected = data.get("expected", {}) + assert "status" in expected, ( + f"Fixture {fname} 'expected' block is missing 'status'." + ) + assert "verified" in expected, ( + f"Fixture {fname} 'expected' block is missing 'verified'." 
+ ) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_slang_serializer.py b/tests/unit/test_slang_serializer.py new file mode 100644 index 0000000..f4e5596 --- /dev/null +++ b/tests/unit/test_slang_serializer.py @@ -0,0 +1,312 @@ +""" +Unit tests for tokenizer/slang_serializer.py +Tests the serialize_slang_math and deserialize_slang_math round-trip. +""" + +import pytest +from tokenizer.slang_serializer import serialize_slang_math, deserialize_slang_math + + +# ── AST fixtures ─────────────────────────────────────────────────────────────── + +CONSTANT_FRACTION = { + "numi": {"terms": [{"coeff": 5}]}, + "deno": 1 +} + +SIMPLE_FRACTION = { + "numi": {"terms": [{"coeff": 3, "var": {"x": 2}}]}, + "deno": 1 +} + +MULTI_TERM_FRACTION = { + "numi": {"terms": [ + {"coeff": 3, "var": {"x": 2}}, + {"coeff": -1, "var": {"x": 1}}, + {"coeff": 5} + ]}, + "deno": 1 +} + +MULTI_VAR_FRACTION = { + "numi": {"terms": [{"coeff": 2, "var": {"x": 1, "y": 2}}]}, + "deno": 1 +} + +ZERO_COEFF_FRACTION = { + "numi": {"terms": [{"coeff": 0}]}, + "deno": 1 +} + +NEGATIVE_COEFF_FRACTION = { + "numi": {"terms": [{"coeff": -4, "var": {"x": 3}}]}, + "deno": 1 +} + +LARGE_COEFF_FRACTION = { + "numi": {"terms": [{"coeff": 100, "var": {"x": 1}}]}, + "deno": 1 +} + +OP_NODE_DIFF = { + "op": "diff", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 3, "var": {"x": 2}}]}, + "deno": 1 + } +} + +OP_NODE_INTEGRATE = { + "op": "integrate", + "var": "x", + "expr": { + "numi": {"terms": [{"coeff": 6, "var": {"x": 1}}]}, + "deno": 1 + } +} + + +# ── Round-trip tests: AST → tokens → AST → tokens (compare token lists) ─────── +# +# We compare round-trips by re-serializing the deserialized output and checking +# the token list matches the original. This is more stable than deep dict equality +# because it avoids int/float coeff representation differences. 
@pytest.mark.parametrize(
    "ast",
    [
        CONSTANT_FRACTION,
        SIMPLE_FRACTION,
        MULTI_TERM_FRACTION,
        MULTI_VAR_FRACTION,
        ZERO_COEFF_FRACTION,
        NEGATIVE_COEFF_FRACTION,
        LARGE_COEFF_FRACTION,
    ],
    ids=[
        "constant",
        "simple_fraction",
        "multi_term",
        "multi_var",
        "zero_coeff",
        "negative_coeff",
        "large_coeff",
    ],
)
def test_fraction_round_trip(ast):
    """Re-serializing a deserialized fraction must reproduce the first token list."""
    first_pass = serialize_slang_math(ast)
    second_pass = serialize_slang_math(deserialize_slang_math(first_pass))
    assert second_pass == first_pass, (
        f"Round-trip failed.\n"
        f" Original tokens: {first_pass}\n"
        f" Retokenized: {second_pass}"
    )


def test_op_node_diff_round_trip():
    """A diff op node must come back with the same op, var and inner expr tokens."""
    decoded = deserialize_slang_math(serialize_slang_math(OP_NODE_DIFF))
    assert decoded["op"] == "diff"
    assert decoded["var"] == "x"
    # The nested expression is compared through its token list, not as a dict.
    expected_inner = serialize_slang_math(OP_NODE_DIFF["expr"])
    assert serialize_slang_math(decoded["expr"]) == expected_inner


def test_op_node_integrate_round_trip():
    """An integrate op node must come back with the same op, var and inner expr tokens."""
    decoded = deserialize_slang_math(serialize_slang_math(OP_NODE_INTEGRATE))
    assert decoded["op"] == "integrate"
    assert decoded["var"] == "x"
    expected_inner = serialize_slang_math(OP_NODE_INTEGRATE["expr"])
    assert serialize_slang_math(decoded["expr"]) == expected_inner


# ── Token-level structural assertions ─────────────────────────────────────────

def test_fraction_token_starts_with_node_frac():
    """The first token emitted for a fraction is NODE:FRAC."""
    leading = serialize_slang_math(SIMPLE_FRACTION)[0]
    assert leading == "NODE:FRAC"


def test_fraction_contains_struct_open():
    """The serialized fraction contains the STRUCT:OPEN bracket token."""
    # STRUCT:OPEN is the bracket token used by the serializer.
    # NOTE: this token is defined as OPEN = "STRUCT:OPEN" inside slang_serializer.py
    # but does NOT appear in vocab.json — this is a known discrepancy.
    emitted = serialize_slang_math(SIMPLE_FRACTION)
    assert "STRUCT:OPEN" in emitted


def test_fraction_contains_numi_and_deno():
    """Both the numerator and denominator markers are present."""
    emitted = serialize_slang_math(SIMPLE_FRACTION)
    for marker in ("STRUCT:NUMI", "STRUCT:DENO"):
        assert marker in emitted


def test_fraction_contains_close():
    """The serialized fraction contains the STRUCT:CLOSE token."""
    emitted = serialize_slang_math(SIMPLE_FRACTION)
    assert "STRUCT:CLOSE" in emitted


def test_single_term_token_content():
    """The term 3x^2 yields its node, coefficient, variable and exponent tokens."""
    emitted = serialize_slang_math(SIMPLE_FRACTION)
    for piece in ("NODE:TERM", "COEF:3", "VAR:x", "EXP:2"):
        assert piece in emitted


def test_multi_term_has_correct_term_count():
    """The multi-term fixture serializes to exactly four NODE:TERM tokens."""
    # NOTE(review): the fixture has three numerator terms; the fourth NODE:TERM
    # presumably comes from the serialized denominator — confirm against the
    # serializer.
    term_count = serialize_slang_math(MULTI_TERM_FRACTION).count("NODE:TERM")
    assert term_count == 4


def test_multi_term_has_separators():
    """At least two STRUCT:SEP tokens appear for the multi-term fixture."""
    # Two SEPs separating three terms
    sep_count = serialize_slang_math(MULTI_TERM_FRACTION).count("STRUCT:SEP")
    assert sep_count >= 2


def test_op_node_token_starts_with_op_prefix():
    """The first token of a serialized diff op node is OP:diff."""
    leading = serialize_slang_math(OP_NODE_DIFF)[0]
    assert leading == "OP:diff"


def test_op_node_token_second_is_opvar():
    """The second token of a serialized diff op node is OPVAR:x."""
    second = serialize_slang_math(OP_NODE_DIFF)[1]
    assert second == "OPVAR:x"


def test_op_node_contains_inner_fraction_tokens():
    """Tokens of the wrapped fraction show up in the op node's token list."""
    # The inner expr is a fraction, so its tokens must appear inside
    emitted = serialize_slang_math(OP_NODE_DIFF)
    for piece in ("NODE:FRAC", "NODE:TERM", "COEF:3"):
        assert piece in emitted


# ── Variable sort order ────────────────────────────────────────────────────────

def test_multi_var_variables_sorted_alphabetically():
    """VAR:x must be emitted before VAR:y for a term containing both variables."""
    emitted = serialize_slang_math(MULTI_VAR_FRACTION)
    x_pos = emitted.index("VAR:x")
    y_pos = emitted.index("VAR:y")
    assert x_pos < y_pos, (
        f"Expected VAR:x before VAR:y in token list, got positions {x_pos} and {y_pos}"
    )


# ── Coefficient normalization ──────────────────────────────────────────────────

def test_float_whole_number_coeff_serializes_as_int():
    """A coefficient of 3.0 is emitted as COEF:3 and never as COEF:3.0."""
    whole_float_ast = {
        "numi": {"terms": [{"coeff": 3.0, "var": {"x": 1}}]},
        "deno": 1,
    }
    emitted = serialize_slang_math(whole_float_ast)
    assert "COEF:3" in emitted
    assert "COEF:3.0" not in emitted


def test_fractional_coeff_serializes_as_float():
    """A coefficient of 1.5 is emitted as COEF:1.5."""
    fractional_ast = {
        "numi": {"terms": [{"coeff": 1.5, "var": {"x": 1}}]},
        "deno": 1,
    }
    emitted = serialize_slang_math(fractional_ast)
    assert "COEF:1.5" in emitted


def test_negative_coeff_serializes_correctly():
    """A coefficient of -4 is emitted as COEF:-4."""
    emitted = serialize_slang_math(NEGATIVE_COEFF_FRACTION)
    assert "COEF:-4" in emitted


def test_zero_coeff_serializes_as_coef_zero():
    """A coefficient of 0 is emitted as COEF:0."""
    emitted = serialize_slang_math(ZERO_COEFF_FRACTION)
    assert "COEF:0" in emitted


# ── Exponent normalization ─────────────────────────────────────────────────────

def test_float_whole_number_exp_serializes_as_int():
    """An exponent of 2.0 is emitted as EXP:2 and never as EXP:2.0."""
    whole_float_exp_ast = {
        "numi": {"terms": [{"coeff": 1, "var": {"x": 2.0}}]},
        "deno": 1,
    }
    emitted = serialize_slang_math(whole_float_exp_ast)
    assert "EXP:2" in emitted
    assert "EXP:2.0" not in emitted


# ── Token list is a flat list of strings ──────────────────────────────────────

def test_serialize_returns_list():
    """The serializer's return value is a list."""
    emitted = serialize_slang_math(SIMPLE_FRACTION)
    assert isinstance(emitted, list)


def test_serialize_returns_list_of_strings():
    """Every element returned by the serializer is a str."""
    emitted = serialize_slang_math(SIMPLE_FRACTION)
    non_strings = [tok for tok in emitted if not isinstance(tok, str)]
    assert not non_strings


def test_serialize_returns_nonempty_list():
    """The serializer returns at least one token for a simple fraction."""
    emitted = serialize_slang_math(SIMPLE_FRACTION)
    assert len(emitted) >= 1


# ── Error cases: serialize ────────────────────────────────────────────────────

+def test_serialize_none_raises(): + with pytest.raises((ValueError, TypeError)): + serialize_slang_math(None) + + +def test_serialize_unknown_dict_raises(): + """A dict that doesn't match any known SLaNg node shape must raise ValueError.""" + with pytest.raises(ValueError): + serialize_slang_math({"unknown_key": 42}) + + +def test_serialize_bare_string_raises(): + with pytest.raises((ValueError, TypeError, AttributeError)): + serialize_slang_math("3x^2") + + +# ── Error cases: deserialize ────────────────────────────────────────────────── + +def test_deserialize_returns_dict(): + tokens = serialize_slang_math(SIMPLE_FRACTION) + result = deserialize_slang_math(tokens) + assert isinstance(result, dict) + + +def test_deserialize_truncated_tokens_raises(): + """An incomplete token list must raise ValueError, not silently return garbage.""" + with pytest.raises(ValueError): + deserialize_slang_math(["NODE:FRAC", "STRUCT:OPEN"]) + + +def test_deserialize_extra_trailing_tokens_raises(): + """Extra tokens after a complete node must raise ValueError.""" + tokens = serialize_slang_math(SIMPLE_FRACTION) + with pytest.raises(ValueError): + deserialize_slang_math(tokens + ["NODE:TERM", "COEF:1"]) + + +def test_deserialize_empty_list_raises(): + with pytest.raises((ValueError, IndexError)): + deserialize_slang_math([]) + + +def test_deserialize_wrong_opening_token_raises(): + with pytest.raises(ValueError): + deserialize_slang_math(["COEF:3", "VAR:x", "EXP:2"]) + + +def test_deserialize_non_list_raises(): + with pytest.raises((ValueError, TypeError)): + deserialize_slang_math("NODE:FRAC STRUCT:OPEN") + + +# ── Idempotency ─────────────────────────────────────────────────────────────── + +def test_double_round_trip_is_stable(): + """Two full round-trips must produce the same token list as one.""" + tokens_1 = serialize_slang_math(SIMPLE_FRACTION) + round_1 = deserialize_slang_math(tokens_1) + tokens_2 = serialize_slang_math(round_1) + round_2 = deserialize_slang_math(tokens_2) + 
tokens_3 = serialize_slang_math(round_2) + assert tokens_1 == tokens_2 == tokens_3 diff --git a/tokenizer/slang_serializer.py b/tokenizer/slang_serializer.py index e356540..df2474a 100644 --- a/tokenizer/slang_serializer.py +++ b/tokenizer/slang_serializer.py @@ -209,8 +209,7 @@ def parse_fraction(index: int) -> Tuple[Dict[str, Any], int]: index = expect_token(index, DENO) denominator_terms, index = parse_wrapped_term_list(index) index = expect_token(index, CLOSE) - index = expect_token(index, CLOSE) - + return { "numi": {"terms": numerator_terms}, "deno": {"terms": denominator_terms},