ragbot/Makefile at main · synthesisengineering/ragbot · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Ragbot Makefile
#
# Convenience targets that wrap the most common dev / test / eval flows.
# Designed to be idempotent and free of platform-specific assumptions
# beyond "python3 is on PATH and the requirements are installed."

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Tool entry points. Each is assigned with ?= so a value supplied on the
# command line or in the environment takes precedence, for example
# `make test PYTHON=python3.12`. PIP and PYTEST run as modules of $(PYTHON),
# so by default they follow whichever interpreter PYTHON names.
PYTHON         ?= python3
PIP            ?= $(PYTHON) -m pip
PYTEST         ?= $(PYTHON) -m pytest

# Path of the markdown scorecard written by the eval targets.
EVAL_SCORECARD ?= tests/evals/last-scorecard.md

# A bare `make` prints the help text rather than running a task.
.DEFAULT_GOAL := help

# Every target in this file names a command, not a file, so each one is
# declared phony. Only targets that actually have a rule are listed: a name
# declared phony without a rule makes `make <name>` report "Nothing to be
# done" and exit 0, which is indistinguishable from a successful run.
.PHONY: help install test test-fast observability-test \
        eval eval-quick eval-regressions eval-clean \
        metrics-curl clean

# ---------------------------------------------------------------------------
# Help
# ---------------------------------------------------------------------------

# Print a one-line summary of each user-facing target. A single printf call
# emits one output line per argument; the empty argument yields the blank
# separator line.
help:
	@printf '%s\n' \
	  "Ragbot Makefile — common targets:" \
	  "" \
	  "  make install            Install / upgrade Python dependencies." \
	  "  make test               Run the full pytest suite." \
	  "  make test-fast          Run pytest excluding the integration suite." \
	  "  make observability-test Run just the observability test module." \
	  "  make eval               Run the offline eval suite, emit scorecard." \
	  "  make eval-quick         Run only the quick subset of the eval suite." \
	  "  make eval-regressions   Run only the regression-capture cases." \
	  "  make eval-clean         Remove the last eval scorecard." \
	  "  make metrics-curl       Hit /api/metrics on a local running server." \
	  "  make clean              Remove caches and build artifacts."

# ---------------------------------------------------------------------------
# Install
# ---------------------------------------------------------------------------

# Install the packages listed in requirements.txt with $(PIP).
# NOTE(review): the help text advertises "Install / upgrade", but no
# --upgrade flag is passed here; confirm which behavior is intended.
install:
	$(PIP) install --requirement requirements.txt

# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------

# Everything under tests/, with verbose per-test output.
test:
	$(PYTEST) --verbose tests/

# Same as `test`, but skips tests/test_models_integration.py.
test-fast:
	$(PYTEST) --verbose --ignore=tests/test_models_integration.py tests/

# Only the observability test module.
observability-test:
	$(PYTEST) --verbose tests/test_observability.py

# ---------------------------------------------------------------------------
# Eval suite
#
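# The runner is invoked as a module (`-m`) rather than by script path so its
# package-relative imports resolve. `-m` locates the `tests` package via the
# current directory, so run make from the directory containing this Makefile.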
# ---------------------------------------------------------------------------

# Shared recipe for the three eval targets.
#   $(1)  extra runner flag selecting the mode; empty for the full suite
#   $(2)  label that begins the completion message
# The scorecard's directory is created first so the runner can write there.
# $(strip ...) collapses the gap an empty $(1) would leave, keeping the
# command line single-spaced.
define run_eval_suite
@mkdir -p $(dir $(EVAL_SCORECARD))
$(strip $(PYTHON) -m tests.evals.runner $(1) --output $(EVAL_SCORECARD))
@echo ""
@echo "$(2) written to $(EVAL_SCORECARD)"
endef

# Full eval suite.
eval:
	$(call run_eval_suite,,Scorecard)

# Quick subset only (runner's --quick mode).
eval-quick:
	$(call run_eval_suite,--quick,Quick scorecard)

# Regression-only mode. The runner loads every YAML under
# tests/evals/regressions/, runs just those cases, and writes a focused
# scorecard. A non-zero exit code means a regression has re-emerged.
eval-regressions:
	$(call run_eval_suite,--regressions-only,Regression scorecard)

# Delete the scorecard left by the last eval run; -f keeps this quiet when
# the file does not exist.
eval-clean:
	rm -f $(EVAL_SCORECARD)

# ---------------------------------------------------------------------------
# Observability quick-checks
# ---------------------------------------------------------------------------

# Base URL of the server to probe. Override to reach a different host or
# port, e.g. `make metrics-curl METRICS_BASE_URL=http://localhost:9000`.
METRICS_BASE_URL ?= http://localhost:8000

# Value sent as the window_minutes query parameter to /api/metrics/cache.
METRICS_WINDOW_MINUTES ?= 60

# Fetch both metrics endpoints from a running server and print the responses.
# Each curl is followed by `|| true` on purpose: this is a best-effort
# quick-check, so a failed request (curl's own error message is still shown
# thanks to -S) neither aborts the recipe nor fails the make invocation, and
# the second endpoint is probed even if the first one is down.
metrics-curl:
	@echo "==> /api/metrics (Prometheus exposition)"
	@curl -fsS '$(METRICS_BASE_URL)/api/metrics' || true
	@echo ""
	@echo "==> /api/metrics/cache ($(METRICS_WINDOW_MINUTES)-minute window)"
	@curl -fsS '$(METRICS_BASE_URL)/api/metrics/cache?window_minutes=$(METRICS_WINDOW_MINUTES)' || true
	@echo ""

# ---------------------------------------------------------------------------
# Housekeeping
# ---------------------------------------------------------------------------

# Remove Python bytecode, pytest, and mypy cache directories anywhere below
# the current directory. One traversal handles all three names: the
# parenthesised -o list matches any of them, -prune keeps find from
# descending into a directory that is about to be deleted, and `-exec ... +`
# batches the matches into as few rm invocations as possible.
clean:
	find . -type d \
	  \( -name __pycache__ -o -name .pytest_cache -o -name .mypy_cache \) \
	  -prune -exec rm -rf {} +