diff --git a/rag/pipelines/emit_manifest.py b/rag/pipelines/emit_manifest.py
index a5d0cc7..91e2e3e 100644
--- a/rag/pipelines/emit_manifest.py
+++ b/rag/pipelines/emit_manifest.py
@@ -41,10 +41,11 @@ logger = logging.getLogger(__name__)
 
 
 
-# Hardcoded; ``rag.embeddings.embed_*`` defaults to voyage-3-lite (1024d).
+# Hardcoded; ``rag.embeddings.embed_*`` defaults to voyage-3-lite (512d,
+# matches the ``embedding vector(512)`` column in ``rag/schema.sql``).
 # Surfaced in the manifest so consumers don't have to re-derive it.
 _EMBEDDING_MODEL = "voyage-3-lite"
-_EMBEDDING_DIMENSION = 1024
+_EMBEDDING_DIMENSION = 512
 
 
 def _by_source() -> dict[str, dict[str, int]]:
diff --git a/tests/test_emit_manifest.py b/tests/test_emit_manifest.py
index 9e261bd..8234ac4 100644
--- a/tests/test_emit_manifest.py
+++ b/tests/test_emit_manifest.py
@@ -86,7 +86,9 @@ def test_coverage_percentiles(manifest):
 
 
 def test_embedding_metadata(manifest):
-    assert manifest["embedding"] == {"model": "voyage-3-lite", "dimension": 1024}
+    # voyage-3-lite is 512d — matches `embedding vector(512)` in the lib's
+    # rag/schema.sql. pgvector enforces dim on INSERT.
+    assert manifest["embedding"] == {"model": "voyage-3-lite", "dimension": 512}
 
 
 def test_ingestion_overall_picks_max(manifest):