From 59ded129d718833d268f7da4c216f6a5843f7e52 Mon Sep 17 00:00:00 2001 From: Brian McMahon Date: Tue, 5 May 2026 08:13:31 -0700 Subject: [PATCH] fix(rag): correct voyage-3-lite dimension from 1024 to 512 in manifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial PR commit followed the (now-corrected) embeddings.py docstring and reported `embedding.dimension = 1024`. The schema declares `vector(512)` and pgvector enforces the vector dimension on INSERT — so, since ingestion is working, the production column must be 512-d. voyage-3-lite is 512-d. Companion fix to alpha-engine-lib PR #17 which updates the docstring. Tests: 8/8 (test_embedding_metadata updated to assert 512). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) --- rag/pipelines/emit_manifest.py | 5 +++-- tests/test_emit_manifest.py | 4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/rag/pipelines/emit_manifest.py b/rag/pipelines/emit_manifest.py index a5d0cc7..91e2e3e 100644 --- a/rag/pipelines/emit_manifest.py +++ b/rag/pipelines/emit_manifest.py @@ -41,10 +41,11 @@ logger = logging.getLogger(__name__) -# Hardcoded; ``rag.embeddings.embed_*`` defaults to voyage-3-lite (1024d). +# Hardcoded; ``rag.embeddings.embed_*`` defaults to voyage-3-lite (512d, +# matches the ``embedding vector(512)`` column in ``rag/schema.sql``). # Surfaced in the manifest so consumers don't have to re-derive it. 
_EMBEDDING_MODEL = "voyage-3-lite" -_EMBEDDING_DIMENSION = 1024 +_EMBEDDING_DIMENSION = 512 def _by_source() -> dict[str, dict[str, int]]: diff --git a/tests/test_emit_manifest.py b/tests/test_emit_manifest.py index 9e261bd..8234ac4 100644 --- a/tests/test_emit_manifest.py +++ b/tests/test_emit_manifest.py @@ -86,7 +86,9 @@ def test_coverage_percentiles(manifest): def test_embedding_metadata(manifest): - assert manifest["embedding"] == {"model": "voyage-3-lite", "dimension": 1024} + # voyage-3-lite is 512d — matches `embedding vector(512)` in the lib's + # rag/schema.sql. pgvector enforces dim on INSERT. + assert manifest["embedding"] == {"model": "voyage-3-lite", "dimension": 512} def test_ingestion_overall_picks_max(manifest):