From ad8c5ddd19a6e77f36e2464a63b7d166e1bd7d0f Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Wed, 18 Mar 2026 19:01:30 -0700 Subject: [PATCH 1/4] fix: add NVIDIA NIM provider profile for input_type embedding field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVIDIA NIM rejects the `task` field and requires `input_type` instead. This adds a proper "nvidia" provider profile following the architecture introduced in #216, rather than hardcoding URL checks in buildPayload. Detection: matches *.nvidia.com base URLs, nvidia/* model prefixes, and nv-embed* model names. Capabilities: sends input_type instead of task, maps retrieval.query → query and retrieval.passage → passage, supports encoding_format: float. Includes 5 automated tests covering: - NVIDIA sends input_type (not task) - retrieval.passage → passage value mapping - nvidia/ model prefix detection - Jina still sends task field - Generic providers send neither Co-Authored-By: Claude Opus 4.6 (1M context) --- src/embedder.ts | 21 +++ test/nvidia-nim-provider-profile.test.mjs | 153 ++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 test/nvidia-nim-provider-profile.test.mjs diff --git a/src/embedder.ts b/src/embedder.ts index 379c648b..761c24bf 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -105,6 +105,7 @@ type EmbeddingProviderProfile = | "openai" | "jina" | "voyage-compatible" + | "nvidia" | "generic-openai-compatible"; interface EmbeddingCapabilities { @@ -204,6 +205,7 @@ function getProviderLabel(baseURL: string | undefined, model: string): string { if (profile === "jina" && /api\.jina\.ai/i.test(base)) return "Jina"; if (profile === "voyage-compatible" && /api\.voyageai\.com/i.test(base)) return "Voyage"; if (profile === "openai" && /api\.openai\.com/i.test(base)) return "OpenAI"; + if (profile === "nvidia") return "NVIDIA NIM"; try { return new URL(base).host; @@ -219,6 +221,8 @@ function getProviderLabel(baseURL: string | undefined, model: string): string { return "Voyage"; case "openai": return "OpenAI"; + case "nvidia": + return "NVIDIA NIM"; default: return "embedding provider"; } @@ -236,6 +240,10 @@ function detectEmbeddingProviderProfile( return "voyage-compatible"; } + if (/\.nvidia\.com|integrate\.api\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) { + return "nvidia"; + } + return "generic-openai-compatible"; } @@ -268,6 +276,19 @@ function getEmbeddingCapabilities(profile: EmbeddingProviderProfile): EmbeddingC }, dimensionsField: "output_dimension", }; + case "nvidia": + return { + encoding_format: true, + normalized: false, + taskField: "input_type", + taskValueMap: { + "retrieval.query": "query", + "retrieval.passage": "passage", + "query": "query", + "passage": "passage", + }, + dimensionsField: null, + }; case "generic-openai-compatible": default: return { diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs new file mode 100644 index 00000000..39233357 --- /dev/null +++ b/test/nvidia-nim-provider-profile.test.mjs @@ -0,0 +1,153 @@ +import assert from "node:assert/strict"; +import http from "node:http"; +import { describe, it } from "node:test"; + +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { Embedder } = jiti("../src/embedder.ts"); + +/** + * Create a capture server that records POST bodies and returns embeddings + * with configurable dimension count. + */ +async function withCaptureServer(dims, fn) { + let capturedBody = null; + const fakeVec = Array.from({ length: dims }, (_, i) => i * 0.01); + const server = http.createServer((req, res) => { + if (req.url === "/v1/embeddings" && req.method === "POST") { + const chunks = []; + req.on("data", (c) => chunks.push(c)); + req.on("end", () => { + capturedBody = JSON.parse(Buffer.concat(chunks).toString()); + res.writeHead(200, { "content-type": "application/json" }); + res.end( + JSON.stringify({ + object: "list", + data: [{ object: "embedding", index: 0, embedding: fakeVec }], + usage: { prompt_tokens: 5, total_tokens: 5 }, + }), + ); + }); + return; + } + res.writeHead(404); + res.end("not found"); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", resolve)); + const address = server.address(); + const port = typeof address === "object" && address ? address.port : 0; + const baseURL = `http://127.0.0.1:${port}/v1`; + + try { + await fn({ baseURL, port, getCaptured: () => capturedBody }); + } finally { + await new Promise((resolve) => server.close(resolve)); + } +} + +describe("NVIDIA NIM provider profile", () => { + it("sends input_type=query for NVIDIA NIM (nv-embed model prefix)", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "nv-embedqa-e5-v5", + apiKey: "test-key", + dimensions: dims, + taskQuery: "retrieval.query", + taskPassage: "retrieval.passage", + }); + + await embedder.embedQuery("test query"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.input_type, "query", "Should send input_type=query for NVIDIA"); + assert.equal(body.task, undefined, "Should NOT send task field for NVIDIA"); + }); + }); + + it("maps retrieval.passage → passage for NVIDIA NIM", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "nv-embedqa-e5-v5", + apiKey: "test-key", + dimensions: dims, + taskQuery: "retrieval.query", + taskPassage: "retrieval.passage", + }); + + await embedder.embedPassage("test document"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.input_type, "passage", "Should map retrieval.passage → passage"); + assert.equal(body.task, undefined, "Should NOT send task field for NVIDIA"); + }); + }); + + it("detects NVIDIA from nvidia/ model prefix", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "nvidia/llama-3.2-nv-embedqa-1b-v2", + apiKey: "test-key", + dimensions: dims, + taskQuery: "query", + taskPassage: "passage", + }); + + await embedder.embedQuery("test"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.input_type, "query", "nvidia/ model prefix should trigger input_type"); + assert.equal(body.task, undefined, "nvidia/ model prefix should NOT send task"); + }); + }); + + it("non-NVIDIA: Jina sends task field", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "jina-embeddings-v5-text-small", + apiKey: "test-key", + dimensions: dims, + taskQuery: "retrieval.query", + taskPassage: "retrieval.passage", + }); + + await embedder.embedQuery("test query"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.task, "retrieval.query", "Jina should send task field"); + assert.equal(body.input_type, undefined, "Jina should NOT send input_type"); + }); + }); + + it("non-NVIDIA: generic OpenAI-compatible sends neither task nor input_type", async () => { + const dims = 128; + await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { + const embedder = new Embedder({ + baseURL, + model: "custom-embed-model", + apiKey: "test-key", + dimensions: dims, + }); + + await embedder.embedQuery("test query"); + const body = getCaptured(); + + assert.ok(body, "Request body should be captured"); + assert.equal(body.task, undefined, "Generic provider should NOT send task"); + assert.equal(body.input_type, undefined, "Generic provider should NOT send input_type"); + }); + }); +}); From 5ae2f292c6a6c795410f7cf356d3a19834b3bb74 Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Wed, 18 Mar 2026 23:23:56 -0700 Subject: [PATCH 2/4] fix: forward dimensions for NVIDIA dynamic embedding models NVIDIA NIM's OpenAI-compatible API supports a `dimensions` parameter for dynamic models like nvidia/llama-3.2-nv-embedqa-1b-v2. Setting dimensionsField to null prevented buildPayload() from forwarding the configured dimensions, causing dimension mismatch errors. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/embedder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/embedder.ts b/src/embedder.ts index 761c24bf..62435837 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -287,7 +287,7 @@ function getEmbeddingCapabilities(profile: EmbeddingProviderProfile): EmbeddingC "query": "query", "passage": "passage", }, - dimensionsField: null, + dimensionsField: "dimensions", }; case "generic-openai-compatible": default: From 7871fa00b0f3e4bc1928284ccb069f28cff440a0 Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:26:31 -0700 Subject: [PATCH 3/4] Add NVIDIA detection test and update imports Added a test case to detect NVIDIA from a .nvidia.com baseURL and modified import to include formatEmbeddingProviderError. --- test/nvidia-nim-provider-profile.test.mjs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/nvidia-nim-provider-profile.test.mjs b/test/nvidia-nim-provider-profile.test.mjs index 39233357..cfd80ca5 100644 --- a/test/nvidia-nim-provider-profile.test.mjs +++ b/test/nvidia-nim-provider-profile.test.mjs @@ -5,7 +5,7 @@ import { describe, it } from "node:test"; import jitiFactory from "jiti"; const jiti = jitiFactory(import.meta.url, { interopDefault: true }); -const { Embedder } = jiti("../src/embedder.ts"); +const { Embedder, formatEmbeddingProviderError } = jiti("../src/embedder.ts"); /** * Create a capture server that records POST bodies and returns embeddings @@ -111,6 +111,16 @@ describe("NVIDIA NIM provider profile", () => { }); }); + it("detects NVIDIA from a .nvidia.com baseURL", () => { + const message = formatEmbeddingProviderError(new Error("boom"), { + baseURL: "https://build.nvidia.com/v1", + model: "custom-embed-model", + mode: "single", + }); + + assert.equal(message, "Failed to generate embedding from NVIDIA NIM: boom"); + }); + it("non-NVIDIA: Jina sends task field", async () => { const dims = 128; await withCaptureServer(dims, async ({ baseURL, getCaptured }) => { From 15079944d90dbd387af9cce7fc03fdeddc58d979 Mon Sep 17 00:00:00 2001 From: VicJay <98076606+vicjayjay@users.noreply.github.com> Date: Sat, 21 Mar 2026 20:28:34 -0700 Subject: [PATCH 4/4] Refactor NVIDIA compatibility check regex Simplifies the NVIDIA provider baseURL detection pattern now that .nvidia.com already covers integrate.api.nvidia.com. --- src/embedder.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/embedder.ts b/src/embedder.ts index 62435837..5bf3c628 100644 --- a/src/embedder.ts +++ b/src/embedder.ts @@ -240,7 +240,7 @@ function detectEmbeddingProviderProfile( return "voyage-compatible"; } - if (/\.nvidia\.com|integrate\.api\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) { + if (/\.nvidia\.com/i.test(base) || /^nvidia\//i.test(model) || /^nv-embed/i.test(model)) { return "nvidia"; }