Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions sdks/python/openrag_sdk/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,27 +125,46 @@ async def wait_for_task(
await asyncio.sleep(poll_interval)
elapsed += poll_interval

raise TimeoutError(f"Ingestion task {task_id} did not complete within {timeout}s")
raise TimeoutError(
f"Ingestion task {task_id} did not complete within {timeout}s"
)

async def delete(self, filename: str) -> DeleteDocumentResponse:
async def delete(
self,
filename: str | None = None,
*,
filter_id: str | None = None,
) -> DeleteDocumentResponse:
"""
Delete a document from the knowledge base.
Delete document(s) from the knowledge base.

Args:
filename: Name of the file to delete.
Provide exactly one of:
filename: delete all chunks for that filename.
filter_id: delete chunks for each filename in the filter's data_sources.

Returns:
DeleteDocumentResponse with deleted chunk count.
"""
if bool(filename) == bool(filter_id):
raise ValueError("Provide exactly one of `filename` or `filter_id`")

body: dict[str, str] = {}
if filename is not None:
body["filename"] = filename
if filter_id is not None:
body["filter_id"] = filter_id
Comment on lines +148 to +155
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Normalize selector validation and payload construction consistently.

Line 148 validates by truthiness, but Lines 152-155/167 branch on is not None. This lets filename="" + filter_id="..." bypass the exclusivity check, send both keys, and potentially misclassify a filter 404 as idempotent filename delete.

Suggested fix
-        if bool(filename) == bool(filter_id):
+        normalized_filename = filename.strip() if filename is not None else None
+        normalized_filter_id = filter_id.strip() if filter_id is not None else None
+        has_filename = bool(normalized_filename)
+        has_filter_id = bool(normalized_filter_id)
+
+        if has_filename == has_filter_id:
             raise ValueError("Provide exactly one of `filename` or `filter_id`")
 
-        body: dict[str, str] = {}
-        if filename is not None:
-            body["filename"] = filename
-        if filter_id is not None:
-            body["filter_id"] = filter_id
+        body: dict[str, str] = (
+            {"filename": normalized_filename} if has_filename else {"filter_id": normalized_filter_id}
+        )
@@
-            if filename is not None and getattr(e, "status_code", None) == 404:
+            if has_filename and getattr(e, "status_code", None) == 404:
                 return DeleteDocumentResponse(
                     success=False,
                     deleted_chunks=0,
-                    filename=filename,
+                    filename=normalized_filename,

Also applies to: 167-167

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@sdks/python/openrag_sdk/documents.py` around lines 148 - 155, The exclusivity
check uses truthiness (bool(filename) == bool(filter_id)) but the payload is
built using "is not None", which lets empty-string filename bypass validation
and still be sent; change the validation to use explicit None checks (e.g.,
(filename is None) == (filter_id is None) or check filename is not None and
filter_id is not None) so the exclusivity logic matches how body is constructed,
and make the same change for the other occurrence around the filter block (the
later 167 block); reference the filename and filter_id variables and the body
dict so only the truly None selector is considered absent.


try:
response = await self._client._request(
"DELETE",
"/api/v1/documents",
json={"filename": filename},
json=body,
)
except NotFoundError as e:
# Keep delete idempotent for SDK callers: a missing document is not an exception.
if getattr(e, "status_code", None) == 404:
# Keep delete idempotent for SDK callers: a missing document is not
# an exception.
# (Filter-not-found 404s do raise — that's a caller error, not idempotency.)
if filename is not None and getattr(e, "status_code", None) == 404:
return DeleteDocumentResponse(
success=False,
deleted_chunks=0,
Expand Down
7 changes: 5 additions & 2 deletions sdks/python/openrag_sdk/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""OpenRAG SDK data models."""

from datetime import datetime
from typing import Any, Literal
from typing import Literal

from pydantic import BaseModel, Field

Expand Down Expand Up @@ -98,6 +97,10 @@ class DeleteDocumentResponse(BaseModel):
filename: str | None = None
message: str | None = None
error: str | None = None
# Populated when deleting by filter_id — one entry per resolved data_source.
filenames: list[str] | None = None
filter_id: str | None = None
per_file: list[dict] | None = None


# Chat history models
Expand Down
43 changes: 34 additions & 9 deletions sdks/typescript/src/documents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import type { OpenRAGClient } from "./client";
import type {
DeleteDocumentOptions,
DeleteDocumentResponse,
IngestResponse,
IngestTaskStatus,
Expand Down Expand Up @@ -144,33 +145,57 @@ export class DocumentsClient {
}

/**
* Delete a document from the knowledge base.
* Delete document(s) from the knowledge base.
*
* Provide exactly one of:
* - filename: a single filename, or
* - { filename } / { filterId }: an options object.
*
* @param filename - Name of the file to delete.
* @returns DeleteDocumentResponse with deleted chunk count.
*/
async delete(filename: string): Promise<DeleteDocumentResponse> {
async delete(
arg: string | DeleteDocumentOptions
): Promise<DeleteDocumentResponse> {
const opts: DeleteDocumentOptions =
typeof arg === "string" ? { filename: arg } : arg;

if (!opts.filename === !opts.filterId) {
throw new Error(
"Provide exactly one of `filename` or `filterId`"
);
}

const body: Record<string, string> = {};
if (opts.filename) body.filename = opts.filename;
if (opts.filterId) body.filter_id = opts.filterId;
Comment on lines +162 to +170
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Enforce selector exclusivity using normalized values.

Current truthy checks allow edge cases like { filename: "", filterId: "..." } to pass exclusivity and proceed. That weakens the “exactly one selector” contract and can hide invalid inputs.

Suggested fix
-    if (!opts.filename === !opts.filterId) {
+    const filename = opts.filename?.trim();
+    const filterId = opts.filterId?.trim();
+    const hasFilename = Boolean(filename);
+    const hasFilterId = Boolean(filterId);
+
+    if (hasFilename === hasFilterId) {
       throw new Error(
         "Provide exactly one of `filename` or `filterId`"
       );
     }
 
     const body: Record<string, string> = {};
-    if (opts.filename) body.filename = opts.filename;
-    if (opts.filterId) body.filter_id = opts.filterId;
+    if (hasFilename) body.filename = filename!;
+    if (hasFilterId) body.filter_id = filterId!;
@@
-        opts.filename &&
+        hasFilename &&
         (error as NotFoundError)?.statusCode === 404
       ) {
         return {
           success: false,
           deleted_chunks: 0,
-          filename: opts.filename,
+          filename: filename!,

Also applies to: 192-193

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@sdks/typescript/src/documents.ts` around lines 162 - 170, The exclusivity
check uses truthy checks on opts.filename and opts.filterId which permits empty
strings (e.g., filename: "") to bypass validation; update the logic to normalize
and treat empty or whitespace-only strings as absent (e.g., consider value
present only if typeof value === "string" and value.trim() !== "" or for other
types use non-null/undefined), then enforce "exactly one present" by computing
normalized booleans for filename and filterId and throwing if they are both or
neither present; update the subsequent assignments that set body.filename and
body.filter_id to only use the normalized values; apply the same fix to the
similar check around lines 192-193.


try {
const response = await this.client._request("DELETE", "/api/v1/documents", {
body: JSON.stringify({ filename }),
body: JSON.stringify(body),
});

const data = await response.json();
return {
success: data.success ?? false,
deleted_chunks: data.deleted_chunks ?? 0,
filename: data.filename ?? filename,
filename: data.filename ?? opts.filename ?? null,
message: data.message ?? null,
error: data.error ?? null,
filenames: data.filenames ?? null,
filter_id: data.filter_id ?? null,
per_file: data.per_file ?? null,
};
} catch (error) {
// Delete is idempotent: if no chunks match, backend may return 404.
// Surface this as a non-throwing "nothing deleted" response.
if ((error as NotFoundError)?.statusCode === 404) {
// Filename delete stays idempotent (404 -> success:false). Filter-id 404s
// are caller errors (bad filter id) and should propagate.
if (
opts.filename &&
(error as NotFoundError)?.statusCode === 404
) {
return {
success: false,
deleted_chunks: 0,
filename,
filename: opts.filename,
message: null,
error: (error as Error)?.message ?? "Resource not found",
};
Expand Down
9 changes: 9 additions & 0 deletions sdks/typescript/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ export interface DeleteDocumentResponse {
filename?: string | null;
message?: string | null;
error?: string | null;
// Populated when deleting by filter_id — one entry per resolved data_source.
filenames?: string[] | null;
filter_id?: string | null;
per_file?: Array<Record<string, unknown>> | null;
}

export interface DeleteDocumentOptions {
filename?: string;
filterId?: string;
}

// Chat history types
Expand Down
134 changes: 113 additions & 21 deletions sdks/typescript/tests/integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,55 +190,147 @@ describe.skipIf(SKIP_TESTS)("OpenRAG TypeScript SDK Integration", () => {
expect(filter).toBeNull();
});

it("should use filterId in chat", async () => {
// Create a filter first
it("filterId in chat actually scopes retrieval to data_sources", async () => {
// Ingest two distinguishable docs.
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "sdk-filter-"));
const alphaName = `alpha_${Date.now()}.md`;
const betaName = `beta_${Date.now()}.md`;
const alphaPath = path.join(tmpDir, alphaName);
const betaPath = path.join(tmpDir, betaName);
fs.writeFileSync(alphaPath, "# Alpha\n\nPurple elephants live here.\n");
fs.writeFileSync(betaPath, "# Beta\n\nYellow tigers live here.\n");
await client.documents.ingest({ filePath: alphaPath });
await client.documents.ingest({ filePath: betaPath });

const createResult = await client.knowledgeFilters.create({
name: "Chat Test Filter",
description: "Filter for testing chat with filterId",
name: `TS chat filter scope ${Date.now()}`,
description: "Filter scoped to alpha only",
queryData: {
query: "test",
limit: 5,
query: "",
filters: {
data_sources: [alphaName],
document_types: ["*"],
owners: ["*"],
connector_types: ["*"],
},
limit: 10,
scoreThreshold: 0,
},
});
expect(createResult.success).toBe(true);
const filterId = createResult.id!;

try {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
// Use filter in chat
const response = await client.chat.create({
message: "Hello with filter",
message: "What animals appear in these documents?",
filterId,
});
expect(response.sources).toBeDefined();
const names = (response.sources ?? []).map((s) => s.filename);
// Beta must NOT leak through the filter.
expect(names).not.toContain(betaName);
} finally {
await client.knowledgeFilters.delete(filterId);
await client.documents.delete(alphaName);
await client.documents.delete(betaName);
}
}, 60_000);

it("filterId in search actually scopes results to data_sources", async () => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "sdk-filter-"));
const alphaName = `alpha_${Date.now()}.md`;
const betaName = `beta_${Date.now()}.md`;
const alphaPath = path.join(tmpDir, alphaName);
const betaPath = path.join(tmpDir, betaName);
fs.writeFileSync(alphaPath, "# Alpha\n\nPurple elephants live here.\n");
fs.writeFileSync(betaPath, "# Beta\n\nYellow tigers live here.\n");
await client.documents.ingest({ filePath: alphaPath });
await client.documents.ingest({ filePath: betaPath });

expect(response.response).toBeDefined();
const createResult = await client.knowledgeFilters.create({
name: `TS search filter scope ${Date.now()}`,
description: "Filter scoped to alpha only",
queryData: {
query: "",
filters: {
data_sources: [alphaName],
document_types: ["*"],
owners: ["*"],
connector_types: ["*"],
},
limit: 10,
scoreThreshold: 0,
},
});
expect(createResult.success).toBe(true);
const filterId = createResult.id!;

try {
const results = await client.search.query("animals", { filterId });
for (const r of results.results) {
expect(r.filename).not.toBe(betaName);
}
} finally {
// Cleanup
await client.knowledgeFilters.delete(filterId);
await client.documents.delete(alphaName);
await client.documents.delete(betaName);
}
});
}, 60_000);

it("documents.delete(filterId) only removes filenames in the filter", async () => {
const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "sdk-filter-"));
const alphaName = `alpha_${Date.now()}.md`;
const betaName = `beta_${Date.now()}.md`;
const alphaPath = path.join(tmpDir, alphaName);
const betaPath = path.join(tmpDir, betaName);
fs.writeFileSync(alphaPath, "# Alpha\n\nPurple elephants.\n");
fs.writeFileSync(betaPath, "# Beta\n\nYellow tigers.\n");
await client.documents.ingest({ filePath: alphaPath });
await client.documents.ingest({ filePath: betaPath });

it("should use filterId in search", async () => {
// Create a filter first
const createResult = await client.knowledgeFilters.create({
name: "Search Test Filter",
description: "Filter for testing search with filterId",
name: `TS delete-by-filter ${Date.now()}`,
description: "Filter scoped to alpha only",
queryData: {
query: "test",
limit: 5,
query: "",
filters: {
data_sources: [alphaName],
document_types: ["*"],
owners: ["*"],
connector_types: ["*"],
},
limit: 10,
scoreThreshold: 0,
},
});
expect(createResult.success).toBe(true);
const filterId = createResult.id!;

try {
// Use filter in search
const results = await client.search.query("test query", { filterId });
const result = await client.documents.delete({ filterId });
expect(result.success).toBe(true);
expect(result.filenames).toContain(alphaName);
expect(result.filenames ?? []).not.toContain(betaName);

expect(results.results).toBeDefined();
// Beta still searchable
const remaining = await client.search.query("tigers");
const remainingNames = remaining.results.map((r) => r.filename);
expect(remainingNames).toContain(betaName);
} finally {
// Cleanup
await client.knowledgeFilters.delete(filterId);
await client.documents.delete(alphaName);
await client.documents.delete(betaName);
}
}, 60_000);

it("documents.delete rejects both filename and filterId together", async () => {
await expect(
client.documents.delete({ filename: "x.pdf", filterId: "y" })
).rejects.toThrow();
});

it("documents.delete rejects when neither arg is set", async () => {
await expect(client.documents.delete({})).rejects.toThrow();
});
});

Expand Down
Loading
Loading