diff --git a/CHANGELOG.md b/CHANGELOG.md index cb8f4fb2d..b311a8735 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **Corpus Home document-tree loads ~order-of-magnitude faster** (`config/graphql/filters.py`, `frontend/src/graphql/queries.ts`, `frontend/src/components/corpuses/DocumentTableOfContents.tsx`). The "Loading document structure…" wait on Corpus Home was driven by two over-fetched GraphQL queries. (1) `GET_DOCUMENT_RELATIONSHIPS` (capped at `first: 500`) selected the full `sourceDocument`/`targetDocument` objects (title, description, fileType, **icon**, slug, creator), plus `corpus { …creator }`, `creator { …username }`, `annotationLabel { id, text, color, icon }`, `data`, `created`, `modified`, and `myPermissions`. `DocumentTableOfContents.tsx:535-611` then needed only four of those fields (`relationshipType`, `annotationLabel.text`, `sourceDocument.id`, `targetDocument.id`) to build the tree — every `icon` selection still triggered `build_absolute_uri()` via `create_file_resolver`, and the per-row document payloads were redundant (they were only copied back into `documentMap`) because `GET_CORPUS_DOCUMENTS_FOR_TOC` already provided the same fields. Worse, all relationships came back regardless of type/label and were filtered client-side to `relationshipType === "RELATIONSHIP"` AND `annotationLabel.text.toLowerCase() === "parent"`. (2) `GET_CORPUS_DOCUMENTS_FOR_TOC` selected `icon` and `creator { id, slug }`, neither of which the TOC renders (`renderNode` derives the on-screen icon from `fileType` via `getFileIcon`, and creator metadata is never displayed). 
Fix: a new `annotation_label_text` `iexact` filter on `DocumentRelationshipFilter` (paired with the existing `relationship_type` filter) so the parent-only restriction now runs on the server; a new ultra-lean `GET_CORPUS_DOCUMENT_TOC_EDGES` query that returns just `{ id, sourceDocument { id }, targetDocument { id } }` and lives alongside (not in place of) the original `GET_DOCUMENT_RELATIONSHIPS` because `CorpusDocumentRelationships.tsx:314` and `DocumentRelationshipModal.tsx:611` still need the rich payload; `GET_CORPUS_DOCUMENTS_FOR_TOC` slimmed to `{ id, title, description, slug, fileType }`. `RunCorpusActionModal.tsx` (the only other consumer of the doc query) reads only `node.id` / `node.title`, so the field removal is safe. End result: for a 76-document corpus, the relationships payload drops from "76+ source-and-target document blobs" to "76 ID-only edge rows," up to 152 `build_absolute_uri()` icon-resolver calls and 76 description blobs are eliminated, and any non-parent relationships (notes, future link types) never leave the server. + - **Fork ≡ export+import: V2 parity refactor + roundtrip-loss fixes** (`opencontractserver/tasks/fork_tasks.py`, `opencontractserver/tasks/export_tasks_v2.py`, `opencontractserver/utils/{export_v2,import_v2,etl}.py`, `opencontractserver/tests/{test_corpus_export_import_v2.py,test_corpus_forking.py,test_ingestion_source.py}`, and shared fixture helpers `opencontractserver/tests/{_corpus_fixture,_corpus_snapshot}.py`). Replaces the legacy bespoke `fork_corpus` machinery with a thin shell that drives the V2 export → V2 import pipeline so the two code paths can no longer drift. `build_corpus_v2_zip` is the pure builder that `package_corpus_export_v2` (Celery task) now wraps; `import_corpus_v2_from_bytes` is the in-process entry point that `fork_corpus` invokes after `build_corpus_v2_zip`. 
Restores fields that previously dropped on round-trip (manual-metadata `Fieldset` / `Column` / `Datacell` rows, `IngestionSource` rows, `DocumentPath` and `CorpusFolder` snapshots, structural-set membership) and adds a three-roundtrip invariant test (`TestV2ThreeRoundTripDataIntegrity`) plus error-handling coverage (`CorpusForkErrorHandlingTest`). - **Behavioural change:** `fork_corpus` no longer respects selective `doc_ids` / `annotation_ids` arguments. Any caller passing those now gets a *full* fork (with a `logger.warning`). No live caller in `opencontractserver/`, `config/`, or tests still passes selective args; legacy queued Celery tasks would still run safely (full fork instead of partial). A short note here flags the contract change for downstream forks of this repo. - **Import-side correctness:** `import_metadata_schema` now clears its `column_map` on rollback so callers can't accidentally re-link freshly imported rows to pks that no longer exist. diff --git a/config/graphql/filters.py b/config/graphql/filters.py index 6e666d048..8dbaee66f 100644 --- a/config/graphql/filters.py +++ b/config/graphql/filters.py @@ -605,6 +605,10 @@ class Meta: class DocumentRelationshipFilter(django_filters.FilterSet): """Filter set for DocumentRelationship model.""" + annotation_label_text = filters.CharFilter( + field_name="annotation_label__text", lookup_expr="iexact" + ) + class Meta: model = DocumentRelationship fields = [ diff --git a/frontend/src/assets/configurations/constants.ts b/frontend/src/assets/configurations/constants.ts index 5386fd353..4d47b641b 100644 --- a/frontend/src/assets/configurations/constants.ts +++ b/frontend/src/assets/configurations/constants.ts @@ -254,6 +254,14 @@ export const DOCUMENT_RELATIONSHIP_TOC_LIMIT = 500; // Backend enforces max 100 records per page on documents connection export const CORPUS_DOCUMENTS_TOC_LIMIT = 100; +// Document relationship type / label filters used by the corpus TOC tree. 
+// Mirrors the backend's `RELATIONSHIP_TYPE_CHOICES` for the "RELATIONSHIP" +// member and the conventional "parent" annotation label text. Used as +// GraphQL variables so the server-side filter restricts the edges to the +// hierarchy-defining rows only. +export const DOCUMENT_RELATIONSHIP_TYPE_RELATIONSHIP = "RELATIONSHIP"; +export const DOCUMENT_RELATIONSHIP_LABEL_PARENT = "parent"; + // Document annotation index (within-document TOC) // Keep in sync with opencontractserver/constants/annotations.py export const DOCUMENT_ANNOTATION_INDEX_LIMIT = 500; diff --git a/frontend/src/components/corpuses/DocumentTableOfContents.tsx b/frontend/src/components/corpuses/DocumentTableOfContents.tsx index 09dba9b9e..3e11520b6 100644 --- a/frontend/src/components/corpuses/DocumentTableOfContents.tsx +++ b/frontend/src/components/corpuses/DocumentTableOfContents.tsx @@ -18,10 +18,9 @@ import { } from "lucide-react"; import { - GET_DOCUMENT_RELATIONSHIPS, - GetDocumentRelationshipsOutput, - GetDocumentRelationshipsInput, - DocumentRelationshipNode, + GET_CORPUS_DOCUMENT_TOC_EDGES, + GetCorpusDocumentTocEdgesInput, + GetCorpusDocumentTocEdgesOutput, GET_CORPUS_DOCUMENTS_FOR_TOC, GetCorpusDocumentsForTocInput, GetCorpusDocumentsForTocOutput, @@ -38,6 +37,8 @@ import { mediaQuery } from "./styles/corpusDesignTokens"; import { DOCUMENT_RELATIONSHIP_TOC_LIMIT, CORPUS_DOCUMENTS_TOC_LIMIT, + DOCUMENT_RELATIONSHIP_TYPE_RELATIONSHIP, + DOCUMENT_RELATIONSHIP_LABEL_PARENT, } from "../../assets/configurations/constants"; import { DocumentAnnotationIndex } from "./DocumentAnnotationIndex"; @@ -60,7 +61,6 @@ interface DocumentNode { description?: string; fileType?: string; slug?: string; - icon?: string; children: DocumentNode[]; } @@ -469,17 +469,22 @@ export const DocumentTableOfContents: React.FC< // URL-driven expand all state const expandAllFromUrl = useReactiveVar(tocExpandAll); - // Query for document relationships in this corpus + // Query for "parent"-labeled document relationship 
edges in this corpus. + // Uses the lean TOC-specific query that returns only source/target IDs and + // pushes the relationship_type/label filters to the server, so we don't + // fetch hundreds of unrelated rows or duplicate document metadata. const { data: relationshipsData, loading: relationshipsLoading, error: relationshipsError, - } = useQuery( - GET_DOCUMENT_RELATIONSHIPS, + } = useQuery( + GET_CORPUS_DOCUMENT_TOC_EDGES, { variables: { corpusId, first: DOCUMENT_RELATIONSHIP_TOC_LIMIT, + relationshipType: DOCUMENT_RELATIONSHIP_TYPE_RELATIONSHIP, + annotationLabelText: DOCUMENT_RELATIONSHIP_LABEL_PARENT, }, skip: !corpusId, fetchPolicy: "cache-and-network", @@ -507,7 +512,13 @@ export const DocumentTableOfContents: React.FC< const loading = relationshipsLoading || documentsLoading; const error = relationshipsError || documentsError; - // Check if we've hit the limits (potential truncation) + // Check if we've hit the limits (potential truncation). + // NOTE: `relationshipTotalCount` here is the count of *parent-labeled + // RELATIONSHIP* rows only (the server-side filter narrows the queryset + // before it is counted). It is intentionally narrower than the legacy + // `GET_DOCUMENT_RELATIONSHIPS` total, which counted every relationship + // type — a corpus with 600 total relationships but only 50 parent ones + // would have triggered the old warning and won't trigger this one. const relationshipTotalCount = relationshipsData?.documentRelationships?.totalCount ?? 0; const documentsTotalCount = documentsData?.documents?.totalCount ?? 
0; @@ -532,15 +543,8 @@ export const DocumentTableOfContents: React.FC< }; } - // Filter to only "parent" labeled relationships - const parentRelationships = relationships - .map((e) => e.node) - .filter( - (rel): rel is DocumentRelationshipNode => - rel != null && - rel.relationshipType === "RELATIONSHIP" && - rel.annotationLabel?.text?.toLowerCase() === "parent" - ); + // Server-side filters already restrict edges to "parent"-labeled + // RELATIONSHIP rows, so no client-side filter is needed here. // Build a map of document info from ALL corpus documents const documentMap = new Map< @@ -551,7 +555,6 @@ export const DocumentTableOfContents: React.FC< description?: string; fileType?: string; slug?: string; - icon?: string; } >(); @@ -564,7 +567,6 @@ export const DocumentTableOfContents: React.FC< description: doc.description || undefined, fileType: doc.fileType || undefined, slug: doc.slug, - icon: doc.icon || undefined, }); }); @@ -574,33 +576,10 @@ export const DocumentTableOfContents: React.FC< const parentMap = new Map(); // child -> parent const childrenMap = new Map(); // parent -> children - parentRelationships.forEach((rel) => { - const sourceId = rel.sourceDocument.id; - const targetId = rel.targetDocument.id; - - // Update document info with richer data from relationships if available - if (rel.sourceDocument.title) { - documentMap.set(sourceId, { - ...documentMap.get(sourceId), - id: sourceId, - title: rel.sourceDocument.title || "Untitled", - description: rel.sourceDocument.description || undefined, - fileType: rel.sourceDocument.fileType || undefined, - slug: rel.sourceDocument.slug, - icon: rel.sourceDocument.icon, - }); - } - if (rel.targetDocument.title) { - documentMap.set(targetId, { - ...documentMap.get(targetId), - id: targetId, - title: rel.targetDocument.title || "Untitled", - description: rel.targetDocument.description || undefined, - fileType: rel.targetDocument.fileType || undefined, - slug: rel.targetDocument.slug, - icon: 
rel.targetDocument.icon, - }); - } + relationships.forEach((edge) => { + const sourceId = edge.node?.sourceDocument?.id; + const targetId = edge.node?.targetDocument?.id; + if (!sourceId || !targetId) return; // Source's parent is target (source "has parent" target) parentMap.set(sourceId, targetId); @@ -658,7 +637,6 @@ export const DocumentTableOfContents: React.FC< description: docInfo.description, fileType: docInfo.fileType, slug: docInfo.slug, - icon: docInfo.icon, children, }; }; diff --git a/frontend/src/graphql/queries.ts b/frontend/src/graphql/queries.ts index e95ce4e5c..b21dbe9e6 100644 --- a/frontend/src/graphql/queries.ts +++ b/frontend/src/graphql/queries.ts @@ -5469,7 +5469,10 @@ export const GET_DOCUMENT_RELATIONSHIPS = gql` } `; -// Lightweight query for TOC - gets all documents in a corpus with minimal fields +// Lightweight query for TOC - gets all documents in a corpus with minimal fields. +// Intentionally omits `icon` (the TOC renders an icon derived from `fileType` +// on the frontend) and `creator` (unused). Dropping these avoids one file-URL +// resolver call per document plus an extra join on every page load. export interface GetCorpusDocumentsForTocInput { corpusId: string; first?: number; @@ -5480,11 +5483,7 @@ export interface CorpusDocumentForToc { title: string; description: string | null; slug: string; - icon: string | null; fileType: string | null; - creator: { - slug: string; - }; } export interface GetCorpusDocumentsForTocOutput { @@ -5511,12 +5510,7 @@ export const GET_CORPUS_DOCUMENTS_FOR_TOC = gql` title description slug - icon fileType - creator { - id - slug - } } } totalCount @@ -5530,6 +5524,74 @@ export const GET_CORPUS_DOCUMENTS_FOR_TOC = gql` } `; +// Ultra-lean relationships query for the corpus TOC tree. +// Only source/target IDs and the relationship identity are needed to compute +// parent/child edges; document metadata is supplied by GET_CORPUS_DOCUMENTS_FOR_TOC. 
+// Server-side filtering on `relationshipType` and `annotationLabelText` keeps +// the result set restricted to "parent"-labeled RELATIONSHIP rows. +export interface GetCorpusDocumentTocEdgesInput { + corpusId: string; + first?: number; + relationshipType?: string; + annotationLabelText?: string; +} + +export interface CorpusDocumentTocEdge { + id: string; + // `sourceDocument` and `targetDocument` are typed nullable because the + // GraphQL schema marks every relation field as nullable by default. At the + // database level the underlying FKs on `DocumentRelationship` are non-null, + // so in practice these are always present — but consumers must still null- + // guard on the unwrapped value to keep TypeScript happy (and to remain + // safe against any future permission-scoped scrubs of the related rows). + sourceDocument: { id: string } | null; + targetDocument: { id: string } | null; +} + +export interface GetCorpusDocumentTocEdgesOutput { + documentRelationships: { + edges: Array<{ + node: CorpusDocumentTocEdge; + }>; + totalCount: number; + pageInfo: { + hasNextPage: boolean; + }; + }; +} + +export const GET_CORPUS_DOCUMENT_TOC_EDGES = gql` + query GetCorpusDocumentTocEdges( + $corpusId: ID + $first: Int + $relationshipType: String + $annotationLabelText: String + ) { + documentRelationships( + corpusId: $corpusId + first: $first + relationshipType: $relationshipType + annotationLabelText: $annotationLabelText + ) { + edges { + node { + id + sourceDocument { + id + } + targetDocument { + id + } + } + } + totalCount + pageInfo { + hasNextPage + } + } + } +`; + // ============================================================================ // CAML ARTICLE (Readme.CAML document) // ============================================================================ diff --git a/frontend/tests/DocumentTableOfContents.ct.tsx b/frontend/tests/DocumentTableOfContents.ct.tsx index 889da0f63..37aa43bca 100644 --- a/frontend/tests/DocumentTableOfContents.ct.tsx +++ 
b/frontend/tests/DocumentTableOfContents.ct.tsx @@ -71,7 +71,12 @@ test.describe("DocumentTableOfContents", () => { timeout: 10000, }); - await expect(page.getByText("Parent Document")).toBeVisible(); + // Use exact match — getByText defaults to case-insensitive substring + // matching, so the mocked description "A parent document for testing + // hierarchy" would also match and trigger a strict-mode violation. + await expect( + page.getByText("Parent Document", { exact: true }) + ).toBeVisible(); }); test("displays child documents", async ({ mount, page }) => { @@ -229,8 +234,9 @@ test.describe("DocumentTableOfContents", () => { timeout: 10000, }); - // Click on a document title - await page.getByText("Parent Document").click(); + // Click on a document title (exact match — the default case-insensitive + // substring match would also hit the mocked description). + await page.getByText("Parent Document", { exact: true }).click(); // Navigation would happen via React Router - we can't easily test the actual navigation // but we can verify the click handler is called (no errors thrown) @@ -331,7 +337,11 @@ test.describe("DocumentTableOfContents", () => { await expect(page.getByText("Table of Contents")).toBeVisible({ timeout: 10000, }); - await expect(page.getByText("Parent Document")).toBeVisible(); + // Exact match — the mocked description "A parent document for testing + // hierarchy" also matches under the default case-insensitive substring search. 
+ await expect( + page.getByText("Parent Document", { exact: true }) + ).toBeVisible(); // Expand parent document — use its treeitem's chevron directly const parentItem = page.getByRole("treeitem", { diff --git a/frontend/tests/DocumentTableOfContentsTestWrapper.tsx b/frontend/tests/DocumentTableOfContentsTestWrapper.tsx index 27c7ab875..583487252 100644 --- a/frontend/tests/DocumentTableOfContentsTestWrapper.tsx +++ b/frontend/tests/DocumentTableOfContentsTestWrapper.tsx @@ -6,7 +6,7 @@ import { MemoryRouter } from "react-router-dom"; import { relayStylePagination } from "@apollo/client/utilities"; import { DocumentTableOfContents } from "../src/components/corpuses/DocumentTableOfContents"; import { - GET_DOCUMENT_RELATIONSHIPS, + GET_CORPUS_DOCUMENT_TOC_EDGES, GET_CORPUS_DOCUMENTS_FOR_TOC, GET_DOCUMENT_ANNOTATION_INDEX, } from "../src/graphql/queries"; @@ -16,6 +16,8 @@ import { CORPUS_DOCUMENTS_TOC_LIMIT, DOCUMENT_ANNOTATION_INDEX_LIMIT, OC_SECTION_LABEL, + DOCUMENT_RELATIONSHIP_TYPE_RELATIONSHIP, + DOCUMENT_RELATIONSHIP_LABEL_PARENT, } from "../src/assets/configurations/constants"; // Test corpus ID @@ -28,7 +30,9 @@ const mockCorpus = { creator: { id: "user-1", slug: "test-user" }, }; -// Mock documents for the corpus (used by GET_CORPUS_DOCUMENTS_FOR_TOC) +// Mock documents for the corpus (used by GET_CORPUS_DOCUMENTS_FOR_TOC). +// The TOC document query was slimmed down to omit `icon` and `creator` — +// the TOC derives icons from `fileType` and never displays creator info. 
const mockCorpusDocuments = [ { node: { @@ -36,9 +40,7 @@ const mockCorpusDocuments = [ title: "Parent Document", description: "A parent document for testing hierarchy", slug: "parent-document", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -49,9 +51,7 @@ const mockCorpusDocuments = [ title: "Child Document 1", description: "First child document", slug: "child-document-1", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -62,47 +62,30 @@ const mockCorpusDocuments = [ title: "Child Document 2", description: "Second child document", slug: "child-document-2", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", }, ]; -// Mock relationships for testing +// Mock relationships for testing. +// The TOC relationship query was slimmed down to GET_CORPUS_DOCUMENT_TOC_EDGES, +// which only fetches the relationship id + source/target document ids. Server-side +// filters keep the result set restricted to "parent"-labeled RELATIONSHIP rows, +// so the relationship rows themselves carry no relationshipType / label fields. 
const mockParentRelationships = [ { node: { id: "rel-1", - relationshipType: "RELATIONSHIP", - data: null, sourceDocument: { id: "doc-2", - title: "Child Document 1", - icon: null, - slug: "child-document-1", - creator: { slug: "test-user" }, + __typename: "DocumentType", }, targetDocument: { id: "doc-1", - title: "Parent Document", - icon: null, - slug: "parent-document", - creator: { slug: "test-user" }, + __typename: "DocumentType", }, - annotationLabel: { - id: "label-1", - text: "parent", - color: "#3b82f6", - icon: null, - }, - corpus: { id: TEST_CORPUS_ID }, - creator: { id: "user-1", username: "testuser" }, - created: "2025-01-01T00:00:00Z", - modified: "2025-01-01T00:00:00Z", - myPermissions: ["read"], __typename: "DocumentRelationshipType", }, __typename: "DocumentRelationshipTypeEdge", @@ -110,49 +93,29 @@ const mockParentRelationships = [ { node: { id: "rel-2", - relationshipType: "RELATIONSHIP", - data: null, sourceDocument: { id: "doc-3", - title: "Child Document 2", - icon: null, - slug: "child-document-2", - creator: { slug: "test-user" }, + __typename: "DocumentType", }, targetDocument: { id: "doc-1", - title: "Parent Document", - icon: null, - slug: "parent-document", - creator: { slug: "test-user" }, - }, - annotationLabel: { - id: "label-1", - text: "parent", - color: "#3b82f6", - icon: null, + __typename: "DocumentType", }, - corpus: { id: TEST_CORPUS_ID }, - creator: { id: "user-1", username: "testuser" }, - created: "2025-01-01T00:00:00Z", - modified: "2025-01-01T00:00:00Z", - myPermissions: ["read"], __typename: "DocumentRelationshipType", }, __typename: "DocumentRelationshipTypeEdge", }, ]; -// Deep hierarchy documents +// Deep hierarchy documents (TOC document query: no icon/creator fields). 
const mockDeepHierarchyDocuments = [ { node: { id: "doc-root", title: "Root Document", + description: null, slug: "root-doc", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -161,10 +124,9 @@ const mockDeepHierarchyDocuments = [ node: { id: "doc-level1", title: "Level 1 Document", + description: null, slug: "level-1", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -173,10 +135,9 @@ const mockDeepHierarchyDocuments = [ node: { id: "doc-level2", title: "Level 2 Document", + description: null, slug: "level-2", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -185,10 +146,9 @@ const mockDeepHierarchyDocuments = [ node: { id: "doc-level3", title: "Level 3 Document", + description: null, slug: "level-3", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -197,49 +157,24 @@ const mockDeepHierarchyDocuments = [ node: { id: "doc-level4", title: "Level 4 Document", + description: null, slug: "level-4", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", }, ]; -// Deep hierarchy relationships (5 levels: Root -> Level1 -> Level2 -> Level3 -> Level4) +// Deep hierarchy relationships (5 levels: Root -> Level1 -> Level2 -> Level3 -> Level4). +// Uses the lean GET_CORPUS_DOCUMENT_TOC_EDGES shape — only IDs are returned. 
const mockDeepHierarchy = [ // Level1 -> Root { node: { id: "rel-deep-1", - relationshipType: "RELATIONSHIP", - data: null, - sourceDocument: { - id: "doc-level1", - title: "Level 1 Document", - icon: null, - slug: "level-1", - creator: { slug: "test-user" }, - }, - targetDocument: { - id: "doc-root", - title: "Root Document", - icon: null, - slug: "root-doc", - creator: { slug: "test-user" }, - }, - annotationLabel: { - id: "label-1", - text: "parent", - color: "#3b82f6", - icon: null, - }, - corpus: { id: TEST_CORPUS_ID }, - creator: { id: "user-1", username: "testuser" }, - created: "2025-01-01T00:00:00Z", - modified: "2025-01-01T00:00:00Z", - myPermissions: ["read"], + sourceDocument: { id: "doc-level1", __typename: "DocumentType" }, + targetDocument: { id: "doc-root", __typename: "DocumentType" }, __typename: "DocumentRelationshipType", }, __typename: "DocumentRelationshipTypeEdge", @@ -248,33 +183,8 @@ const mockDeepHierarchy = [ { node: { id: "rel-deep-2", - relationshipType: "RELATIONSHIP", - data: null, - sourceDocument: { - id: "doc-level2", - title: "Level 2 Document", - icon: null, - slug: "level-2", - creator: { slug: "test-user" }, - }, - targetDocument: { - id: "doc-level1", - title: "Level 1 Document", - icon: null, - slug: "level-1", - creator: { slug: "test-user" }, - }, - annotationLabel: { - id: "label-1", - text: "parent", - color: "#3b82f6", - icon: null, - }, - corpus: { id: TEST_CORPUS_ID }, - creator: { id: "user-1", username: "testuser" }, - created: "2025-01-01T00:00:00Z", - modified: "2025-01-01T00:00:00Z", - myPermissions: ["read"], + sourceDocument: { id: "doc-level2", __typename: "DocumentType" }, + targetDocument: { id: "doc-level1", __typename: "DocumentType" }, __typename: "DocumentRelationshipType", }, __typename: "DocumentRelationshipTypeEdge", @@ -283,33 +193,8 @@ const mockDeepHierarchy = [ { node: { id: "rel-deep-3", - relationshipType: "RELATIONSHIP", - data: null, - sourceDocument: { - id: "doc-level3", - title: "Level 3 
Document", - icon: null, - slug: "level-3", - creator: { slug: "test-user" }, - }, - targetDocument: { - id: "doc-level2", - title: "Level 2 Document", - icon: null, - slug: "level-2", - creator: { slug: "test-user" }, - }, - annotationLabel: { - id: "label-1", - text: "parent", - color: "#3b82f6", - icon: null, - }, - corpus: { id: TEST_CORPUS_ID }, - creator: { id: "user-1", username: "testuser" }, - created: "2025-01-01T00:00:00Z", - modified: "2025-01-01T00:00:00Z", - myPermissions: ["read"], + sourceDocument: { id: "doc-level3", __typename: "DocumentType" }, + targetDocument: { id: "doc-level2", __typename: "DocumentType" }, __typename: "DocumentRelationshipType", }, __typename: "DocumentRelationshipTypeEdge", @@ -318,33 +203,8 @@ const mockDeepHierarchy = [ { node: { id: "rel-deep-4", - relationshipType: "RELATIONSHIP", - data: null, - sourceDocument: { - id: "doc-level4", - title: "Level 4 Document", - icon: null, - slug: "level-4", - creator: { slug: "test-user" }, - }, - targetDocument: { - id: "doc-level3", - title: "Level 3 Document", - icon: null, - slug: "level-3", - creator: { slug: "test-user" }, - }, - annotationLabel: { - id: "label-1", - text: "parent", - color: "#3b82f6", - icon: null, - }, - corpus: { id: TEST_CORPUS_ID }, - creator: { id: "user-1", username: "testuser" }, - created: "2025-01-01T00:00:00Z", - modified: "2025-01-01T00:00:00Z", - myPermissions: ["read"], + sourceDocument: { id: "doc-level4", __typename: "DocumentType" }, + targetDocument: { id: "doc-level3", __typename: "DocumentType" }, __typename: "DocumentRelationshipType", }, __typename: "DocumentRelationshipTypeEdge", @@ -476,7 +336,12 @@ const emptyAnnotationIndexMock = (documentId: string): MockedResponse => ({ }, }); -// Cache configuration +// Cache configuration. +// NOTE: keyArgs must match GraphQL FIELD ARGUMENT names, not variable names. 
+// The lean TOC edges query uses `corpusId`, `relationshipType`, and +// `annotationLabelText` as field arguments on `documentRelationships`, so the +// pagination key must include all three to isolate TOC results from any other +// `documentRelationships` cache entries. const createTestCache = () => new InMemoryCache({ typePolicies: { @@ -485,6 +350,8 @@ const createTestCache = () => documentRelationships: relayStylePagination([ "corpusId", "documentId", + "relationshipType", + "annotationLabelText", ]), documents: relayStylePagination(["inCorpusWithId"]), annotations: relayStylePagination([ @@ -533,9 +400,14 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ // Build mocks based on mockType const getMocks = (): MockedResponse[] => { + // The lean TOC edges query supplies the relationship_type / label filters + // server-side; they must match the variables the component sends exactly, + // since MockedProvider matches mocks by deep-equal variable comparison. const relationshipsVariables = { corpusId: TEST_CORPUS_ID, first: DOCUMENT_RELATIONSHIP_TOC_LIMIT, + relationshipType: DOCUMENT_RELATIONSHIP_TYPE_RELATIONSHIP, + annotationLabelText: DOCUMENT_RELATIONSHIP_LABEL_PARENT, }; const documentsVariables = { @@ -570,7 +442,7 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ // Empty corpus - no documents const emptyRelationshipsMock = { request: { - query: GET_DOCUMENT_RELATIONSHIPS, + query: GET_CORPUS_DOCUMENT_TOC_EDGES, variables: relationshipsVariables, }, result: { @@ -580,9 +452,6 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ totalCount: 0, pageInfo: { hasNextPage: false, - hasPreviousPage: false, - startCursor: null, - endCursor: null, }, __typename: "DocumentRelationshipTypeConnection", }, @@ -637,7 +506,7 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ if (mockType === "singleStandalone") { const emptyRelationshipsMock = { request: { - query: GET_DOCUMENT_RELATIONSHIPS, + query: 
GET_CORPUS_DOCUMENT_TOC_EDGES, variables: relationshipsVariables, }, result: { @@ -647,9 +516,6 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ totalCount: 0, pageInfo: { hasNextPage: false, - hasPreviousPage: false, - startCursor: null, - endCursor: null, }, __typename: "DocumentRelationshipTypeConnection", }, @@ -663,9 +529,7 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ title: "Single Standalone Document", description: "Only document in this corpus", slug: "single-standalone-document", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -684,49 +548,22 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ // Documents exist but no parent relationships - shows docs as standalone root items const noParentRelsMock = { request: { - query: GET_DOCUMENT_RELATIONSHIPS, + query: GET_CORPUS_DOCUMENT_TOC_EDGES, variables: relationshipsVariables, }, result: { data: { + // The lean TOC edges query already applies server-side filters for + // relationshipType="RELATIONSHIP" + annotationLabelText="parent", + // so non-parent relationships (e.g. NOTES) never come back to the + // client. The mock therefore returns an empty edge list, and the + // two documents render as standalone root items via + // GET_CORPUS_DOCUMENTS_FOR_TOC. 
documentRelationships: { - edges: [ - { - node: { - id: "rel-other", - relationshipType: "NOTES", // Not a parent relationship - data: null, - sourceDocument: { - id: "doc-a", - title: "Doc A", - icon: null, - slug: "doc-a", - creator: { slug: "test-user" }, - }, - targetDocument: { - id: "doc-b", - title: "Doc B", - icon: null, - slug: "doc-b", - creator: { slug: "test-user" }, - }, - annotationLabel: null, - corpus: { id: TEST_CORPUS_ID }, - creator: { id: "user-1", username: "testuser" }, - created: "2025-01-01T00:00:00Z", - modified: "2025-01-01T00:00:00Z", - myPermissions: ["read"], - __typename: "DocumentRelationshipType", - }, - __typename: "DocumentRelationshipTypeEdge", - }, - ], - totalCount: 1, + edges: [], + totalCount: 0, pageInfo: { hasNextPage: false, - hasPreviousPage: false, - startCursor: null, - endCursor: null, }, __typename: "DocumentRelationshipTypeConnection", }, @@ -739,10 +576,9 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ node: { id: "doc-a", title: "Doc A", + description: null, slug: "doc-a", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -751,10 +587,9 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ node: { id: "doc-b", title: "Doc B", + description: null, slug: "doc-b", - icon: null, fileType: "application/pdf", - creator: { slug: "test-user" }, __typename: "DocumentType", }, __typename: "DocumentTypeEdge", @@ -784,7 +619,7 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ // Return duplicate mocks for cache-and-network fetch policy const relationshipsMock = { request: { - query: GET_DOCUMENT_RELATIONSHIPS, + query: GET_CORPUS_DOCUMENT_TOC_EDGES, variables: relationshipsVariables, }, result: { @@ -794,9 +629,6 @@ export const DocumentTableOfContentsTestWrapper: React.FC = ({ totalCount: relationshipsMockData.length, pageInfo: { hasNextPage: false, - hasPreviousPage: false, - startCursor: 
null, - endCursor: null, }, __typename: "DocumentRelationshipTypeConnection", }, diff --git a/opencontractserver/tests/test_document_relationships.py b/opencontractserver/tests/test_document_relationships.py index 450d6307a..5f2f17ecc 100644 --- a/opencontractserver/tests/test_document_relationships.py +++ b/opencontractserver/tests/test_document_relationships.py @@ -215,3 +215,63 @@ def test_document_all_relationships_query(self): ) # Should have both relationship and note relationship_types = {r["relationshipType"] for r in relationships} self.assertEqual(relationship_types, {"RELATIONSHIP", "NOTES"}) + + def test_document_relationships_annotation_label_text_filter(self): + """ + The corpus Table of Contents query relies on a server-side + `annotationLabelText` filter (defined as `annotation_label_text` + with `iexact` lookup on `DocumentRelationshipFilter`) to restrict + edges to only the parent-labeled hierarchy rows. Pin the behavior + so a future refactor cannot silently drop the filter and revert + the TOC to fetching every relationship row. + """ + # Build a second relationship whose label is "parent" so we can + # prove the filter narrows on label text (case-insensitively). + parent_label = AnnotationLabel.objects.create( + text="parent", + label_type="DOC_RELATIONSHIP_LABEL", + creator=self.user, + ) + parent_relationship = DocumentRelationship.objects.create( + source_document=self.source_doc, + target_document=self.target_doc, + relationship_type="RELATIONSHIP", + annotation_label=parent_label, + creator=self.user, + corpus=self.corpus, + ) + + # Sanity check: the corpus has 3 relationships total (the two from + # `setUp` plus the parent-labeled one we just made). The filter + # must return exactly 1 edge — only the parent-labeled relationship. 
+ corpus_gid = to_global_id("CorpusType", self.corpus.id) + query = """ + query($corpusId: ID, $labelText: String) { + documentRelationships( + corpusId: $corpusId + annotationLabelText: $labelText + ) { + edges { + node { + id + relationshipType + annotationLabel { text } + } + } + totalCount + } + } + """ + # Mixed case input — the filter uses `iexact` so "PARENT" must match. + result = self.client.execute( + query, variables={"corpusId": corpus_gid, "labelText": "PARENT"} + ) + self.assertIsNone(result.get("errors")) + rels = result["data"]["documentRelationships"] + self.assertEqual(rels["totalCount"], 1) + self.assertEqual(len(rels["edges"]), 1) + self.assertEqual( + rels["edges"][0]["node"]["id"], + to_global_id("DocumentRelationshipType", parent_relationship.id), + ) + self.assertEqual(rels["edges"][0]["node"]["annotationLabel"]["text"], "parent")