From bfd42c1ecd2c4bbc17ff6e0674f8fb6e1ceabffa Mon Sep 17 00:00:00 2001 From: Dayenne Souza Date: Tue, 3 Mar 2026 10:18:34 -0300 Subject: [PATCH] remove relationships with phantom entities (#2261) * filter phantom relationships * fix flush size --- .../patch-20260302221432185149.json | 4 + .../index/operations/embed_text/embed_text.py | 10 +- .../operations/extract_graph/extract_graph.py | 2 + .../index/operations/extract_graph/utils.py | 53 +++ .../update_entities_relationships.py | 7 + .../operations/embed_text/test_embed_text.py | 10 +- .../indexing/operations/test_extract_graph.py | 301 ++++++++++++++++++ tests/unit/indexing/update/__init__.py | 2 + .../update/test_update_relationships.py | 227 +++++++++++++ 9 files changed, 612 insertions(+), 4 deletions(-) create mode 100644 .semversioner/next-release/patch-20260302221432185149.json create mode 100644 packages/graphrag/graphrag/index/operations/extract_graph/utils.py create mode 100644 tests/unit/indexing/operations/test_extract_graph.py create mode 100644 tests/unit/indexing/update/__init__.py create mode 100644 tests/unit/indexing/update/test_update_relationships.py diff --git a/.semversioner/next-release/patch-20260302221432185149.json b/.semversioner/next-release/patch-20260302221432185149.json new file mode 100644 index 000000000..9dd0aaa23 --- /dev/null +++ b/.semversioner/next-release/patch-20260302221432185149.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "filter phantom relationships in graph" +} diff --git a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py index 9c4366f3d..59424272d 100644 --- a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py +++ b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py @@ -33,12 +33,18 @@ async def embed_text( id_column: str = "id", output_table: Table | None = None, ) -> int: - """Embed text from a streaming Table into a vector store.""" + """Embed text from a streaming Table into a vector store. + + Rows are buffered before flushing to ``run_embed_text``, + which dispatches API batches concurrently up to + ``num_threads``. The buffer is sized so each flush produces + enough batches to saturate the concurrency limit. + """ vector_store.create_index() buffer: list[dict[str, Any]] = [] total_rows = 0 - flush_size = batch_size * 4 + flush_size = batch_size * num_threads async for row in input_table: text = row.get(embed_column) diff --git a/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py b/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py index 7ab881c2a..7ba79dce8 100644 --- a/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py +++ b/packages/graphrag/graphrag/index/operations/extract_graph/extract_graph.py @@ -11,6 +11,7 @@ from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks from graphrag.config.enums import AsyncType from graphrag.index.operations.extract_graph.graph_extractor import GraphExtractor +from graphrag.index.operations.extract_graph.utils import filter_orphan_relationships from graphrag.index.utils.derive_from_rows import derive_from_rows if TYPE_CHECKING: @@ -67,6 +68,7 @@ async def run_strategy(row): entities = _merge_entities(entity_dfs) relationships = _merge_relationships(relationship_dfs) + relationships = filter_orphan_relationships(relationships, entities) return (entities, relationships) diff --git a/packages/graphrag/graphrag/index/operations/extract_graph/utils.py b/packages/graphrag/graphrag/index/operations/extract_graph/utils.py new file mode 100644 index 000000000..82b361007 --- /dev/null +++ b/packages/graphrag/graphrag/index/operations/extract_graph/utils.py @@ -0,0 +1,53 @@ +# Copyright (C) 2026 Microsoft Corporation. +# Licensed under the MIT License + +"""Utility functions for graph extraction operations.""" + +import logging + +import pandas as pd + +logger = logging.getLogger(__name__) + + +def filter_orphan_relationships( + relationships: pd.DataFrame, + entities: pd.DataFrame, +) -> pd.DataFrame: + """Remove relationships whose source or target has no entity entry. + + After LLM graph extraction, the model may hallucinate entity + names in relationships that have no corresponding entity row. + This function drops those dangling references so downstream + processing never encounters broken graph edges. + + Parameters + ---------- + relationships: + Merged relationship DataFrame with at least ``source`` + and ``target`` columns. + entities: + Merged entity DataFrame with at least a ``title`` column. + + Returns + ------- + pd.DataFrame + Relationships filtered to only those whose ``source`` + and ``target`` both appear in ``entities["title"]``. + """ + if relationships.empty or entities.empty: + return relationships.iloc[0:0].reset_index(drop=True) + + entity_titles = set(entities["title"]) + before_count = len(relationships) + mask = relationships["source"].isin(entity_titles) & relationships["target"].isin( + entity_titles + ) + filtered = relationships[mask].reset_index(drop=True) + dropped = before_count - len(filtered) + if dropped > 0: + logger.warning( + "Dropped %d relationship(s) referencing non-existent entities.", + dropped, + ) + return filtered diff --git a/packages/graphrag/graphrag/index/workflows/update_entities_relationships.py b/packages/graphrag/graphrag/index/workflows/update_entities_relationships.py index 6c3937a99..eb8a65010 100644 --- a/packages/graphrag/graphrag/index/workflows/update_entities_relationships.py +++ b/packages/graphrag/graphrag/index/workflows/update_entities_relationships.py @@ -14,6 +14,9 @@ from graphrag.callbacks.workflow_callbacks import WorkflowCallbacks from graphrag.config.models.graph_rag_config import GraphRagConfig from graphrag.data_model.data_reader import DataReader +from graphrag.index.operations.extract_graph.utils import ( + filter_orphan_relationships, +) from graphrag.index.run.utils import get_update_table_providers from graphrag.index.typing.context import PipelineRunContext from graphrag.index.typing.workflow import WorkflowFunctionOutput @@ -79,6 +82,10 @@ async def _update_entities_and_relationships( delta_relationships, ) + merged_relationships_df = filter_orphan_relationships( + merged_relationships_df, merged_entities_df + ) + summarization_model_config = config.get_completion_model_config( config.summarize_descriptions.completion_model_id ) diff --git a/tests/unit/indexing/operations/embed_text/test_embed_text.py b/tests/unit/indexing/operations/embed_text/test_embed_text.py index 7a232b272..d55ab09ff 100644 --- a/tests/unit/indexing/operations/embed_text/test_embed_text.py +++ b/tests/unit/indexing/operations/embed_text/test_embed_text.py @@ -150,7 +150,13 @@ async def test_embed_text_basic(): @pytest.mark.asyncio async def test_embed_text_batching(): - """Verify rows are flushed in batches when buffer exceeds batch_size * 4.""" + """Verify rows are flushed in batches sized by batch_size * num_threads. + + With batch_size=2 and num_threads=4, each flush holds up to + 8 rows (enough to produce 4 API batches that saturate the + concurrency limit). 10 rows should produce 2 flushes: + one of 8 rows and a final remainder of 2. + """ rows = [{"id": str(i), "text": f"text {i}"} for i in range(10)] input_table = FakeInputTable(rows) vector_store = _make_mock_vector_store() @@ -172,7 +178,7 @@ async def test_embed_text_batching(): embed_column="text", batch_size=2, batch_max_tokens=8191, - num_threads=1, + num_threads=4, vector_store=vector_store, ) diff --git a/tests/unit/indexing/operations/test_extract_graph.py b/tests/unit/indexing/operations/test_extract_graph.py new file mode 100644 index 000000000..143f9c21f --- /dev/null +++ b/tests/unit/indexing/operations/test_extract_graph.py @@ -0,0 +1,301 @@ +# Copyright (C) 2026 Microsoft Corporation. +# Licensed under the MIT License + +"""Tests for extract_graph merge and orphan-filtering operations. + +Validates that _merge_entities, _merge_relationships, and +filter_orphan_relationships correctly aggregate per-text-unit +extraction results and remove relationships whose source or +target has no corresponding entity. +""" + +import pandas as pd +from graphrag.index.operations.extract_graph.extract_graph import ( + _merge_entities, + _merge_relationships, +) +from graphrag.index.operations.extract_graph.utils import ( + filter_orphan_relationships, +) + + +def _entity_row( + title: str, + entity_type: str = "THING", + description: str = "desc", + source_id: str = "tu1", +) -> dict: + """Build a single raw entity row as produced by the graph extractor.""" + return { + "title": title, + "type": entity_type, + "description": description, + "source_id": source_id, + } + + +def _relationship_row( + source: str, + target: str, + weight: float = 1.0, + description: str = "desc", + source_id: str = "tu1", +) -> dict: + """Build a single raw relationship row as produced by the graph extractor.""" + return { + "source": source, + "target": target, + "weight": weight, + "description": description, + "source_id": source_id, + } + + +class TestMergeEntities: + """Tests for the _merge_entities aggregation helper.""" + + def test_groups_by_title_and_type(self): + """Entities with the same title+type merge into one row.""" + df1 = pd.DataFrame([_entity_row("A", "PERSON")]) + df2 = pd.DataFrame([_entity_row("A", "PERSON", source_id="tu2")]) + merged = _merge_entities([df1, df2]) + + assert len(merged) == 1 + assert merged.iloc[0]["title"] == "A" + assert merged.iloc[0]["frequency"] == 2 + + def test_different_types_stay_separate(self): + """Same title but different type should not merge.""" + df = pd.DataFrame([ + _entity_row("A", "PERSON"), + _entity_row("A", "ORG"), + ]) + merged = _merge_entities([df]) + + assert len(merged) == 2 + + def test_empty_input(self): + """Empty entity list should produce an empty DataFrame.""" + df = pd.DataFrame(columns=["title", "type", "description", "source_id"]) + merged = _merge_entities([df]) + + assert len(merged) == 0 + + +class TestMergeRelationships: + """Tests for the _merge_relationships aggregation helper.""" + + def test_groups_by_source_target(self): + """Relationships with same source+target merge and sum weight.""" + df1 = pd.DataFrame([_relationship_row("A", "B", weight=2.0)]) + df2 = pd.DataFrame([_relationship_row("A", "B", weight=3.0)]) + merged = _merge_relationships([df1, df2]) + + assert len(merged) == 1 + assert merged.iloc[0]["weight"] == 5.0 + + def test_distinct_pairs_stay_separate(self): + """Different source-target pairs remain separate rows.""" + df = pd.DataFrame([ + _relationship_row("A", "B"), + _relationship_row("B", "C"), + ]) + merged = _merge_relationships([df]) + + assert len(merged) == 2 + + def test_empty_input(self): + """Empty relationship list should produce an empty DataFrame.""" + df = pd.DataFrame( + columns=["source", "target", "weight", "description", "source_id"] + ) + merged = _merge_relationships([df]) + + assert len(merged) == 0 + + +class TestFilterOrphanRelationships: + """Tests for orphan relationship filtering. + + After LLM graph extraction, relationships may reference entity + names that have no corresponding entity row. These must be + removed before downstream processing. + """ + + def test_all_valid_relationships_kept(self): + """Relationships whose endpoints all exist should be retained.""" + entities = pd.DataFrame([ + _entity_row("A"), + _entity_row("B"), + _entity_row("C"), + ]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("A", "B"), + _relationship_row("B", "C"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 2 + + def test_removes_relationship_with_missing_source(self): + """Relationship whose source has no entity entry is dropped.""" + entities = pd.DataFrame([_entity_row("B")]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("PHANTOM", "B"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 0 + + def test_removes_relationship_with_missing_target(self): + """Relationship whose target has no entity entry is dropped.""" + entities = pd.DataFrame([_entity_row("A")]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("A", "PHANTOM"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 0 + + def test_removes_relationship_with_both_missing(self): + """Relationship where both endpoints are missing is dropped.""" + entities = pd.DataFrame([_entity_row("A")]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("GHOST_1", "GHOST_2"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 0 + + def test_keeps_valid_drops_orphan_mixed(self): + """Valid and orphaned relationships coexist; only valid survive.""" + entities = pd.DataFrame([ + _entity_row("A"), + _entity_row("B"), + ]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("A", "B"), + _relationship_row("A", "PHANTOM"), + _relationship_row("PHANTOM", "B"), + _relationship_row("GHOST_1", "GHOST_2"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 1 + assert filtered.iloc[0]["source"] == "A" + assert filtered.iloc[0]["target"] == "B" + + def test_empty_entities_drops_all_relationships(self): + """If there are no entities, all relationships are orphaned.""" + entities = pd.DataFrame(columns=["title", "type", "description", "source_id"]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("A", "B"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 0 + + def test_empty_relationships_returns_empty(self): + """If there are no relationships, result is empty DataFrame.""" + entities = pd.DataFrame([_entity_row("A")]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame( + columns=["source", "target", "weight", "description", "source_id"] + ) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 0 + + def test_preserves_all_columns(self): + """Filtered DataFrame retains all original columns.""" + entities = pd.DataFrame([ + _entity_row("A"), + _entity_row("B"), + ]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("A", "B", weight=5.0, description="linked"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert set(filtered.columns) == set(merged_rels.columns) + assert filtered.iloc[0]["weight"] == 5.0 + assert filtered.iloc[0]["description"] == ["linked"] + + def test_multi_text_unit_orphan(self): + """Orphan detected across multiple text units after merge.""" + df1 = pd.DataFrame([ + _entity_row("A", source_id="tu1"), + _relationship_row("A", "HALLUCINATED", source_id="tu1"), + ]) + df2 = pd.DataFrame([ + _entity_row("A", source_id="tu2"), + _relationship_row("A", "HALLUCINATED", source_id="tu2"), + ]) + + entity_dfs = [ + df1[["title", "type", "description", "source_id"]], + df2[["title", "type", "description", "source_id"]], + ] + rel_dfs = [ + df1[["source", "target", "weight", "description", "source_id"]], + df2[["source", "target", "weight", "description", "source_id"]], + ] + + merged_entities = _merge_entities(entity_dfs) + merged_rels = _merge_relationships(rel_dfs) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 0 + + def test_resets_index_after_filter(self): + """Filtered DataFrame should have a clean 0-based index.""" + entities = pd.DataFrame([ + _entity_row("A"), + _entity_row("B"), + _entity_row("C"), + ]) + merged_entities = _merge_entities([entities]) + + relationships = pd.DataFrame([ + _relationship_row("PHANTOM", "B"), + _relationship_row("A", "B"), + _relationship_row("A", "PHANTOM"), + _relationship_row("B", "C"), + ]) + merged_rels = _merge_relationships([relationships]) + + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert list(filtered.index) == list(range(len(filtered))) diff --git a/tests/unit/indexing/update/__init__.py b/tests/unit/indexing/update/__init__.py new file mode 100644 index 000000000..7c0ab1513 --- /dev/null +++ b/tests/unit/indexing/update/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2026 Microsoft Corporation. +# Licensed under the MIT License diff --git a/tests/unit/indexing/update/test_update_relationships.py b/tests/unit/indexing/update/test_update_relationships.py new file mode 100644 index 000000000..996e84de2 --- /dev/null +++ b/tests/unit/indexing/update/test_update_relationships.py @@ -0,0 +1,227 @@ +# Copyright (C) 2026 Microsoft Corporation. +# Licensed under the MIT License + +"""Tests for incremental update merge operations. + +Covers _update_and_merge_relationships and orphan-filtering +in the update pipeline, where old finalized data is merged +with delta data from a new indexing run. +""" + +import pandas as pd +from graphrag.index.operations.extract_graph.utils import ( + filter_orphan_relationships, +) +from graphrag.index.update.relationships import ( + _update_and_merge_relationships, +) + + +def _finalized_entity_row( + title: str, + entity_id: str = "e1", + human_readable_id: int = 0, + entity_type: str = "THING", + description: str = "desc", + frequency: int = 1, + degree: int = 1, +) -> dict: + """Build a finalized entity row matching ENTITIES_FINAL_COLUMNS shape.""" + return { + "id": entity_id, + "human_readable_id": human_readable_id, + "title": title, + "type": entity_type, + "description": description, + "text_unit_ids": ["tu1"], + "frequency": frequency, + "degree": degree, + } + + +def _finalized_relationship_row( + source: str, + target: str, + relationship_id: str = "r1", + human_readable_id: int = 0, + weight: float = 1.0, + description: str = "desc", + combined_degree: int = 2, +) -> dict: + """Build a finalized relationship row matching RELATIONSHIPS_FINAL_COLUMNS.""" + return { + "id": relationship_id, + "human_readable_id": human_readable_id, + "source": source, + "target": target, + "description": description, + "weight": weight, + "combined_degree": combined_degree, + "text_unit_ids": ["tu1"], + } + + +class TestUpdateAndMergeRelationships: + """Tests for _update_and_merge_relationships.""" + + def test_merges_old_and_delta(self): + """Old and delta relationships with distinct pairs both appear.""" + old = pd.DataFrame([ + _finalized_relationship_row("A", "B", relationship_id="r1"), + ]) + delta = pd.DataFrame([ + _finalized_relationship_row("C", "D", relationship_id="r2"), + ]) + merged = _update_and_merge_relationships(old, delta) + + pairs = set(zip(merged["source"], merged["target"], strict=True)) + assert ("A", "B") in pairs + assert ("C", "D") in pairs + assert len(merged) == 2 + + def test_overlapping_pairs_aggregate(self): + """Same source+target in old and delta get grouped together.""" + old = pd.DataFrame([ + _finalized_relationship_row("A", "B", relationship_id="r1", weight=2.0), + ]) + delta = pd.DataFrame([ + _finalized_relationship_row("A", "B", relationship_id="r2", weight=4.0), + ]) + merged = _update_and_merge_relationships(old, delta) + + assert len(merged) == 1 + assert merged.iloc[0]["weight"] == 3.0 # mean of 2.0 and 4.0 + + def test_human_readable_ids_incremented(self): + """Delta human_readable_ids should be offset by old max + 1.""" + old = pd.DataFrame([ + _finalized_relationship_row("A", "B", human_readable_id=5), + ]) + delta = pd.DataFrame([ + _finalized_relationship_row("C", "D", human_readable_id=0), + ]) + merged = _update_and_merge_relationships(old, delta) + + ids = set(merged["human_readable_id"]) + assert len(ids) == 2 + + +class TestUpdatePathOrphanFiltering: + """Tests that orphan relationships are caught in the update path. + + The update pipeline merges old finalized entities with delta + entities, then merges old finalized relationships with delta + relationships. Delta relationships from LLM extraction may + reference hallucinated entity names that don't exist in the + merged entity set. + """ + + def test_delta_introduces_orphan_source(self): + """Delta relationship with hallucinated source is filtered out.""" + merged_entities = pd.DataFrame([ + _finalized_entity_row("A", entity_id="e1"), + _finalized_entity_row("B", entity_id="e2"), + ]) + + old_rels = pd.DataFrame([ + _finalized_relationship_row("A", "B", relationship_id="r1"), + ]) + delta_rels = pd.DataFrame([ + _finalized_relationship_row("HALLUCINATED", "B", relationship_id="r2"), + ]) + merged_rels = _update_and_merge_relationships(old_rels, delta_rels) + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 1 + assert filtered.iloc[0]["source"] == "A" + + def test_delta_introduces_orphan_target(self): + """Delta relationship with hallucinated target is filtered out.""" + merged_entities = pd.DataFrame([ + _finalized_entity_row("A", entity_id="e1"), + _finalized_entity_row("B", entity_id="e2"), + ]) + + old_rels = pd.DataFrame([ + _finalized_relationship_row("A", "B", relationship_id="r1"), + ]) + delta_rels = pd.DataFrame([ + _finalized_relationship_row("A", "HALLUCINATED", relationship_id="r2"), + ]) + merged_rels = _update_and_merge_relationships(old_rels, delta_rels) + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 1 + assert filtered.iloc[0]["target"] == "B" + + def test_delta_introduces_orphan_both_endpoints(self): + """Delta relationship where both endpoints are hallucinated.""" + merged_entities = pd.DataFrame([ + _finalized_entity_row("A", entity_id="e1"), + _finalized_entity_row("B", entity_id="e2"), + ]) + + old_rels = pd.DataFrame([ + _finalized_relationship_row( + "A", "B", relationship_id="r0", human_readable_id=0 + ), + ]) + delta_rels = pd.DataFrame([ + _finalized_relationship_row("GHOST_1", "GHOST_2", relationship_id="r1"), + ]) + merged_rels = _update_and_merge_relationships(old_rels, delta_rels) + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 1 + assert filtered.iloc[0]["source"] == "A" + assert filtered.iloc[0]["target"] == "B" + + def test_all_valid_after_update(self): + """When all endpoints exist, nothing is filtered.""" + merged_entities = pd.DataFrame([ + _finalized_entity_row("A", entity_id="e1"), + _finalized_entity_row("B", entity_id="e2"), + _finalized_entity_row("C", entity_id="e3"), + ]) + + old_rels = pd.DataFrame([ + _finalized_relationship_row("A", "B", relationship_id="r1"), + ]) + delta_rels = pd.DataFrame([ + _finalized_relationship_row("B", "C", relationship_id="r2"), + ]) + merged_rels = _update_and_merge_relationships(old_rels, delta_rels) + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + assert len(filtered) == 2 + + def test_old_relationship_becomes_orphan_after_entity_merge(self): + """Edge case: entity removed in delta makes old relationship orphan. + + This can happen if entity resolution during merge drops an + entity that was previously referenced by a relationship. + """ + merged_entities = pd.DataFrame([ + _finalized_entity_row("A", entity_id="e1"), + _finalized_entity_row("B", entity_id="e2"), + ]) + + old_rels = pd.DataFrame([ + _finalized_relationship_row( + "A", "REMOVED", relationship_id="r1", human_readable_id=0 + ), + _finalized_relationship_row( + "A", "B", relationship_id="r2", human_readable_id=1 + ), + ]) + delta_rels = pd.DataFrame([ + _finalized_relationship_row( + "B", "A", relationship_id="r3", human_readable_id=0 + ), + ]) + merged_rels = _update_and_merge_relationships(old_rels, delta_rels) + filtered = filter_orphan_relationships(merged_rels, merged_entities) + + surviving_pairs = set(zip(filtered["source"], filtered["target"], strict=True)) + assert ("A", "REMOVED") not in surviving_pairs + assert len(filtered) >= 1