Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/distillery/graph/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import math
from typing import Any

from distillery.graph import nx
Expand All @@ -25,3 +26,64 @@ def communities(g: Any) -> list[set[str]]:
return []
undirected = g.to_undirected() if g.is_directed() else g
return list(nx.community.louvain_communities(undirected))


def constraint(g: Any, *, k: int = 10) -> list[tuple[str, float]]:
    """Return the top-k structural-hole brokers by Burt's constraint (ascending).

    Computed on the undirected projection of *g*. Burt's constraint is *low*
    for nodes that bridge otherwise-disconnected neighbours (a structural
    hole / broker) and *high* for nodes embedded in a dense, redundant clique.
    Results are sorted ascending, so the first entries are the strongest
    brokers.

    Nodes with no neighbours (constraint is NaN) are excluded.

    Args:
        g: Graph exposing ``number_of_nodes``, ``is_directed`` and
            ``to_undirected``; a directed graph is projected to undirected
            before scoring.
        k: Maximum number of entries to return. Values ``<= 0`` yield an
            empty list.

    Returns:
        Up to *k* ``(node, constraint)`` tuples ordered by ascending
        constraint. Ties keep the iteration order of ``nx.constraint``'s
        result.
    """
    _require_networkx()
    # Guard before the expensive computation: a negative ``k`` would otherwise
    # reach ``ranked[:k]`` and slice from the *end*, returning an arbitrary
    # subset instead of "at most k" entries.
    if k <= 0 or g.number_of_nodes() == 0:
        return []
    undirected = g.to_undirected() if g.is_directed() else g
    raw = nx.constraint(undirected)
    ranked = [
        (node, float(value))
        for node, value in raw.items()
        # Isolated nodes have an undefined (NaN) constraint; drop them so they
        # cannot poison the sort order.
        if value is not None and not math.isnan(value)
    ]
    ranked.sort(key=lambda kv: kv[1])
    return ranked[:k]


def link_prediction(
    g: Any, *, source: str | None = None, k: int = 10
) -> list[tuple[str, str, float]]:
    """Return the top-k predicted edges by the Adamic-Adar index (descending).

    Adamic-Adar scores a candidate (non-existent) edge by its shared
    neighbours, weighting each by ``1 / log(degree)`` so a connection through a
    niche shared node counts more than one through a hub. Computed on the
    undirected projection.

    When *source* is given, only candidate edges from that node to its
    non-neighbours are scored (emerging adjacencies for one entry); the source
    must be a node in the graph or an empty list is returned. When *source* is
    ``None``, all non-existent edges are scored — bound the graph first (e.g.
    via ``scope="ego"``) since this is quadratic in node count.

    Args:
        g: Graph exposing ``number_of_nodes``, ``is_directed`` and
            ``to_undirected``; a directed graph is projected to undirected
            before scoring.
        source: Optional node whose candidate edges should be scored.
        k: Maximum number of predictions to return. Values ``<= 0`` yield an
            empty list.

    Returns:
        Up to *k* ``(source, target, score)`` tuples ordered by descending
        score. Ties keep the order in which candidates were produced.
    """
    import heapq

    _require_networkx()
    # Guard before any scoring: a negative ``k`` would otherwise slice from the
    # end of the ranking and return an arbitrary subset.
    if k <= 0 or g.number_of_nodes() == 0:
        return []
    undirected = g.to_undirected() if g.is_directed() else g

    # ``None`` asks networkx to score every non-existent edge in the graph.
    ebunch: list[tuple[str, str]] | None = None
    if source is not None:
        if source not in undirected:
            return []
        # Skip existing neighbours and the source itself: only missing edges
        # out of ``source`` are meaningful candidates.
        excluded = set(undirected[source])
        excluded.add(source)
        ebunch = [(source, target) for target in undirected.nodes if target not in excluded]
        if not ebunch:
            return []

    # ``heapq.nlargest`` is documented as equivalent to
    # ``sorted(..., reverse=True)[:k]`` but only retains k candidates while
    # consuming the generator, which matters in the quadratic all-pairs case.
    top = heapq.nlargest(k, nx.adamic_adar_index(undirected, ebunch), key=lambda t: t[2])
    return [(u, v, float(p)) for u, v, p in top]
11 changes: 7 additions & 4 deletions src/distillery/mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,12 +1256,15 @@ async def distillery_relations( # noqa: PLR0913
- relation_id (str, required for remove): UUID of the relation to delete.
- hops (int, optional for traverse, default=2): BFS depth, capped at [1, 3].
- metric (str, required for metrics): Graph metric to compute.
Valid: [bridges, communities]. Requires the [graph] optional extra.
Valid: [bridges, communities, constraint, link_prediction]. Requires
the [graph] optional extra.
- scope (str, optional for metrics, default="global"): Subgraph scope.
Valid: [global, ego]. ``"ego"`` requires ``entry_id``.
- limit (int, optional for metrics, default=10): For ``metric="bridges"``
returns the top-k entries by betweenness centrality; for
``metric="communities"`` returns the K largest communities.
- limit (int, optional for metrics, default=10): top-k results.
``bridges`` = entries by betweenness centrality; ``communities`` = K
largest communities; ``constraint`` = entries by lowest Burt constraint
(strongest structural-hole brokers); ``link_prediction`` = top predicted
edges by Adamic-Adar (pass ``entry_id`` to score adjacencies for one entry).
- project / tags / date_from / date_to (optional, metrics global scope):
restrict the entries whose relations participate in the graph.

Expand Down
12 changes: 10 additions & 2 deletions src/distillery/mcp/tools/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
_GRAPH_METRICS_PAGE_SIZE = 1000
_GRAPH_METRICS_MAX_IDS = 100_000

_VALID_METRICS = {"bridges", "communities"}
_VALID_METRICS = {"bridges", "communities", "constraint", "link_prediction"}
_VALID_SCOPES = {"global", "ego"}

# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -582,7 +582,7 @@ async def _handle_metrics( # noqa: PLR0911, PLR0912
# ----- cache lookup -----
from distillery.graph.builders import build_relations_graph
from distillery.graph.cache import default_cache
from distillery.graph.metrics import bridges, communities
from distillery.graph.metrics import bridges, communities, constraint, link_prediction

cache = default_cache()
cache_key = (
Expand Down Expand Up @@ -634,6 +634,14 @@ async def _handle_metrics( # noqa: PLR0911, PLR0912
results: list[dict[str, Any]] = [
{"id": node, "score": round(score, 6)} for node, score in ranked
]
elif metric == "constraint":
# Ascending: lowest Burt constraint = strongest structural-hole broker.
ranked = constraint(g, k=limit)
results = [{"id": node, "score": round(score, 6)} for node, score in ranked]
elif metric == "link_prediction":
# entry_id (when given) is the source node — emerging adjacencies for it.
preds = link_prediction(g, source=entry_id_value, k=limit)
results = [{"source": u, "target": v, "score": round(p, 6)} for u, v, p in preds]
else: # metric == "communities"
comms = communities(g)
comms_sorted = sorted(comms, key=lambda c: len(c), reverse=True)[:limit]
Expand Down
69 changes: 68 additions & 1 deletion tests/graph/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,38 @@
pytest.importorskip("networkx")

from distillery.graph.builders import build_relations_graph # noqa: E402
from distillery.graph.metrics import bridges, communities # noqa: E402
from distillery.graph.metrics import ( # noqa: E402
bridges,
communities,
constraint,
link_prediction,
)

pytestmark = pytest.mark.unit


def _bowtie() -> list[dict[str, str]]:
"""M brokers two otherwise-disconnected pairs: {A,B} and {X,Y}."""
return [
{"from_id": "M", "to_id": "A", "relation_type": "link"},
{"from_id": "M", "to_id": "B", "relation_type": "link"},
{"from_id": "A", "to_id": "B", "relation_type": "link"},
{"from_id": "M", "to_id": "X", "relation_type": "link"},
{"from_id": "M", "to_id": "Y", "relation_type": "link"},
{"from_id": "X", "to_id": "Y", "relation_type": "link"},
]


def _shared_neighbors() -> list[dict[str, str]]:
"""A and B share neighbours C, D but are not directly connected."""
return [
{"from_id": "A", "to_id": "C", "relation_type": "link"},
{"from_id": "A", "to_id": "D", "relation_type": "link"},
{"from_id": "B", "to_id": "C", "relation_type": "link"},
{"from_id": "B", "to_id": "D", "relation_type": "link"},
]


def test_bridges_star_graph_center_first() -> None:
"""In a star A-{B,C,D}, the centre A has the highest betweenness."""
rels = [
Expand Down Expand Up @@ -54,3 +81,43 @@ def test_communities_two_clusters_with_bridge() -> None:
assert len(comms) == 2
members = sorted([sorted(c) for c in comms], key=lambda x: x[0])
assert members == [["A", "B", "C"], ["X", "Y", "Z"]]


def test_constraint_broker_has_lowest_score() -> None:
    """The bowtie broker M spans a structural hole, so it ranks first (lowest constraint)."""
    graph = build_relations_graph(_bowtie(), directed=True)
    brokers = constraint(graph, k=10)
    assert brokers
    top_node, _top_score = brokers[0]
    assert top_node == "M"
    # M must be strictly less constrained than A, which sits inside a triangle.
    by_node = dict(brokers)
    assert by_node["M"] < by_node["A"]


def test_constraint_empty_graph_returns_empty() -> None:
    """A graph built from no relations yields no constraint results."""
    empty_graph = build_relations_graph([], directed=True)
    result = constraint(empty_graph, k=10)
    assert result == []


def test_link_prediction_source_predicts_shared_neighbour() -> None:
    """Scoring from A, the best Adamic-Adar candidate is B (both neighbour C and D)."""
    graph = build_relations_graph(_shared_neighbors(), directed=True)
    predictions = link_prediction(graph, source="A", k=5)
    assert predictions
    origin, candidate, weight = predictions[0]
    assert origin == "A"
    assert candidate == "B"
    assert weight > 0


def test_link_prediction_global_surfaces_shared_pair() -> None:
    """Without a source, the missing A-B edge appears among the scored candidates."""
    graph = build_relations_graph(_shared_neighbors(), directed=True)
    predictions = link_prediction(graph, k=10)
    # Compare as unordered pairs: the endpoint order of a predicted edge is
    # not part of the contract under test.
    scored_pairs = {frozenset((left, right)) for left, right, _score in predictions}
    assert frozenset(("A", "B")) in scored_pairs


def test_link_prediction_unknown_source_returns_empty() -> None:
    """A source that is not a node in the graph produces no predictions."""
    graph = build_relations_graph(_shared_neighbors(), directed=True)
    predictions = link_prediction(graph, source="ZZZ", k=5)
    assert predictions == []
63 changes: 63 additions & 0 deletions tests/test_mcp_tools/test_relations_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,69 @@ async def test_metrics_communities_global(store) -> None: # type: ignore[no-unt
assert isinstance(row["members"], list)


async def _seed_bowtie(store):  # type: ignore[no-untyped-def]
    """Seed a bowtie in *store*: M brokers the pairs {A,B} and {X,Y}.

    Stores one entry per node name, then adds a ``"link"`` relation for each
    bowtie edge. Returns a dict mapping node name to the value returned by
    ``_store_entry`` for that node.
    """
    ids = {}
    # Entries are created one at a time, in name order, before any relation.
    for name in "MABXY":
        ids[name] = await _store_entry(store, content=f"entry {name}")

    edges = (("M", "A"), ("M", "B"), ("A", "B"), ("M", "X"), ("M", "Y"), ("X", "Y"))
    for head, tail in edges:
        await store.add_relation(ids[head], ids[tail], "link")
    return ids


async def test_metrics_constraint_broker_first(store) -> None:  # type: ignore[no-untyped-def]
    """``metric="constraint"`` returns id/score rows with the bowtie broker M first."""
    pytest.importorskip("networkx")

    ids = await _seed_bowtie(store)
    request = {"action": "metrics", "metric": "constraint", "scope": "global"}
    payload = _parse(await _handle_relations(store, request))

    assert payload.get("error") is not True
    assert payload["metric"] == "constraint"
    rows = payload["results"]
    assert isinstance(rows, list) and rows
    for row in rows:
        assert "id" in row and "score" in row
    # Lowest Burt constraint ranks first, and that is the broker M.
    assert rows[0]["id"] == ids["M"]


async def test_metrics_link_prediction_with_source(store) -> None:  # type: ignore[no-untyped-def]
    """``metric="link_prediction"`` with ``entry_id`` of A ranks the missing A-B edge first."""
    pytest.importorskip("networkx")

    # A and B both neighbour C and D but have no direct relation.
    ids = {}
    for name in "ABCD":
        ids[name] = await _store_entry(store, content=f"entry {name}")
    edges = (("A", "C"), ("A", "D"), ("B", "C"), ("B", "D"))
    for head, tail in edges:
        await store.add_relation(ids[head], ids[tail], "link")

    request = {
        "action": "metrics",
        "metric": "link_prediction",
        "scope": "global",
        "entry_id": ids["A"],
    }
    payload = _parse(await _handle_relations(store, request))

    assert payload.get("error") is not True
    assert payload["metric"] == "link_prediction"
    rows = payload["results"]
    assert isinstance(rows, list) and rows
    best = rows[0]
    assert best["source"] == ids["A"]
    assert best["target"] == ids["B"]
    assert best["score"] > 0


async def test_metrics_invalid_metric_rejected(store) -> None:  # type: ignore[no-untyped-def]
    """An unsupported metric is rejected with INVALID_PARAMS and names the new metrics."""
    pytest.importorskip("networkx")

    request = {"action": "metrics", "metric": "pagerank", "scope": "global"}
    payload = _parse(await _handle_relations(store, request))

    assert payload.get("error") is True
    assert payload["code"] == "INVALID_PARAMS"
    # The error text should list the expanded metric set.
    message = payload["message"]
    assert "constraint" in message and "link_prediction" in message


async def test_metrics_invalid_metric_returns_invalid_params(store) -> None: # type: ignore[no-untyped-def]
pytest.importorskip("networkx")

Expand Down