From c1d160ad0863f53e51e48eac9b9930dd1ea6e048 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sat, 30 May 2026 06:34:38 +0200 Subject: [PATCH] fix(memory,tools): propagate LLM edge confidence and fix tracing span Closes #4723: resolve_edge_typed passed hardcoded 0.8 instead of edge.confidence to the store in both the non-APEX and APEX-MEM paths. Introduces DEFAULT_EDGE_CONFIDENCE const to eliminate the magic number and forwards edge.confidence.unwrap_or(DEFAULT_EDGE_CONFIDENCE) at both call sites. Adds two regression tests (non-APEX and APEX paths). Closes #4731: resolve_and_validate created a tracing span before the async DNS lookup but entered it after .await, leaving the actual async work invisible in traces. Replaced manual span.enter() with #[tracing::instrument], consistent with the apply_ipi_filter fix (#4730). --- CHANGELOG.md | 5 + crates/zeph-memory/src/semantic/graph.rs | 148 ++++++++++++++++++++++- crates/zeph-tools/src/scrape.rs | 20 ++- 3 files changed, 159 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c562f2fa..594a79a01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Fixed +- `zeph-memory`: `resolve_edge_typed` now forwards `edge.confidence` to the store instead of + hardcoding `0.8`. Introduces `DEFAULT_EDGE_CONFIDENCE` constant and fixes both the non-APEX and + APEX-MEM paths. Closes #4723. +- `zeph-tools`: `resolve_and_validate` tracing span now correctly covers the async DNS lookup via + `#[tracing::instrument]`, consistent with the `apply_ipi_filter` fix in #4730. Closes #4731. - `zeph-core`: `ChannelError` is now marked `#[non_exhaustive]` so new variants can be added without a breaking change. Closes #4726. - `zeph-sanitizer`: `QuarantineError`, `MemoryValidationError`, and `CausalIpiError` are now diff --git a/crates/zeph-memory/src/semantic/graph.rs b/crates/zeph-memory/src/semantic/graph.rs index bac094fd3..b87bef6dd 100644 --- a/crates/zeph-memory/src/semantic/graph.rs +++ b/crates/zeph-memory/src/semantic/graph.rs @@ -138,6 +138,8 @@ const ENTITY_COLLECTION: &str = "zeph_graph_entities"; const MAX_RELATION_BYTES: usize = 256; /// Mirrors the constant from `graph/resolver/mod.rs` — used for sanitizing APEX-MEM inputs. const MAX_FACT_BYTES: usize = 2048; +/// Fallback confidence used when the LLM omits the `confidence` field in an extracted edge. +const DEFAULT_EDGE_CONFIDENCE: f32 = 0.8; /// Work item for a single entity during a note-linking pass. struct EntityWorkItem { @@ -598,7 +600,7 @@ async fn insert_edges( &relation_display, &canonical_relation, &normalized_fact, - 0.8, + edge.confidence.unwrap_or(DEFAULT_EDGE_CONFIDENCE), None, edge_type, true, @@ -625,7 +627,7 @@ async fn insert_edges( tgt_id, &edge.relation, &edge.fact, - 0.8, + edge.confidence.unwrap_or(DEFAULT_EDGE_CONFIDENCE), None, edge_type, belief_cfg.as_ref(), @@ -1464,6 +1466,148 @@ mod tests { ); } + /// Regression test for #4723: `extract_and_store` must forward `ExtractedEdge.confidence` + /// to the graph resolver instead of always using the hardcoded fallback 0.8. + /// + /// The LLM JSON sets `confidence: 0.3`. Before the fix, line 628 passed `0.8` unconditionally; + /// after the fix it passes `edge.confidence.unwrap_or(0.8)` which is `0.3` when present. + /// A freshly-inserted edge sets `confidence_fast = confidence`, so we compare against 0.3. + #[tokio::test] + async fn extract_and_store_forwards_edge_confidence_not_hardcoded_08() { + use crate::graph::{EntityType, GraphStore}; + + let sqlite = crate::store::SqliteStore::new(":memory:").await.unwrap(); + let pool = sqlite.pool().clone(); + + // confidence = 0.3 is far enough from 0.8 that float imprecision cannot mask the bug. + let extraction_json = r#"{ + "entities":[ + {"name":"Alice","type":"person","summary":"person"}, + {"name":"Bob","type":"person","summary":"person"} + ], + "edges":[{"source":"Alice","target":"Bob","relation":"knows","fact":"Alice knows Bob","edge_type":"semantic","confidence":0.3}] + }"#; + let mock = zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]); + let provider = AnyProvider::Mock(mock); + let config = GraphExtractionConfig { + max_entities: 10, + max_edges: 10, + extraction_timeout_secs: 10, + ..Default::default() + }; + + let result = extract_and_store( + "Alice knows Bob.".to_owned(), + vec![], + provider, + pool.clone(), + config, + None, + None, + ) + .await + .unwrap(); + + assert_eq!(result.stats.edges_inserted, 1, "one edge must be inserted"); + + let gs = GraphStore::new(pool); + let alice_id: i64 = gs + .find_entity("alice", EntityType::Person) + .await + .unwrap() + .expect("alice must exist") + .id + .0; + let bob_id: i64 = gs + .find_entity("bob", EntityType::Person) + .await + .unwrap() + .expect("bob must exist") + .id + .0; + + let mut edges = gs.edges_exact(alice_id, bob_id).await.unwrap(); + assert_eq!(edges.len(), 1, "exactly one active edge expected"); + let edge = edges.remove(0); + + // Before fix: confidence_fast would be ~0.8 (hardcoded); after fix: ~0.3 (from JSON). + assert!( + (edge.confidence_fast - 0.3_f32).abs() < 0.01, + "confidence_fast must be ~0.3 (from ExtractedEdge.confidence), got {} (regression for #4723)", + edge.confidence_fast + ); + } + + /// Regression for #4723 (APEX-MEM path): `extract_and_store` must forward + /// `ExtractedEdge.confidence` to `insert_or_supersede_with_turn_index_and_metrics` instead + /// of using the hardcoded literal `0.8`. + #[tokio::test] + async fn extract_and_store_apex_forwards_edge_confidence_not_hardcoded_08() { + use crate::graph::{EntityType, GraphStore}; + + let sqlite = crate::store::SqliteStore::new(":memory:").await.unwrap(); + let pool = sqlite.pool().clone(); + + // confidence = 0.3 is far enough from 0.8 that float imprecision cannot mask the bug. + let extraction_json = r#"{ + "entities":[ + {"name":"Alice","type":"person","summary":"person"}, + {"name":"Bob","type":"person","summary":"person"} + ], + "edges":[{"source":"Alice","target":"Bob","relation":"knows","fact":"Alice knows Bob","edge_type":"semantic","confidence":0.3}] + }"#; + let mock = zeph_llm::mock::MockProvider::with_responses(vec![extraction_json.to_owned()]); + let provider = AnyProvider::Mock(mock); + let config = GraphExtractionConfig { + max_entities: 10, + max_edges: 10, + extraction_timeout_secs: 10, + apex_mem_enabled: true, + ..Default::default() + }; + + let result = extract_and_store( + "Alice knows Bob.".to_owned(), + vec![], + provider, + pool.clone(), + config, + None, + None, + ) + .await + .unwrap(); + + assert_eq!(result.stats.edges_inserted, 1, "one edge must be inserted"); + + let gs = GraphStore::new(pool); + let alice_id: i64 = gs + .find_entity("alice", EntityType::Person) + .await + .unwrap() + .expect("alice must exist") + .id + .0; + let bob_id: i64 = gs + .find_entity("bob", EntityType::Person) + .await + .unwrap() + .expect("bob must exist") + .id + .0; + + let mut edges = gs.edges_exact(alice_id, bob_id).await.unwrap(); + assert_eq!(edges.len(), 1, "exactly one active edge expected"); + let edge = edges.remove(0); + + // Before fix: confidence_fast would be ~0.8 (hardcoded); after fix: ~0.3 (from JSON). + assert!( + (edge.confidence_fast - 0.3_f32).abs() < 0.01, + "confidence_fast must be ~0.3 (from ExtractedEdge.confidence), got {} (regression for #4723 APEX path)", + edge.confidence_fast + ); + } + /// Verify `GraphExtractionConfig` default benna rates match `GraphStore::new` defaults. /// /// If either default is changed in one place but not the other, behavior silently diverges diff --git a/crates/zeph-tools/src/scrape.rs b/crates/zeph-tools/src/scrape.rs index 5ea0f5c21..053c1501d 100644 --- a/crates/zeph-tools/src/scrape.rs +++ b/crates/zeph-tools/src/scrape.rs @@ -1132,26 +1132,22 @@ fn is_private_host(host: &url::Host<&str>) -> bool { /// /// Returning the addresses allows the caller to pin the HTTP client to these exact /// addresses, eliminating TOCTOU between DNS validation and the actual connection. +#[tracing::instrument(name = "tools.scrape.dns.resolve", skip(url), fields(host = url.host_str().unwrap_or("")))] async fn resolve_and_validate(url: &Url) -> Result<(String, Vec), ToolError> { let Some(host) = url.host_str() else { return Ok((String::new(), vec![])); }; let port = url.port_or_known_default().unwrap_or(443); - let span = tracing::info_span!("scrape.dns.resolve", host = host); - let dns_result = tokio::time::timeout( + let addrs: Vec = tokio::time::timeout( Duration::from_secs(10), tokio::net::lookup_host(format!("{host}:{port}")), ) - .await; - let addrs: Vec = { - let _enter = span.enter(); - dns_result - .map_err(|_| ToolError::Timeout { timeout_secs: 10 })? - .map_err(|e| ToolError::Blocked { - command: format!("DNS resolution failed: {e}"), - })? - .collect() - }; + .await + .map_err(|_| ToolError::Timeout { timeout_secs: 10 })? + .map_err(|e| ToolError::Blocked { + command: format!("DNS resolution failed: {e}"), + })? + .collect(); for addr in &addrs { if is_private_ip(addr.ip()) { return Err(ToolError::Blocked {