diff --git a/src/superlocalmemory/core/config.py b/src/superlocalmemory/core/config.py index 5d28fcd9..faf68faf 100644 --- a/src/superlocalmemory/core/config.py +++ b/src/superlocalmemory/core/config.py @@ -119,6 +119,33 @@ def as_dict(self) -> dict[str, float]: } +# --------------------------------------------------------------------------- +# Scope Weights +# --------------------------------------------------------------------------- + +@dataclass +class ScopeWeights: + """RRF fusion weights for multi-scope retrieval. + + Personal scope has highest weight (most relevant to current profile). + Shared scope has medium weight (team/group memories). + Global scope has lowest weight (public/common knowledge). + """ + + personal: float = 1.0 + shared: float = 0.7 + global_: float = 0.5 # trailing underscore avoids Python keyword + + def __post_init__(self) -> None: + for name in ("personal", "shared", "global_"): + val = getattr(self, name) + if val < 0: + raise ValueError(f"ScopeWeights values must be non-negative, got {name}={val}") + + def as_dict(self) -> dict[str, float]: + return {"personal": self.personal, "shared": self.shared, "global": self.global_} + + # --------------------------------------------------------------------------- # Encoding Config # --------------------------------------------------------------------------- @@ -692,6 +719,7 @@ class SLMConfig: embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig) llm: LLMConfig = field(default_factory=LLMConfig) channel_weights: ChannelWeights = field(default_factory=ChannelWeights) + scope_weights: ScopeWeights = field(default_factory=ScopeWeights) encoding: EncodingConfig = field(default_factory=EncodingConfig) retrieval: RetrievalConfig = field(default_factory=RetrievalConfig) math: MathConfig = field(default_factory=MathConfig) @@ -835,6 +863,14 @@ def load(cls, config_path: Path | None = None) -> SLMConfig: prestage_max_response_bytes=int(inj.get("prestage_max_response_bytes", 64 * 1024)), ) + # Multi-scope memory: scope weights + sw = data.get("scope_weights", {}) + if sw: + config.scope_weights = ScopeWeights(**{ + k: v for k, v in sw.items() + if k in ScopeWeights.__dataclass_fields__ + }) + return config def save( @@ -927,6 +963,13 @@ def save( "prestage_max_response_bytes": self.injection.prestage_max_response_bytes, } + # Multi-scope memory: scope weights + data["scope_weights"] = { + "personal": self.scope_weights.personal, + "shared": self.scope_weights.shared, + "global_": self.scope_weights.global_, + } + # Preserve existing V3.3 config sections that aren't in for_mode() for key in ("forgetting", "quantization", "sagq", "embedding_signature", "auto_invoke"): if key in existing: diff --git a/src/superlocalmemory/core/recall_pipeline.py b/src/superlocalmemory/core/recall_pipeline.py index 70e27226..d32849a1 100644 --- a/src/superlocalmemory/core/recall_pipeline.py +++ b/src/superlocalmemory/core/recall_pipeline.py @@ -587,9 +587,14 @@ def run_recall( access_log: Any = None, auto_linker: Any = None, fast: bool = False, + include_global: bool = True, + include_shared: bool = True, ) -> RecallResponse: """Recall relevant facts for a query. + Multi-scope: ``include_global`` / ``include_shared`` control which + scopes participate in retrieval (passed through to retrieval engine). + Pipeline: retrieval -> agentic sufficiency (if configured) -> post-recall updates. V3.4.40: ``fast=True`` adds spreading_activation to the per-recall @@ -623,6 +628,8 @@ def _mark(_label: str) -> None: response = retrieval_engine.recall( query, profile_id, m, limit, extra_disabled_channels=extra_disabled, + include_global=include_global, + include_shared=include_shared, ) _mark("retrieval(chan+rerank)") diff --git a/src/superlocalmemory/retrieval/bm25_channel.py b/src/superlocalmemory/retrieval/bm25_channel.py index 9ac13251..7a3fbbfa 100644 --- a/src/superlocalmemory/retrieval/bm25_channel.py +++ b/src/superlocalmemory/retrieval/bm25_channel.py @@ -86,9 +86,15 @@ def ensure_loaded(self, profile_id: str) -> None: return token_map = self._db.get_all_bm25_tokens(profile_id) + _inc_global = getattr(self, 'include_global', True) + _inc_shared = getattr(self, 'include_shared', True) if not token_map: # Fallback: tokenize facts directly if no pre-stored tokens - facts = self._db.get_all_facts(profile_id) + facts = self._db.get_all_facts( + profile_id, + include_global=_inc_global, + include_shared=_inc_shared, + ) for fact in facts: if fact.fact_id in self._fact_id_set: continue @@ -104,7 +110,11 @@ def ensure_loaded(self, profile_id: str) -> None: # Load raw texts for phrase matching (V3.3.12) fact_content_map = {} try: - facts = self._db.get_all_facts(profile_id) + facts = self._db.get_all_facts( + profile_id, + include_global=_inc_global, + include_shared=_inc_shared, + ) fact_content_map = {f.fact_id: f.content for f in facts} except Exception: pass diff --git a/src/superlocalmemory/retrieval/engine.py b/src/superlocalmemory/retrieval/engine.py index eefb2bb9..1d702fb1 100644 --- a/src/superlocalmemory/retrieval/engine.py +++ b/src/superlocalmemory/retrieval/engine.py @@ -117,9 +117,15 @@ def recall( mode: Mode = Mode.A, limit: int = 20, *, extra_disabled_channels: set[str] | None = None, + include_global: bool = True, + include_shared: bool = True, ) -> RecallResponse: """Full retrieval pipeline: strategy -> channels -> RRF -> rerank. + Multi-scope: ``include_global`` / ``include_shared`` control which + scopes participate in retrieval. Both default to True for backward + compatibility (existing data has scope='personal' — no effect). + V3.4.40 (2026-05-09): ``extra_disabled_channels`` allows callers to skip specific channels for a single recall (e.g. SpreadingActivation for the ``--fast`` CLI flag) without mutating shared config. @@ -127,6 +133,15 @@ def recall( t0 = time.monotonic() self._extra_disabled = set(extra_disabled_channels or ()) + # Multi-scope: set scope flags on channels before parallel execution. + for ch in (self._semantic, self._bm25, self._entity, self._temporal, + self._hopfield, self._spreading_activation, self._profile_channel): + if ch is not None: + ch.include_global = include_global + ch.include_shared = include_shared + self._include_global = include_global + self._include_shared = include_shared + # v3.5.0 diagnostic: stage timing inside retrieval (SLM_RECALL_TIMING=1). import os as _os_e import time as _time_e @@ -657,7 +672,11 @@ def _load_facts( needed = [fr.fact_id for fr in fused] if not needed: return {} - facts = self._db.get_facts_by_ids(needed, profile_id) + facts = self._db.get_facts_by_ids( + needed, profile_id, + include_global=getattr(self, '_include_global', True), + include_shared=getattr(self, '_include_shared', True), + ) return {f.fact_id: f for f in facts} # -- Cross-encoder rerank ----------------------------------------------- diff --git a/src/superlocalmemory/retrieval/entity_channel.py b/src/superlocalmemory/retrieval/entity_channel.py index 65aa6856..bb1c5e73 100644 --- a/src/superlocalmemory/retrieval/entity_channel.py +++ b/src/superlocalmemory/retrieval/entity_channel.py @@ -283,7 +283,7 @@ def search(self, query: str, profile_id: str, top_k: int = 50) -> list[tuple[str for fid in self._entity_to_facts.get(eid, ()): activation[fid] = max(activation[fid], 1.0) else: - for fact in self._db.get_facts_by_entity(eid, profile_id): + for fact in self._db.get_facts_by_entity(eid, profile_id, include_global=getattr(self, 'include_global', True), include_shared=getattr(self, 'include_shared', True)): activation[fact.fact_id] = max(activation[fact.fact_id], 1.0) # Spreading activation through graph edges (all in-memory O(1) lookups) @@ -317,7 +317,7 @@ def search(self, query: str, profile_id: str, top_k: int = 50) -> list[tuple[str # NOTE: SQL fallback path does NOT use graph intelligence (P1/P2/P3). # Graph intelligence is only available on the in-memory cache path. # This fallback exists for mock/test DBs. See Phase 7 LLD H-01. - for edge in self._db.get_edges_for_node(fid, profile_id): + for edge in self._db.get_edges_for_node(fid, profile_id, include_global=getattr(self, 'include_global', True), include_shared=getattr(self, 'include_shared', True)): neighbor = edge.target_id if edge.source_id == fid else edge.source_id propagated = activation[fid] * self._decay if propagated >= self._threshold and propagated > activation.get(neighbor, 0.0): @@ -342,7 +342,7 @@ def search(self, query: str, profile_id: str, top_k: int = 50) -> list[tuple[str new_eids_sql = self._discover_entities(frontier, profile_id, visited_entities) for eid in new_eids_sql: visited_entities.add(eid) - for fact in self._db.get_facts_by_entity(eid, profile_id): + for fact in self._db.get_facts_by_entity(eid, profile_id, include_global=getattr(self, 'include_global', True), include_shared=getattr(self, 'include_shared', True)): if hop_decay > activation.get(fact.fact_id, 0.0): activation[fact.fact_id] = hop_decay next_frontier.add(fact.fact_id) @@ -438,7 +438,7 @@ def score_candidates( for fid in self._entity_to_facts.get(eid, ()): activation[fid] = max(activation[fid], 1.0) else: - for fact in self._db.get_facts_by_entity(eid, profile_id): + for fact in self._db.get_facts_by_entity(eid, profile_id, include_global=getattr(self, 'include_global', True), include_shared=getattr(self, 'include_shared', True)): activation[fact.fact_id] = max(activation[fact.fact_id], 1.0) frontier = set(activation.keys()) @@ -628,7 +628,7 @@ def _search_via_cozo( # Map entity scores to fact scores fact_scores: list[tuple[str, float]] = [] for entity_id, score in scored: - facts = self._db.get_facts_by_entity(entity_id, profile_id) + facts = self._db.get_facts_by_entity(entity_id, profile_id, include_global=getattr(self, 'include_global', True), include_shared=getattr(self, 'include_shared', True)) for fact in facts: fact_scores.append((fact.fact_id, score)) diff --git a/src/superlocalmemory/retrieval/hopfield_channel.py b/src/superlocalmemory/retrieval/hopfield_channel.py index 179d9db8..7ed4783d 100644 --- a/src/superlocalmemory/retrieval/hopfield_channel.py +++ b/src/superlocalmemory/retrieval/hopfield_channel.py @@ -248,7 +248,11 @@ def _search_with_prefilter( # Stage 2: Load candidate facts candidate_ids = [fid for fid, _ in knn_results] - candidates = self._db.get_facts_by_ids(candidate_ids, profile_id) + candidates = self._db.get_facts_by_ids( + candidate_ids, profile_id, + include_global=getattr(self, 'include_global', True), + include_shared=getattr(self, 'include_shared', True), + ) if not candidates: return [] @@ -304,7 +308,11 @@ def _get_memory_matrix( # Step 2: Load facts (V3.3.12: cap to most recent 5000 to bound memory) # memory-bounding-02: push the cap into SQL (LIMIT) so we don't # deserialize the whole table just to slice it. - facts = self._db.get_all_facts(profile_id, limit=5000) + facts = self._db.get_all_facts( + profile_id, limit=5000, + include_global=getattr(self, 'include_global', True), + include_shared=getattr(self, 'include_shared', True), + ) if not facts: return (None, []) diff --git a/src/superlocalmemory/retrieval/semantic_channel.py b/src/superlocalmemory/retrieval/semantic_channel.py index 31453184..39175c60 100644 --- a/src/superlocalmemory/retrieval/semantic_channel.py +++ b/src/superlocalmemory/retrieval/semantic_channel.py @@ -168,7 +168,11 @@ def _search_via_vector_store( # Step 2: Load only the candidate facts (NOT all facts) candidate_ids = [fid for fid, _ in knn_results] knn_scores = {fid: score for fid, score in knn_results} - facts = self._db.get_facts_by_ids(candidate_ids, profile_id) + facts = self._db.get_facts_by_ids( + candidate_ids, profile_id, + include_global=getattr(self, 'include_global', True), + include_shared=getattr(self, 'include_shared', True), + ) if not facts: return [(fid, score) for fid, score in knn_results[:top_k]] @@ -230,7 +234,11 @@ def _search_full_scan( q_mean = np.array(qm, dtype=np.float32) q_var = np.array(qv, dtype=np.float32) - facts = self._db.get_all_facts(profile_id) + facts = self._db.get_all_facts( + profile_id, + include_global=getattr(self, 'include_global', True), + include_shared=getattr(self, 'include_shared', True), + ) scored: list[tuple[str, float]] = [] for fact in facts: diff --git a/src/superlocalmemory/storage/database.py b/src/superlocalmemory/storage/database.py index 30433631..48148fb2 100644 --- a/src/superlocalmemory/storage/database.py +++ b/src/superlocalmemory/storage/database.py @@ -43,6 +43,43 @@ def _jd(val: Any) -> str | None: _RETRY_BASE_DELAY = 0.1 # seconds — exponential backoff base +def _scope_where( + profile_id: str, + *, + include_global: bool = True, + include_shared: bool = True, + prefix: str = "", +) -> tuple[str, list]: + """Build scope-filtering WHERE clause for multi-scope retrieval. + + Returns ``(where_clause, params)`` for splicing into SQL queries. + + When ``include_global=True``, facts with ``scope='global'`` are included + regardless of profile. When ``include_shared=True``, facts explicitly + shared with this profile (via ``shared_with`` JSON array) are also + included. + + Backward-compatible defaults mean existing callers automatically pick up + global+shared facts once those exist. Until then (PR-A has schema with + DEFAULT 'personal'), the OR branches are harmless no-ops. + """ + table = f"{prefix}." if prefix else "" + clauses = [f"({table}profile_id = ?)"] + params: list = [profile_id] + + if include_global: + clauses.append(f"({table}scope = 'global')") + + if include_shared: + clauses.append( + f"({table}scope = 'shared' AND {table}shared_with LIKE ?)" + ) + params.append(f'%"{profile_id}"%') + + where = "(" + " OR ".join(clauses) + ")" + return where, params + + class DatabaseManager: """Concurrent-safe SQLite manager with WAL, profile isolation, and FTS5. @@ -290,17 +327,29 @@ def set_pinned(self, fact_id: str, pinned: bool) -> None: (1 if pinned else 0, fact_id), ) - def get_pinned(self, profile_id: str) -> list[AtomicFact]: + def get_pinned( + self, profile_id: str, + include_global: bool = True, + include_shared: bool = True, + ) -> list[AtomicFact]: """Return all pinned facts for a profile, highest-importance first.""" + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT * FROM atomic_facts WHERE profile_id = ? AND pinned = 1 " + f"SELECT * FROM atomic_facts WHERE {where} AND pinned = 1 " "ORDER BY importance DESC", - (profile_id,), + (*params,), ) return [self._row_to_fact(r) for r in rows] def get_all_facts( self, profile_id: str, limit: int | None = None, + *, + include_global: bool = True, + include_shared: bool = True, ) -> list[AtomicFact]: """All facts for a profile, newest first. @@ -308,22 +357,31 @@ def get_all_facts( most-recent N (e.g. the Hopfield channel's 5000 cap) don't deserialize the entire table into AtomicFact objects. Default (None) = all facts. """ + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) if limit is not None: rows = self.execute( - "SELECT * FROM atomic_facts WHERE profile_id = ? " + f"SELECT * FROM atomic_facts WHERE {where} " "ORDER BY created_at DESC LIMIT ?", - (profile_id, int(limit)), + (*params, int(limit)), ) else: rows = self.execute( - "SELECT * FROM atomic_facts WHERE profile_id = ? ORDER BY created_at DESC", - (profile_id,), + f"SELECT * FROM atomic_facts WHERE {where} ORDER BY created_at DESC", + (*params,), ) return [self._row_to_fact(r) for r in rows] _MAX_FACTS_PER_ENTITY_LOOKUP: int = 100 - def get_facts_by_entity(self, entity_id: str, profile_id: str) -> list[AtomicFact]: + def get_facts_by_entity( + self, entity_id: str, profile_id: str, + include_global: bool = True, + include_shared: bool = True, + ) -> list[AtomicFact]: """Facts whose canonical_entities JSON array contains *entity_id*. V3.3.14: LIMIT to _MAX_FACTS_PER_ENTITY_LOOKUP (100) to prevent @@ -331,19 +389,33 @@ def get_facts_by_entity(self, entity_id: str, profile_id: str) -> list[AtomicFac facts for popular entities (500+) causing 17GB+ memory usage. Ordered by created_at DESC so newest facts are always included. """ + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT * FROM atomic_facts WHERE profile_id = ? AND canonical_entities_json LIKE ? " + f"SELECT * FROM atomic_facts WHERE {where} AND canonical_entities_json LIKE ? " "ORDER BY created_at DESC LIMIT ?", - (profile_id, f'%"{entity_id}"%', self._MAX_FACTS_PER_ENTITY_LOOKUP), + (*params, f'%"{entity_id}"%', self._MAX_FACTS_PER_ENTITY_LOOKUP), ) return [self._row_to_fact(r) for r in rows] - def get_facts_by_type(self, fact_type: FactType, profile_id: str) -> list[AtomicFact]: + def get_facts_by_type( + self, fact_type: FactType, profile_id: str, + include_global: bool = True, + include_shared: bool = True, + ) -> list[AtomicFact]: """All facts of a given type for a profile.""" + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT * FROM atomic_facts WHERE profile_id = ? AND fact_type = ? " + f"SELECT * FROM atomic_facts WHERE {where} AND fact_type = ? " "ORDER BY created_at DESC", - (profile_id, fact_type.value), + (*params, fact_type.value), ) return [self._row_to_fact(r) for r in rows] @@ -411,10 +483,19 @@ def gc_orphaned_embedding_metadata(self) -> int: ) return n - def get_fact_count(self, profile_id: str) -> int: + def get_fact_count( + self, profile_id: str, + include_global: bool = True, + include_shared: bool = True, + ) -> int: """Total fact count for a profile.""" + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT COUNT(*) AS c FROM atomic_facts WHERE profile_id = ?", (profile_id,), + f"SELECT COUNT(*) AS c FROM atomic_facts WHERE {where}", (*params,), ) return int(rows[0]["c"]) if rows else 0 @@ -481,12 +562,19 @@ def get_memory_content_batch(self, memory_ids: list[str]) -> dict[str, str]: def get_facts_by_memory_id( self, memory_id: str, profile_id: str, + include_global: bool = True, + include_shared: bool = True, ) -> list[AtomicFact]: """Get all atomic facts for a given memory_id.""" + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT * FROM atomic_facts WHERE memory_id = ? AND profile_id = ? " + f"SELECT * FROM atomic_facts WHERE memory_id = ? AND {where} " "ORDER BY confidence DESC", - (memory_id, profile_id), + (memory_id, *params), ) return [self._row_to_fact(r) for r in rows] @@ -522,12 +610,21 @@ def store_edge(self, edge: GraphEdge) -> str: ) return edge.edge_id - def get_edges_for_node(self, node_id: str, profile_id: str) -> list[GraphEdge]: + def get_edges_for_node( + self, node_id: str, profile_id: str, + include_global: bool = True, + include_shared: bool = True, + ) -> list[GraphEdge]: """All edges where node_id is source or target.""" + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT * FROM graph_edges WHERE profile_id = ? " + f"SELECT * FROM graph_edges WHERE {where} " "AND (source_id = ? OR target_id = ?)", - (profile_id, node_id, node_id), + (*params, node_id, node_id), ) return [ GraphEdge( @@ -553,12 +650,21 @@ def store_temporal_event(self, event: TemporalEvent) -> str: ) return event.event_id - def get_temporal_events(self, entity_id: str, profile_id: str) -> list[TemporalEvent]: + def get_temporal_events( + self, entity_id: str, profile_id: str, + include_global: bool = True, + include_shared: bool = True, + ) -> list[TemporalEvent]: """All temporal events for an entity, newest first.""" + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT * FROM temporal_events WHERE profile_id = ? AND entity_id = ? " + f"SELECT * FROM temporal_events WHERE {where} AND entity_id = ? " "ORDER BY observation_date DESC", - (profile_id, entity_id), + (*params, entity_id), ) return [ TemporalEvent( @@ -588,7 +694,11 @@ def get_all_bm25_tokens(self, profile_id: str) -> dict[str, list[str]]: ) return {dict(r)["fact_id"]: json.loads(dict(r)["tokens"]) for r in rows} - def search_facts_fts(self, query: str, profile_id: str, limit: int = 20) -> list[AtomicFact]: + def search_facts_fts( + self, query: str, profile_id: str, limit: int = 20, + include_global: bool = True, + include_shared: bool = True, + ) -> list[AtomicFact]: """Full-text search via FTS5, joined to facts table for reconstruction.""" # v3.6.12 (search-1): the raw query was passed straight into FTS5 MATCH, # so any '?', '-', quote, or trailing boolean keyword (AND/OR/NOT) raised @@ -599,12 +709,18 @@ def search_facts_fts(self, query: str, profile_id: str, limit: int = 20) -> list if not tokens: return [] match_expr = " OR ".join(f'"{t}"' for t in tokens) + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + prefix="f", + ) rows = self.execute( - """SELECT f.* FROM atomic_facts_fts AS fts + f"""SELECT f.* FROM atomic_facts_fts AS fts JOIN atomic_facts AS f ON f.fact_id = fts.fact_id - WHERE fts.atomic_facts_fts MATCH ? AND f.profile_id = ? + WHERE fts.atomic_facts_fts MATCH ? AND {where} ORDER BY fts.rank LIMIT ?""", - (match_expr, profile_id, limit), + (match_expr, *params, limit), ) return [self._row_to_fact(r) for r in rows] @@ -641,15 +757,22 @@ def get_fact(self, fact_id: str) -> AtomicFact | None: def get_facts_by_ids( self, fact_ids: list[str], profile_id: str, + include_global: bool = True, + include_shared: bool = True, ) -> list[AtomicFact]: """Get multiple facts by their IDs, scoped to a profile.""" if not fact_ids: return [] + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) placeholders = ",".join("?" for _ in fact_ids) rows = self.execute( f"SELECT * FROM atomic_facts WHERE fact_id IN ({placeholders}) " - f"AND profile_id = ? ORDER BY created_at DESC", - (*fact_ids, profile_id), + f"AND {where} ORDER BY created_at DESC", + (*fact_ids, *params), ) return [self._row_to_fact(r) for r in rows] @@ -820,14 +943,21 @@ def store_consolidation_action(self, action: ConsolidationAction) -> str: def get_temporal_events_by_range( self, profile_id: str, start_date: str, end_date: str, + include_global: bool = True, + include_shared: bool = True, ) -> list[TemporalEvent]: """Temporal events within a date range (inclusive).""" + where, params = _scope_where( + profile_id, + include_global=include_global, + include_shared=include_shared, + ) rows = self.execute( - "SELECT * FROM temporal_events WHERE profile_id = ? " + f"SELECT * FROM temporal_events WHERE {where} " "AND (referenced_date BETWEEN ? AND ? " " OR observation_date BETWEEN ? AND ?) " "ORDER BY observation_date DESC", - (profile_id, start_date, end_date, start_date, end_date), + (*params, start_date, end_date, start_date, end_date), ) return [ TemporalEvent( diff --git a/src/superlocalmemory/storage/migration_runner.py b/src/superlocalmemory/storage/migration_runner.py index 6347da1b..d9351f21 100644 --- a/src/superlocalmemory/storage/migration_runner.py +++ b/src/superlocalmemory/storage/migration_runner.py @@ -51,6 +51,7 @@ M013_bi_temporal_columns as _M013, M014_v345_scale_ready as _M014, M015_add_pinned_column as _M015, + M016_add_scope_support as _M016, ) # Map migration name → module (used for the optional ``verify(conn)`` hook @@ -71,6 +72,7 @@ _M013.NAME: _M013, _M014.NAME: _M014, _M015.NAME: _M015, + _M016.NAME: _M016, } logger = logging.getLogger(__name__) @@ -134,6 +136,9 @@ class Migration: Migration(name=_M014.NAME, db_target="memory", ddl=_M014.DDL), # M015 adds pinned column to atomic_facts (v3.4.65 core-memory pins). Migration(name=_M015.NAME, db_target="memory", ddl=_M015.DDL), + # M016 adds scope and shared_with columns to 5 core tables for + # multi-scope memory support (personal/global/shared). + Migration(name=_M016.NAME, db_target="memory", ddl=_M016.DDL), ] diff --git a/src/superlocalmemory/storage/migrations/M016_add_scope_support.py b/src/superlocalmemory/storage/migrations/M016_add_scope_support.py new file mode 100644 index 00000000..385a8b98 --- /dev/null +++ b/src/superlocalmemory/storage/migrations/M016_add_scope_support.py @@ -0,0 +1,55 @@ +# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar +# Licensed under AGPL-3.0-or-later - see LICENSE file +# Part of SuperLocalMemory v3.6.13 + +"""M016 — scope and shared_with columns on core tables (memory.db, deferred). + +Adds two columns to each of the 5 core tables for multi-scope memory support: + + scope TEXT NOT NULL DEFAULT 'personal' — personal | global + shared_with TEXT — JSON array of profile_ids + +Existing data retains scope='personal' (backward compatible). New indexes +on ``scope`` and ``(profile_id, scope)`` speed up scope-filtered queries. + +Deferred like M006, M011, and M013 because the core tables are bootstrapped +at engine init, not at migration time. Daemon lifespan calls +``apply_deferred`` right after engine init so these columns materialise +on first boot after upgrade. + +Author: Varun Pratap Bhardwaj / Qualixar +""" + +from __future__ import annotations + +import sqlite3 + +NAME = "M016_add_scope_support" +DB_TARGET = "memory" + +TABLES = [ + "memories", + "atomic_facts", + "canonical_entities", + "graph_edges", + "temporal_events", +] + +DDL = ";".join( + [f"ALTER TABLE {t} ADD COLUMN scope TEXT NOT NULL DEFAULT 'personal'" for t in TABLES] + + [f"ALTER TABLE {t} ADD COLUMN shared_with TEXT" for t in TABLES] + + [f"CREATE INDEX IF NOT EXISTS idx_{t}_scope ON {t}(scope)" for t in TABLES] + + [ + f"CREATE INDEX IF NOT EXISTS idx_{t}_profile_scope ON {t}(profile_id, scope)" + for t in TABLES + ] +) + + +def verify(conn: sqlite3.Connection) -> bool: + """Check if migration already applied by inspecting atomic_facts columns.""" + try: + cols = {r[1] for r in conn.execute("PRAGMA table_info(atomic_facts)").fetchall()} + except sqlite3.Error: + return False + return "scope" in cols diff --git a/src/superlocalmemory/storage/models.py b/src/superlocalmemory/storage/models.py index 9b9ec7f8..a3b99f83 100644 --- a/src/superlocalmemory/storage/models.py +++ b/src/superlocalmemory/storage/models.py @@ -119,6 +119,8 @@ class MemoryRecord: memory_id: str = field(default_factory=_new_id) profile_id: str = "default" + scope: str = "personal" + shared_with: list[str] | None = None content: str = "" session_id: str = "" speaker: str = "" # Who said this @@ -139,6 +141,8 @@ class AtomicFact: fact_id: str = field(default_factory=_new_id) memory_id: str = "" # Source memory this was extracted from profile_id: str = "default" + scope: str = "personal" + shared_with: list[str] | None = None content: str = "" # Atomic fact statement fact_type: FactType = FactType.SEMANTIC @@ -193,6 +197,8 @@ class CanonicalEntity: entity_id: str = field(default_factory=_new_id) profile_id: str = "default" + scope: str = "personal" + shared_with: list[str] | None = None canonical_name: str = "" entity_type: str = "" # person / place / org / concept / event first_seen: str = field(default_factory=_now) @@ -251,6 +257,8 @@ class TemporalEvent: event_id: str = field(default_factory=_new_id) profile_id: str = "default" + scope: str = "personal" + shared_with: list[str] | None = None entity_id: str = "" # FK to CanonicalEntity fact_id: str = "" # FK to AtomicFact observation_date: str | None = None @@ -266,6 +274,8 @@ class GraphEdge: edge_id: str = field(default_factory=_new_id) profile_id: str = "default" + scope: str = "personal" + shared_with: list[str] | None = None source_id: str = "" # Fact ID or Entity ID target_id: str = "" # Fact ID or Entity ID edge_type: EdgeType = EdgeType.ENTITY diff --git a/src/superlocalmemory/storage/schema.py b/src/superlocalmemory/storage/schema.py index 7c3a8cc7..d6e67600 100644 --- a/src/superlocalmemory/storage/schema.py +++ b/src/superlocalmemory/storage/schema.py @@ -110,6 +110,8 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: CREATE TABLE IF NOT EXISTS memories ( memory_id TEXT PRIMARY KEY, profile_id TEXT NOT NULL DEFAULT 'default', + scope TEXT NOT NULL DEFAULT 'personal', + shared_with TEXT, content TEXT NOT NULL, session_id TEXT NOT NULL DEFAULT '', speaker TEXT NOT NULL DEFAULT '', @@ -128,6 +130,10 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: ON memories (profile_id, session_id); CREATE INDEX IF NOT EXISTS idx_memories_created ON memories (created_at); +CREATE INDEX IF NOT EXISTS idx_memories_scope + ON memories (scope); +CREATE INDEX IF NOT EXISTS idx_memories_profile_scope + ON memories (profile_id, scope); """ @@ -140,6 +146,8 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: fact_id TEXT PRIMARY KEY, memory_id TEXT NOT NULL, profile_id TEXT NOT NULL DEFAULT 'default', + scope TEXT NOT NULL DEFAULT 'personal', + shared_with TEXT, content TEXT NOT NULL, fact_type TEXT NOT NULL DEFAULT 'semantic' CHECK (fact_type IN ( @@ -209,6 +217,10 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: CREATE INDEX IF NOT EXISTS idx_facts_interval ON atomic_facts (profile_id, interval_start, interval_end) WHERE interval_start IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_atomic_facts_scope + ON atomic_facts (scope); +CREATE INDEX IF NOT EXISTS idx_atomic_facts_profile_scope + ON atomic_facts (profile_id, scope); """ @@ -287,6 +299,8 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: CREATE TABLE IF NOT EXISTS canonical_entities ( entity_id TEXT PRIMARY KEY, profile_id TEXT NOT NULL DEFAULT 'default', + scope TEXT NOT NULL DEFAULT 'personal', + shared_with TEXT, canonical_name TEXT NOT NULL, entity_type TEXT NOT NULL DEFAULT '', first_seen TEXT NOT NULL DEFAULT (datetime('now')), @@ -303,6 +317,10 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: ON canonical_entities (profile_id, canonical_name COLLATE NOCASE); CREATE INDEX IF NOT EXISTS idx_entities_type ON canonical_entities (profile_id, entity_type); +CREATE INDEX IF NOT EXISTS idx_canonical_entities_scope + ON canonical_entities (scope); +CREATE INDEX IF NOT EXISTS idx_canonical_entities_profile_scope + ON canonical_entities (profile_id, scope); """ @@ -386,6 +404,8 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: CREATE TABLE IF NOT EXISTS temporal_events ( event_id TEXT PRIMARY KEY, profile_id TEXT NOT NULL DEFAULT 'default', + scope TEXT NOT NULL DEFAULT 'personal', + shared_with TEXT, entity_id TEXT NOT NULL, fact_id TEXT NOT NULL, observation_date TEXT, @@ -409,6 +429,10 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: CREATE INDEX IF NOT EXISTS idx_tevents_date_range ON temporal_events (profile_id, referenced_date) WHERE referenced_date IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_temporal_events_scope + ON temporal_events (scope); +CREATE INDEX IF NOT EXISTS idx_temporal_events_profile_scope + ON temporal_events (profile_id, scope); """ @@ -420,6 +444,8 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: CREATE TABLE IF NOT EXISTS graph_edges ( edge_id TEXT PRIMARY KEY, profile_id TEXT NOT NULL DEFAULT 'default', + scope TEXT NOT NULL DEFAULT 'personal', + shared_with TEXT, source_id TEXT NOT NULL, target_id TEXT NOT NULL, edge_type TEXT NOT NULL DEFAULT 'entity' @@ -444,6 +470,10 @@ def _set_pragmas(conn: sqlite3.Connection) -> None: ON graph_edges (profile_id, edge_type); CREATE INDEX IF NOT EXISTS idx_edges_exists_check ON graph_edges (profile_id, source_id, target_id, edge_type); +CREATE INDEX IF NOT EXISTS idx_graph_edges_scope + ON graph_edges (scope); +CREATE INDEX IF NOT EXISTS idx_graph_edges_profile_scope + ON graph_edges (profile_id, scope); """