ihabkhaled · ihabkhaled · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/.env.example b/.env.example
@@ -166,6 +166,29 @@ OLLAMA_FLASH_ATTENTION=1
 OLLAMA_KV_CACHE_TYPE=q8_0
 MEMORY_EXTRACTION_MODEL=AUTO
 
+# =============================================================================
+# Memory + Context V2 Flagship
+# =============================================================================
+# Master flag for the V2 control center; v1 endpoints stay live regardless.
+MEMORY_V2_ENABLED=true
+CONTEXT_V2_ENABLED=true
+RETRIEVAL_V2_ENABLED=true
+# Ollama models that back the V2 sensitivity and embedding subsystems.
+MEMORY_SENSITIVITY_MODEL=gemma3:4b
+MEMORY_EMBEDDING_MODEL=nomic-embed-text
+CONTEXT_EMBEDDING_MODEL=nomic-embed-text
+CONTEXT_COMPRESSION_MODEL=gemma3:4b
+# Suggestion-queue defaults (per-user auto-approve cut-off, TTL), retention sweep interval, and context-pack version/token-estimator settings.
+MEMORY_AUTO_APPROVE_DEFAULT=0.85
+MEMORY_RETENTION_SWEEP_INTERVAL_MS=3600000
+MEMORY_SUGGESTION_TTL_DAYS=30
+CONTEXT_VERSION_RETENTION_COUNT=20
+CONTEXT_TOKEN_ESTIMATOR_MODE=char/4
+# Retrieval budgets used by `/internal/memories/retrieve`.
+RETRIEVAL_MEMORY_SEMANTIC_BUDGET=5
+RETRIEVAL_CONTEXT_SEMANTIC_BUDGET=12
+RETRIEVAL_TOKEN_GUARD_PCT=0.4
+
 # =============================================================================
 # File Service
 # =============================================================================

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -24,7 +24,7 @@ apps/
   claw-chat-service/        # Port 4002, PG claw_chat   — threads, messages, context assembly, execution
   claw-connector-service/   # Port 4003, PG claw_connectors — 7 providers (OpenAI, Anthropic, Gemini, Bedrock, DeepSeek, Ollama, Grok), health, model sync
   claw-routing-service/     # Port 4004, PG claw_routing — 7 modes, Ollama-assisted AUTO, policies
-  claw-memory-service/      # Port 4005, PG claw_memory  — memory CRUD, extraction, context packs
+  claw-memory-service/      # Port 4005, PG claw_memory  — memory CRUD + suggestion queue, extraction, sensitivity classifier, retrieval bundle, audit, usage telemetry, preferences, context packs (scopes, versions, attachments, templates)
   claw-file-service/        # Port 4006, PG claw_files   — upload, chunking (JSON/CSV/MD/text)
   claw-audit-service/       # Port 4007, MongoDB         — 10 audit events, usage ledger
   claw-ollama-service/      # Port 4008, PG claw_ollama  — model management, roles, generation
@@ -506,9 +506,10 @@ Mitigations (in priority order):
 
 ### Chat (PostgreSQL)
 
-- `ChatThread` — userId, title, routingMode, preferredProvider/Model, contextPackIds[], systemPrompt, temperature, maxTokens
+- `ChatThread` — userId, title, routingMode, preferredProvider/Model, contextPackIds[], systemPrompt, temperature, maxTokens, **V2 Integration**: useMemory, useContext (per-thread toggles)
 - `ChatMessage` — threadId, role, content, provider, model, routingMode, inputTokens, outputTokens, latencyMs, feedback, metadata(JSON)
 - `MessageAttachment` — messageId, fileId, type
+- `ChatMessageContextReceipt` (**V2 Integration**) — messageId UNIQUE, threadId, userId, payloadJson (RetrievalBundle: memories, packItems, assemblyOrder, tokenBudget, warnings), createdAt — backs "why was this used?"
 
 ### Connectors (PostgreSQL)
 
@@ -523,9 +524,17 @@ Mitigations (in priority order):
 
 ### Memory (PostgreSQL + pgvector)
 
-- `MemoryRecord` — userId, type (FACT/PREFERENCE/INSTRUCTION/SUMMARY), content, sourceThreadId/MessageId, isEnabled
-- `ContextPack` — name, description, scope
-- `ContextPackItem` — type, content, fileId, sortOrder
+- `MemoryRecord` — userId, type (FACT/PREFERENCE/INSTRUCTION/SUMMARY), content, sourceThreadId/MessageId, isEnabled, **V2**: scope (USER/THREAD/WORKSPACE/PROJECT), scopeRef, tags, category, priority, confidence, source (USER_MANUAL/AI_EXTRACTED/AUTOMATION_LEARNING/IMPORTED), sensitivity (NORMAL/SENSITIVE/REDACTED), retentionPolicy (PERMANENT/EXPIRING/AUTO_DECAY), expiresAt, pinned, pausedUntil, qualityScore, useCount, lastUsedAt, provenanceJson
+- `MemorySuggestion` (**V2**) — userId, type, content, confidence, sensitivity, reason, status (PENDING/APPROVED/REJECTED/AUTO_APPROVED/DISMISSED/EXPIRED), decidedAt, decidedBy, resultingMemoryId, sourceThreadId/MessageId
+- `MemoryUsage` (**V2**) — memoryId, userId, threadId, messageId, score, reason
+- `MemoryAuditLog` (**V2**) — memoryId (nullable; row outlives deletion), userId, action (CREATED/UPDATED/DELETED/USED/APPROVED/REJECTED/TOGGLED/PAUSED/RESUMED/REDACTED/IMPORTED/EXPORTED), actor, details
+- `MemoryPreference` (**V2**) — userId, pausedAll, autoApproveThreshold (default 0.85), defaultRetention, defaultExpiresInDays, redactByDefault
+- `ContextPack` — name, description, scope, **V2**: scope (USER/WORKSPACE/PROJECT/THREAD enum), scopeRef, legacyScope (free-text back-compat), tags, visibility (PRIVATE/WORKSPACE/PUBLIC), isEnabled, pausedUntil, pinned, color, icon, version, templateId, ownerUserId, useCount, lastUsedAt, qualityScore
+- `ContextPackItem` — type, content, fileId, sortOrder, **V2**: itemType (TEXT/FILE/URL/MARKDOWN/SNIPPET/MEMORY_REF), legacyType, url, memoryRefId, isEnabled, pinned, tokenCountEstimate, compressedSummary
+- `ContextPackVersion` (**V2**) — packId, version, payloadJson, summary, changedBy, createdAt (immutable history, pruned at 20 per pack)
+- `ContextPackUsage` (**V2**) — packId, userId, threadId, messageId, itemIdsUsed[], score
+- `ContextPackAttachment` (**V2**) — packId, scope, scopeRef, attachedBy, isActive
+- `ContextPackTemplate` (**V2**) — name, description, category, isSystem, payloadJson
 
 ### Files (PostgreSQL)
 
@@ -566,6 +575,25 @@ Exchange: `claw.events` (topic, durable). DLQ + 3 retries with backoff.
 | connector.health_checked          | connector    | audit, routing           |
 | routing.decision_made             | routing      | audit                    |
 | memory.extracted                  | memory       | audit                    |
+| memory.suggested                  | memory       | audit                    |
+| memory.approved                   | memory       | audit                    |
+| memory.rejected                   | memory       | audit                    |
+| memory.used                       | memory       | audit                    |
+| memory.forgotten                  | memory       | audit                    |
+| memory.paused                     | memory       | audit                    |
+| memory.redacted                   | memory       | audit                    |
+| context_pack.created              | memory       | audit                    |
+| context_pack.updated              | memory       | audit                    |
+| context_pack.deleted              | memory       | audit                    |
+| context_pack.attached             | memory       | audit                    |
+| context_pack.detached             | memory       | audit                    |
+| context_pack.used                 | memory       | audit                    |
+| context_pack.version_created      | memory       | audit                    |
+| context_pack.version_reverted     | memory       | audit                    |
+| context_pack.shared               | memory       | audit                    |
+| context.receipt_written           | chat         | audit                    |
+| chat_thread.memory_toggled        | chat         | audit                    |
+| chat_thread.context_toggled       | chat         | audit                    |
 | file.uploaded/chunked             | file         | —                        |
 | log.server                        | all services | server-logs              |
 | image.generated                   | image        | audit                    |
@@ -1066,6 +1094,21 @@ Single root `.env` (copy from `.env.example`). Groups:
   - WEBHOOK_CONNECTOR_REQUESTS_PER_MINUTE (default 60) — per-connector cap on incoming webhook delivery rate (Stream 11.4, in-memory sliding window; over-cap returns RATE_LIMITED rejection)
   - AUTO_SUGGEST_INBOX_REPLY_CRON (default `0 */15 * * * *`) — cron for the Gmail INBOX_REPLY collector that emits DRAFT candidates (Stream 12.2)
   - AUTO_SUGGEST_INBOX_REPLY_LOOKBACK_HOURS (default 48) — how far back to scan Gmail messages for inbox-reply candidates
+- Memory + Context V2 Flagship (2026-05-24, ADRs 033–038, docs/03-architecture/memory-context-integration.md):
+  - MEMORY_V2_ENABLED (default true) — master flag for the V2 control center; v1 endpoints stay live regardless
+  - CONTEXT_V2_ENABLED (default true)
+  - RETRIEVAL_V2_ENABLED (default true) — gates the unified `POST /internal/memories/retrieve` endpoint
+  - MEMORY_SENSITIVITY_MODEL (default `gemma3:4b`) — ambiguous-case sensitivity classifier (regex pre-filter ships in V2; Ollama call is a follow-up enhancement)
+  - MEMORY_EMBEDDING_MODEL / CONTEXT_EMBEDDING_MODEL (default `nomic-embed-text`)
+  - CONTEXT_COMPRESSION_MODEL (default `gemma3:4b`)
+  - MEMORY_AUTO_APPROVE_DEFAULT (default 0.85) — per-user `memory_preferences.autoApproveThreshold` default; only fires for sensitivity=NORMAL
+  - MEMORY_RETENTION_SWEEP_INTERVAL_MS (default 3600000) — hourly retention sweep
+  - MEMORY_SUGGESTION_TTL_DAYS (default 30) — auto-expire pending suggestions
+  - CONTEXT_VERSION_RETENTION_COUNT (default 20) — versions kept per pack
+  - CONTEXT_TOKEN_ESTIMATOR_MODE (default `char/4`)
+  - RETRIEVAL_MEMORY_SEMANTIC_BUDGET (default 5) — top-K memories per retrieval
+  - RETRIEVAL_CONTEXT_SEMANTIC_BUDGET (default 12) — top-K pack items per retrieval
+  - RETRIEVAL_TOKEN_GUARD_PCT (default 0.4) — fraction of token budget memory+context may consume
 
 ---
 

diff --git a/apps/claw-chat-service/prisma/migrations/20260524000000_chat_context_v2/migration.sql b/apps/claw-chat-service/prisma/migrations/20260524000000_chat_context_v2/migration.sql
@@ -0,0 +1,23 @@
+-- Integration V2 (Memory + Context) — chat-service additions.
+-- Adds per-thread memory/context toggles and the assembled-context receipt
+-- table that backs the "why did the AI know this?" surface.
+
+ALTER TABLE "chat_threads"
+  ADD COLUMN "use_memory" BOOLEAN NOT NULL DEFAULT true,
+  ADD COLUMN "use_context" BOOLEAN NOT NULL DEFAULT true;
+
+CREATE TABLE "chat_message_context_receipts" (
+  "id" TEXT NOT NULL,
+  "message_id" TEXT NOT NULL,
+  "thread_id" TEXT NOT NULL,
+  "user_id" TEXT NOT NULL,
+  "payload_json" JSONB NOT NULL,
+  "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+  CONSTRAINT "chat_message_context_receipts_pkey" PRIMARY KEY ("id")
+);
+-- Index names use Prisma's default `{table}_{columns}_key` / `_idx` pattern so
+-- they match what schema.prisma (@unique, @@index) expects; custom names would
+-- surface as drift and trigger index renames in the next generated migration.
+CREATE UNIQUE INDEX "chat_message_context_receipts_message_id_key" ON "chat_message_context_receipts"("message_id");
+CREATE INDEX "chat_message_context_receipts_thread_id_idx" ON "chat_message_context_receipts"("thread_id");
+CREATE INDEX "chat_message_context_receipts_user_id_created_at_idx" ON "chat_message_context_receipts"("user_id", "created_at");
diff --git a/apps/claw-chat-service/prisma/schema.prisma b/apps/claw-chat-service/prisma/schema.prisma
@@ -48,6 +48,9 @@ model ChatThread {
   judgeModel        String?  @map("judge_model")
   qualityThreshold  Float?   @map("quality_threshold")
   maxReRouteAttempts Int?    @map("max_reroute_attempts")
+  // === Integration V2 — per-thread memory/context switches ===
+  useMemory    Boolean     @default(true) @map("use_memory")
+  useContext   Boolean     @default(true) @map("use_context")
   createdAt    DateTime    @default(now()) @map("created_at")
   updatedAt    DateTime    @updatedAt @map("updated_at")
 
@@ -58,6 +61,19 @@ model ChatThread {
   @@map("chat_threads")
 }
 
+model ChatMessageContextReceipt {
+  id          String   @id @default(cuid())
+  messageId   String   @unique @map("message_id") // unique: at most one receipt row per message
+  threadId    String   @map("thread_id") // plain id column; no Prisma relation is declared on this model
+  userId      String   @map("user_id")
+  payloadJson Json     @map("payload_json") // stored as JSON; presumably the serialized RetrievalBundle — confirm against the writer
+  createdAt   DateTime @default(now()) @map("created_at")
+
+  @@index([threadId]) // supports filtering receipts by thread_id
+  @@index([userId, createdAt]) // composite index on (user_id, created_at)
+  @@map("chat_message_context_receipts")
+}
+
 model ChatMessage {
   id            String       @id @default(cuid())
   threadId      String       @map("thread_id")

diff --git a/apps/claw-chat-service/src/app/app.module.ts b/apps/claw-chat-service/src/app/app.module.ts
@@ -16,6 +16,8 @@ import { LoggingInterceptor } from './interceptors/logging.interceptor';
 import { HealthModule } from '../modules/health/health.module';
 import { ChatThreadsModule } from '../modules/chat-threads/chat-threads.module';
 import { ChatMessagesModule } from '../modules/chat-messages/chat-messages.module';
+import { ContextReceiptsModule } from '../modules/context-receipts/context-receipts.module';
+import { ContextPreviewModule } from '../modules/context-preview/context-preview.module';
 
 @Module({
   imports: [
@@ -61,6 +63,8 @@ import { ChatMessagesModule } from '../modules/chat-messages/chat-messages.modul
     HealthModule,
     ChatThreadsModule,
     ChatMessagesModule,
+    ContextReceiptsModule,
+    ContextPreviewModule,
     ThrottlerModule.forRoot([
       {
         ttl: Number(process.env['THROTTLE_TTL'] ?? 60000),

diff --git a/apps/claw-chat-service/src/common/utilities/context-receipt-json.utility.ts b/apps/claw-chat-service/src/common/utilities/context-receipt-json.utility.ts
@@ -0,0 +1,15 @@
+import type { RetrievalBundle } from '@claw/shared-types';
+import type { Prisma } from '../../generated/prisma';
+
+export function bundleToInputJson(bundle: RetrievalBundle): Prisma.InputJsonValue {
+  // The bundle is expected to hold only JSON-safe values; a stringify/parse round
+  // trip gives Prisma a plain InputJsonValue without an `as unknown as` cast.
+  const serialized = JSON.stringify(bundle);
+  return JSON.parse(serialized) as Prisma.InputJsonValue;
+}
+
+export function inputJsonToBundle(payload: Prisma.JsonValue): RetrievalBundle {
+  // Deep-copies the stored JSON and asserts (without validating) the bundle shape.
+  const serialized = JSON.stringify(payload);
+  return JSON.parse(serialized) as RetrievalBundle;
+}
diff --git a/apps/claw-chat-service/src/common/utilities/receipt-from-context.utility.ts b/apps/claw-chat-service/src/common/utilities/receipt-from-context.utility.ts
@@ -0,0 +1,56 @@
+import {
+  type ContextPackItemType,
+  type MemoryScope,
+  type MemorySensitivity,
+  type MemoryType,
+  type RetrievalBundle,
+  RetrievalReason,
+} from '@claw/shared-types';
+import type { AssembledContext } from '../../modules/chat-messages/types/context.types';
+
+/**
+ * Integration V2 — derives the per-message "why was this used?" receipt (a
+ * RetrievalBundle) from an already-assembled context. Scores are a fixed 0.5
+ * placeholder, pack items get positional ids under an 'unknown' pack id, and
+ * retrieval latency is reported as 0, since this path has no real retrieval data.
+ */
+export function receiptFromAssembledContext(
+  context: AssembledContext,
+  tokenBudgetUsed: number,
+): RetrievalBundle {
+  const memories = context.memories.map((memory) => ({
+    id: memory.id,
+    type: memory.type as MemoryType,
+    content: memory.content,
+    scope: 'USER' as MemoryScope,
+    scopeRef: null,
+    score: 0.5,
+    reason: RetrievalReason.INTENT_MATCH,
+    sensitivity: 'NORMAL' as MemorySensitivity,
+    sourceThreadId: null,
+    sourceMessageId: null,
+  }));
+  const packItems = context.contextPackItems.map((item, position) => {
+    return {
+      id: `pack-item-${String(position)}`,
+      contextPackId: 'unknown',
+      itemType: (item.type ?? 'TEXT') as ContextPackItemType,
+      content: item.content,
+      score: 0.5,
+      reason: RetrievalReason.EXPLICIT_ATTACH,
+      pinned: false,
+      tokenCountEstimate: Math.ceil((item.content ?? '').length / 4),
+    };
+  });
+  const memoryKeys = memories.map((memory) => `memory:${memory.id}`);
+  const packKeys = packItems.map((pack) => `pack:${pack.id}`);
+  return {
+    memories,
+    packItems,
+    assemblyOrder: memoryKeys.concat(packKeys),
+    tokenBudget: context.tokenBudget,
+    tokenBudgetUsed,
+    retrievalLatencyMs: 0,
+    warnings: [],
+  };
+}
diff --git a/apps/claw-chat-service/src/modules/chat-messages/__tests__/chat-messages.service.spec.ts b/apps/claw-chat-service/src/modules/chat-messages/__tests__/chat-messages.service.spec.ts
@@ -126,6 +126,9 @@ describe('ChatMessagesService', () => {
         emitCompletion: jest.fn(),
       } as unknown as ChatStreamService,
       rabbitMQ as unknown as RabbitMQService,
+      { write: jest.fn(), getByMessageId: jest.fn() } as unknown as ConstructorParameters<
+        typeof ChatMessagesService
+      >[16],
     );
   });
 
@@ -324,6 +327,9 @@ describe('ChatMessagesService', () => {
           emitCompletion: jest.fn(),
         } as unknown as ChatStreamService,
         rabbitMQ as unknown as RabbitMQService,
+        { write: jest.fn(), getByMessageId: jest.fn() } as unknown as ConstructorParameters<
+          typeof ChatMessagesService
+        >[16],
       );
 
       const result = await localService.executeVerify('user-1', {

diff --git a/apps/claw-chat-service/src/modules/chat-messages/chat-messages.module.ts b/apps/claw-chat-service/src/modules/chat-messages/chat-messages.module.ts
@@ -22,8 +22,10 @@ import { AdvancedModuleModelSelectionService } from './services/advanced-module-
 import { LocalModelSelectionService } from './services/local-model-selection.service';
 import { ChatMessagesRepository } from './repositories/chat-messages.repository';
 import { ChatThreadsRepository } from '../chat-threads/repositories/chat-threads.repository';
+import { ContextReceiptsModule } from '../context-receipts/context-receipts.module';
 
 @Module({
+  imports: [ContextReceiptsModule],
   controllers: [ChatMessagesController, ChatStreamController, ChatInternalController],
   providers: [
     ChatMessagesService,

diff --git a/apps/claw-chat-service/src/modules/chat-messages/services/chat-messages.service.ts b/apps/claw-chat-service/src/modules/chat-messages/services/chat-messages.service.ts
@@ -13,6 +13,8 @@
 import { ResearchWorkflow } from '../../../common/enums/research-workflow.enum';
 import { ChatMessagesRepository } from '../repositories/chat-messages.repository';
 import { ChatThreadsRepository } from '../../chat-threads/repositories/chat-threads.repository';
+import { ContextReceiptService } from '../../context-receipts/services/context-receipt.service';
+import { receiptFromAssembledContext } from '../../../common/utilities/receipt-from-context.utility';
 import { ChatExecutionManager } from '../managers/chat-execution.manager';
 import { ContextAssemblyManager } from '../managers/context-assembly.manager';
 import { ConsensusExecutionManager } from '../managers/consensus-execution.manager';
@@ -85,6 +87,7 @@
     private readonly rolePackManager: RolePackManager,
     private readonly chatStreamService: ChatStreamService,
     private readonly rabbitMQService: RabbitMQService,
+    private readonly contextReceiptService: ContextReceiptService,
   ) {
     this.structuredLogger = new StructuredLogger(
       this.rabbitMQService,
@@ -564,7 +567,7 @@
    return effectivePayload;
  }

  private async runLlmAndStore(
    effectivePayload: MessageRoutedData,
    originalPayload: MessageRoutedData,
    context: AssembledContext,
@@ -589,6 +592,18 @@
       contextMetadata,
       latestUserMetadata,
     );
+    // Integration V2 — persist the "why was this used?" receipt asynchronously.
+    void this.contextReceiptService
+      .write(
+        assistantMessage.id,
+        originalPayload.threadId,
+        thread?.userId ?? 'system',
+        receiptFromAssembledContext(context, llmResponse.inputTokens ?? 0),
+      )
+      .catch((error: unknown) => {
+        const msg = error instanceof Error ? error.message : 'unknown';
+        this.logger.warn(`runLlmAndStore: receipt write failed — ${msg}`);
+      });
     await this.updateThreadAfterResponse(originalPayload.threadId, llmResponse);
     this.chatStreamService.emitCompletion(
       originalPayload.threadId,

diff --git a/apps/claw-chat-service/src/modules/chat-threads/dto/update-thread.dto.ts b/apps/claw-chat-service/src/modules/chat-threads/dto/update-thread.dto.ts
@@ -28,6 +28,9 @@ export const updateThreadSchema = z.object({
   judgeModel: z.string().max(255).optional().nullable(),
   qualityThreshold: z.number().min(0).max(1).optional().nullable(),
   maxReRouteAttempts: z.number().int().min(0).max(5).optional().nullable(),
+  // Integration V2 — per-thread memory + context toggles
+  useMemory: z.boolean().optional(),
+  useContext: z.boolean().optional(),
 });
 
 export type UpdateThreadDto = z.infer<typeof updateThreadSchema>;