Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,29 @@ OLLAMA_FLASH_ATTENTION=1
OLLAMA_KV_CACHE_TYPE=q8_0
MEMORY_EXTRACTION_MODEL=AUTO

# =============================================================================
# Memory + Context V2 Flagship
# =============================================================================
# Master flag for the V2 control center; v1 endpoints stay live regardless.
MEMORY_V2_ENABLED=true
CONTEXT_V2_ENABLED=true
RETRIEVAL_V2_ENABLED=true
# Ollama models that back the V2 sensitivity and embedding subsystems.
MEMORY_SENSITIVITY_MODEL=gemma3:4b
MEMORY_EMBEDDING_MODEL=nomic-embed-text
CONTEXT_EMBEDDING_MODEL=nomic-embed-text
CONTEXT_COMPRESSION_MODEL=gemma3:4b
# Defaults for the suggestion queue (per-user auto-approve cut-off, pending-suggestion TTL), the retention sweep interval, pack version retention, and the token estimator.
MEMORY_AUTO_APPROVE_DEFAULT=0.85
MEMORY_RETENTION_SWEEP_INTERVAL_MS=3600000
MEMORY_SUGGESTION_TTL_DAYS=30
CONTEXT_VERSION_RETENTION_COUNT=20
CONTEXT_TOKEN_ESTIMATOR_MODE=char/4
# Retrieval budgets used by `/internal/memories/retrieve`.
RETRIEVAL_MEMORY_SEMANTIC_BUDGET=5
RETRIEVAL_CONTEXT_SEMANTIC_BUDGET=12
RETRIEVAL_TOKEN_GUARD_PCT=0.4

# =============================================================================
# File Service
# =============================================================================
Expand Down
53 changes: 48 additions & 5 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ apps/
claw-chat-service/ # Port 4002, PG claw_chat — threads, messages, context assembly, execution
claw-connector-service/ # Port 4003, PG claw_connectors — 7 providers (OpenAI, Anthropic, Gemini, Bedrock, DeepSeek, Ollama, Grok), health, model sync
claw-routing-service/ # Port 4004, PG claw_routing — 7 modes, Ollama-assisted AUTO, policies
claw-memory-service/ # Port 4005, PG claw_memory — memory CRUD, extraction, context packs
claw-memory-service/ # Port 4005, PG claw_memory — memory CRUD + suggestion queue, extraction, sensitivity classifier, retrieval bundle, audit, usage telemetry, preferences, context packs (scopes, versions, attachments, templates)
claw-file-service/ # Port 4006, PG claw_files — upload, chunking (JSON/CSV/MD/text)
claw-audit-service/ # Port 4007, MongoDB — 10 audit events, usage ledger
claw-ollama-service/ # Port 4008, PG claw_ollama — model management, roles, generation
Expand Down Expand Up @@ -506,9 +506,10 @@ Mitigations (in priority order):

### Chat (PostgreSQL)

- `ChatThread` — userId, title, routingMode, preferredProvider/Model, contextPackIds[], systemPrompt, temperature, maxTokens
- `ChatThread` — userId, title, routingMode, preferredProvider/Model, contextPackIds[], systemPrompt, temperature, maxTokens, **V2 Integration**: useMemory, useContext (per-thread toggles)
- `ChatMessage` — threadId, role, content, provider, model, routingMode, inputTokens, outputTokens, latencyMs, feedback, metadata(JSON)
- `MessageAttachment` — messageId, fileId, type
- `ChatMessageContextReceipt` (**V2 Integration**) — messageId UNIQUE, threadId, userId, payloadJson (RetrievalBundle: memories, packItems, assemblyOrder, tokenBudget, warnings), createdAt — backs "why was this used?"

### Connectors (PostgreSQL)

Expand All @@ -523,9 +524,17 @@ Mitigations (in priority order):

### Memory (PostgreSQL + pgvector)

- `MemoryRecord` — userId, type (FACT/PREFERENCE/INSTRUCTION/SUMMARY), content, sourceThreadId/MessageId, isEnabled
- `ContextPack` — name, description, scope
- `ContextPackItem` — type, content, fileId, sortOrder
- `MemoryRecord` — userId, type (FACT/PREFERENCE/INSTRUCTION/SUMMARY), content, sourceThreadId/MessageId, isEnabled, **V2**: scope (USER/THREAD/WORKSPACE/PROJECT), scopeRef, tags, category, priority, confidence, source (USER_MANUAL/AI_EXTRACTED/AUTOMATION_LEARNING/IMPORTED), sensitivity (NORMAL/SENSITIVE/REDACTED), retentionPolicy (PERMANENT/EXPIRING/AUTO_DECAY), expiresAt, pinned, pausedUntil, qualityScore, useCount, lastUsedAt, provenanceJson
- `MemorySuggestion` (**V2**) — userId, type, content, confidence, sensitivity, reason, status (PENDING/APPROVED/REJECTED/AUTO_APPROVED/DISMISSED/EXPIRED), decidedAt, decidedBy, resultingMemoryId, sourceThreadId/MessageId
- `MemoryUsage` (**V2**) — memoryId, userId, threadId, messageId, score, reason
- `MemoryAuditLog` (**V2**) — memoryId (nullable; row outlives deletion), userId, action (CREATED/UPDATED/DELETED/USED/APPROVED/REJECTED/TOGGLED/PAUSED/RESUMED/REDACTED/IMPORTED/EXPORTED), actor, details
- `MemoryPreference` (**V2**) — userId, pausedAll, autoApproveThreshold (default 0.85), defaultRetention, defaultExpiresInDays, redactByDefault
- `ContextPack` — name, description, scope, **V2**: scope (USER/WORKSPACE/PROJECT/THREAD enum), scopeRef, legacyScope (free-text back-compat), tags, visibility (PRIVATE/WORKSPACE/PUBLIC), isEnabled, pausedUntil, pinned, color, icon, version, templateId, ownerUserId, useCount, lastUsedAt, qualityScore
- `ContextPackItem` — type, content, fileId, sortOrder, **V2**: itemType (TEXT/FILE/URL/MARKDOWN/SNIPPET/MEMORY_REF), legacyType, url, memoryRefId, isEnabled, pinned, tokenCountEstimate, compressedSummary
- `ContextPackVersion` (**V2**) — packId, version, payloadJson, summary, changedBy, createdAt (immutable history, pruned at 20 per pack)
- `ContextPackUsage` (**V2**) — packId, userId, threadId, messageId, itemIdsUsed[], score
- `ContextPackAttachment` (**V2**) — packId, scope, scopeRef, attachedBy, isActive
- `ContextPackTemplate` (**V2**) — name, description, category, isSystem, payloadJson

### Files (PostgreSQL)

Expand Down Expand Up @@ -566,6 +575,25 @@ Exchange: `claw.events` (topic, durable). DLQ + 3 retries with backoff.
| connector.health_checked | connector | audit, routing |
| routing.decision_made | routing | audit |
| memory.extracted | memory | audit |
| memory.suggested | memory | audit |
| memory.approved | memory | audit |
| memory.rejected | memory | audit |
| memory.used | memory | audit |
| memory.forgotten | memory | audit |
| memory.paused | memory | audit |
| memory.redacted | memory | audit |
| context_pack.created | memory | audit |
| context_pack.updated | memory | audit |
| context_pack.deleted | memory | audit |
| context_pack.attached | memory | audit |
| context_pack.detached | memory | audit |
| context_pack.used | memory | audit |
| context_pack.version_created | memory | audit |
| context_pack.version_reverted | memory | audit |
| context_pack.shared | memory | audit |
| context.receipt_written | chat | audit |
| chat_thread.memory_toggled | chat | audit |
| chat_thread.context_toggled | chat | audit |
| file.uploaded/chunked | file | — |
| log.server | all services | server-logs |
| image.generated | image | audit |
Expand Down Expand Up @@ -1066,6 +1094,21 @@ Single root `.env` (copy from `.env.example`). Groups:
- WEBHOOK_CONNECTOR_REQUESTS_PER_MINUTE (default 60) — per-connector cap on incoming webhook delivery rate (Stream 11.4, in-memory sliding window; over-cap returns RATE_LIMITED rejection)
- AUTO_SUGGEST_INBOX_REPLY_CRON (default `0 */15 * * * *`) — cron for the Gmail INBOX_REPLY collector that emits DRAFT candidates (Stream 12.2)
- AUTO_SUGGEST_INBOX_REPLY_LOOKBACK_HOURS (default 48) — how far back to scan Gmail messages for inbox-reply candidates
- Memory + Context V2 Flagship (2026-05-24, ADRs 033–038, docs/03-architecture/memory-context-integration.md):
- MEMORY_V2_ENABLED (default true) — master flag for the V2 control center; v1 endpoints stay live regardless
- CONTEXT_V2_ENABLED (default true)
- RETRIEVAL_V2_ENABLED (default true) — gates the unified `POST /internal/memories/retrieve` endpoint
- MEMORY_SENSITIVITY_MODEL (default `gemma3:4b`) — ambiguous-case sensitivity classifier (regex pre-filter ships in V2; Ollama call is a follow-up enhancement)
- MEMORY_EMBEDDING_MODEL / CONTEXT_EMBEDDING_MODEL (default `nomic-embed-text`)
- CONTEXT_COMPRESSION_MODEL (default `gemma3:4b`)
- MEMORY_AUTO_APPROVE_DEFAULT (default 0.85) — per-user `memory_preferences.autoApproveThreshold` default; only fires for sensitivity=NORMAL
- MEMORY_RETENTION_SWEEP_INTERVAL_MS (default 3600000) — hourly retention sweep
- MEMORY_SUGGESTION_TTL_DAYS (default 30) — auto-expire pending suggestions
- CONTEXT_VERSION_RETENTION_COUNT (default 20) — versions kept per pack
- CONTEXT_TOKEN_ESTIMATOR_MODE (default `char/4`)
- RETRIEVAL_MEMORY_SEMANTIC_BUDGET (default 5) — top-K memories per retrieval
- RETRIEVAL_CONTEXT_SEMANTIC_BUDGET (default 12) — top-K pack items per retrieval
- RETRIEVAL_TOKEN_GUARD_PCT (default 0.4) — fraction of token budget memory+context may consume

---

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
-- Integration V2 (Memory + Context) — chat-service additions.
-- Adds per-thread memory/context toggles and the assembled-context receipt
-- table that backs the "why did the AI know this?" surface.

-- Per-thread switches. Both columns are NOT NULL with DEFAULT true, so threads
-- that already exist end up with memory and context enabled.
ALTER TABLE "chat_threads"
ADD COLUMN "use_memory" BOOLEAN NOT NULL DEFAULT true,
ADD COLUMN "use_context" BOOLEAN NOT NULL DEFAULT true;

-- Receipt table: one JSONB payload per message. No foreign-key constraints are
-- declared on message_id / thread_id / user_id in this migration, and "id" has
-- no database-side default (the application must supply it).
CREATE TABLE "chat_message_context_receipts" (
"id" TEXT NOT NULL,
"message_id" TEXT NOT NULL,
"thread_id" TEXT NOT NULL,
"user_id" TEXT NOT NULL,
"payload_json" JSONB NOT NULL,
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT "chat_message_context_receipts_pkey" PRIMARY KEY ("id")
);
-- At most one receipt per message.
-- NOTE(review): this index name ends in "_unique" and the composite index below
-- uses camelCase ("userId_createdAt"); neither follows the
-- "<table>_<columns>_key" / "<table>_<columns>_idx" pattern Prisma derives by
-- default — confirm schema.prisma pins these names with `map:`.
CREATE UNIQUE INDEX "chat_message_context_receipts_message_id_unique"
ON "chat_message_context_receipts"("message_id");
-- Lookup of all receipts belonging to a thread.
CREATE INDEX "chat_message_context_receipts_thread_id_idx"
ON "chat_message_context_receipts"("thread_id");
-- Composite index on (user_id, created_at) for per-user, time-ordered access.
CREATE INDEX "chat_message_context_receipts_userId_createdAt_idx"
ON "chat_message_context_receipts"("user_id", "created_at");
16 changes: 16 additions & 0 deletions apps/claw-chat-service/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ model ChatThread {
judgeModel String? @map("judge_model")
qualityThreshold Float? @map("quality_threshold")
maxReRouteAttempts Int? @map("max_reroute_attempts")
// === Integration V2 — per-thread memory/context switches ===
useMemory Boolean @default(true) @map("use_memory")
useContext Boolean @default(true) @map("use_context")
createdAt DateTime @default(now()) @map("created_at")
updatedAt DateTime @updatedAt @map("updated_at")

Expand All @@ -58,6 +61,19 @@ model ChatThread {
@@map("chat_threads")
}

/// Integration V2 — per-message receipt of the context that was assembled for
/// a reply. `payloadJson` stores the serialized RetrievalBundle; `messageId`
/// is unique, so each message has at most one receipt.
model ChatMessageContextReceipt {
  id          String   @id @default(cuid())
  // The migration creates this unique index as "..._message_id_unique", not the
  // "..._message_id_key" name Prisma would derive, so the name is pinned here
  // to keep the schema and the database in agreement.
  messageId   String   @unique(map: "chat_message_context_receipts_message_id_unique") @map("message_id")
  threadId    String   @map("thread_id")
  userId      String   @map("user_id")
  payloadJson Json     @map("payload_json")
  createdAt   DateTime @default(now()) @map("created_at")

  // Default-derived name ("chat_message_context_receipts_thread_id_idx")
  // already matches the migration, so no `map:` is needed.
  @@index([threadId])
  // The migration names this index with camelCase column parts; pin it so
  // Prisma does not expect "..._user_id_created_at_idx" and report drift.
  @@index([userId, createdAt], map: "chat_message_context_receipts_userId_createdAt_idx")
  @@map("chat_message_context_receipts")
}

model ChatMessage {
id String @id @default(cuid())
threadId String @map("thread_id")
Expand Down
4 changes: 4 additions & 0 deletions apps/claw-chat-service/src/app/app.module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import { LoggingInterceptor } from './interceptors/logging.interceptor';
import { HealthModule } from '../modules/health/health.module';
import { ChatThreadsModule } from '../modules/chat-threads/chat-threads.module';
import { ChatMessagesModule } from '../modules/chat-messages/chat-messages.module';
import { ContextReceiptsModule } from '../modules/context-receipts/context-receipts.module';
import { ContextPreviewModule } from '../modules/context-preview/context-preview.module';

@Module({
imports: [
Expand Down Expand Up @@ -61,6 +63,8 @@ import { ChatMessagesModule } from '../modules/chat-messages/chat-messages.modul
HealthModule,
ChatThreadsModule,
ChatMessagesModule,
ContextReceiptsModule,
ContextPreviewModule,
ThrottlerModule.forRoot([
{
ttl: Number(process.env['THROTTLE_TTL'] ?? 60000),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import type { RetrievalBundle } from '@claw/shared-types';
import type { Prisma } from '../../generated/prisma';

/**
 * Converts a RetrievalBundle into a value suitable for a Prisma Json column.
 *
 * The bundle is serialized to a JSON string and parsed back, which yields a
 * detached copy containing only JSON-representable data (properties whose
 * value is `undefined` are dropped by the serializer).
 *
 * @param bundle - The retrieval bundle to persist.
 * @returns A JSON-normalized copy typed as `Prisma.InputJsonValue`.
 */
export function bundleToInputJson(bundle: RetrievalBundle): Prisma.InputJsonValue {
  // RetrievalBundle is a plain JSON-shaped DTO (no Date / Function / BigInt /
  // undefined), so the string round-trip is lossless and lets us satisfy
  // Prisma's InputJsonValue contract without an `as unknown as` cast.
  const serialized = JSON.stringify(bundle);
  const normalized: unknown = JSON.parse(serialized);
  return normalized as Prisma.InputJsonValue;
}

/**
 * Reads a stored Json payload back as a RetrievalBundle.
 *
 * This is the inverse of `bundleToInputJson`. The payload is NOT validated:
 * it is copied through a JSON round-trip and asserted to have the bundle
 * shape, on the basis that it was written by the helper above.
 *
 * @param payload - The Json value loaded from the receipt row.
 * @returns A detached copy of the payload typed as `RetrievalBundle`.
 */
export function inputJsonToBundle(payload: Prisma.JsonValue): RetrievalBundle {
  const serialized = JSON.stringify(payload);
  const restored: unknown = JSON.parse(serialized);
  // Trusting the writer here — no runtime shape check is performed.
  return restored as RetrievalBundle;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import {
type ContextPackItemType,
type MemoryScope,
type MemorySensitivity,
type MemoryType,
type RetrievalBundle,
RetrievalReason,
} from '@claw/shared-types';
import type { AssembledContext } from '../../modules/chat-messages/types/context.types';

/**
 * Integration V2 — derive the per-message RetrievalBundle receipt from an
 * already-assembled context. The receipt is what the "why was this used?"
 * surface reads.
 *
 * The existing assembly path does not record per-item similarity scores, so
 * several fields are placeholders rather than measured values: every score is
 * 0.5, memories are reported as USER-scoped / NORMAL sensitivity with no
 * source references, pack items get a synthetic positional id and an
 * 'unknown' pack id, and retrieval latency is reported as 0. A later change
 * can swap this for the real retrieve() result.
 *
 * @param context - The assembled context; its `memories`, `contextPackItems`
 *   and `tokenBudget` are read.
 * @param tokenBudgetUsed - Tokens consumed, copied onto the bundle unchanged.
 * @returns A RetrievalBundle listing memories first, then pack items.
 */
export function receiptFromAssembledContext(
  context: AssembledContext,
  tokenBudgetUsed: number,
): RetrievalBundle {
  const memories = context.memories.map((memory) => {
    return {
      id: memory.id,
      type: memory.type as MemoryType,
      content: memory.content,
      scope: 'USER' as MemoryScope,
      scopeRef: null,
      score: 0.5,
      reason: RetrievalReason.INTENT_MATCH,
      sensitivity: 'NORMAL' as MemorySensitivity,
      sourceThreadId: null,
      sourceMessageId: null,
    };
  });

  const packItems = context.contextPackItems.map((item, position) => {
    // Missing content counts as empty text for the estimate only; the raw
    // content value is still passed through untouched below.
    const textForEstimate = item.content ?? '';
    return {
      id: `pack-item-${String(position)}`,
      contextPackId: 'unknown',
      itemType: (item.type ?? 'TEXT') as ContextPackItemType,
      content: item.content,
      score: 0.5,
      reason: RetrievalReason.EXPLICIT_ATTACH,
      pinned: false,
      // Rough estimate: one token per four characters, rounded up.
      tokenCountEstimate: Math.ceil(textForEstimate.length / 4),
    };
  });

  // Assembly order is all memories followed by all pack items.
  const memoryKeys = memories.map((entry) => `memory:${entry.id}`);
  const packKeys = packItems.map((entry) => `pack:${entry.id}`);

  return {
    memories,
    packItems,
    assemblyOrder: memoryKeys.concat(packKeys),
    tokenBudget: context.tokenBudget,
    tokenBudgetUsed,
    retrievalLatencyMs: 0,
    warnings: [],
  };
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ describe('ChatMessagesService', () => {
emitCompletion: jest.fn(),
} as unknown as ChatStreamService,
rabbitMQ as unknown as RabbitMQService,
{ write: jest.fn(), getByMessageId: jest.fn() } as unknown as ConstructorParameters<
typeof ChatMessagesService
>[16],
);
});

Expand Down Expand Up @@ -324,6 +327,9 @@ describe('ChatMessagesService', () => {
emitCompletion: jest.fn(),
} as unknown as ChatStreamService,
rabbitMQ as unknown as RabbitMQService,
{ write: jest.fn(), getByMessageId: jest.fn() } as unknown as ConstructorParameters<
typeof ChatMessagesService
>[16],
);

const result = await localService.executeVerify('user-1', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ import { AdvancedModuleModelSelectionService } from './services/advanced-module-
import { LocalModelSelectionService } from './services/local-model-selection.service';
import { ChatMessagesRepository } from './repositories/chat-messages.repository';
import { ChatThreadsRepository } from '../chat-threads/repositories/chat-threads.repository';
import { ContextReceiptsModule } from '../context-receipts/context-receipts.module';

@Module({
imports: [ContextReceiptsModule],
controllers: [ChatMessagesController, ChatStreamController, ChatInternalController],
providers: [
ChatMessagesService,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import { ResearchWorkflow } from '../../../common/enums/research-workflow.enum';
import { ChatMessagesRepository } from '../repositories/chat-messages.repository';
import { ChatThreadsRepository } from '../../chat-threads/repositories/chat-threads.repository';
import { ContextReceiptService } from '../../context-receipts/services/context-receipt.service';
import { receiptFromAssembledContext } from '../../../common/utilities/receipt-from-context.utility';
import { ChatExecutionManager } from '../managers/chat-execution.manager';
import { ContextAssemblyManager } from '../managers/context-assembly.manager';
import { ConsensusExecutionManager } from '../managers/consensus-execution.manager';
Expand Down Expand Up @@ -85,6 +87,7 @@
private readonly rolePackManager: RolePackManager,
private readonly chatStreamService: ChatStreamService,
private readonly rabbitMQService: RabbitMQService,
private readonly contextReceiptService: ContextReceiptService,
) {
this.structuredLogger = new StructuredLogger(
this.rabbitMQService,
Expand Down Expand Up @@ -564,7 +567,7 @@
return effectivePayload;
}

private async runLlmAndStore(

Check warning on line 570 in apps/claw-chat-service/src/modules/chat-messages/services/chat-messages.service.ts

View workflow job for this annotation

GitHub Actions / Lint (chat)

Async method 'runLlmAndStore' has too many lines (51). Maximum allowed is 50
effectivePayload: MessageRoutedData,
originalPayload: MessageRoutedData,
context: AssembledContext,
Expand All @@ -589,6 +592,18 @@
contextMetadata,
latestUserMetadata,
);
// Integration V2 — persist the "why was this used?" receipt asynchronously.
void this.contextReceiptService
.write(
assistantMessage.id,
originalPayload.threadId,
thread?.userId ?? 'system',
receiptFromAssembledContext(context, llmResponse.inputTokens ?? 0),
)
.catch((error: unknown) => {
const msg = error instanceof Error ? error.message : 'unknown';
this.logger.warn(`runLlmAndStore: receipt write failed — ${msg}`);
});
await this.updateThreadAfterResponse(originalPayload.threadId, llmResponse);
this.chatStreamService.emitCompletion(
originalPayload.threadId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ export const updateThreadSchema = z.object({
judgeModel: z.string().max(255).optional().nullable(),
qualityThreshold: z.number().min(0).max(1).optional().nullable(),
maxReRouteAttempts: z.number().int().min(0).max(5).optional().nullable(),
// Integration V2 — per-thread memory + context toggles
useMemory: z.boolean().optional(),
useContext: z.boolean().optional(),
});

export type UpdateThreadDto = z.infer<typeof updateThreadSchema>;
Loading
Loading