diff --git a/apps/claw-routing-service/src/modules/cost-budget/constants/budget.constants.ts b/apps/claw-routing-service/src/modules/cost-budget/constants/budget.constants.ts
new file mode 100644
index 00000000..026d8bb2
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/constants/budget.constants.ts
@@ -0,0 +1,12 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+/** Default for the `warnAtPercent` budget setting (see `createBudgetSchema`). */
+export const BUDGET_DEFAULT_WARN_AT_PERCENT = 80;
+/** Default budget reset interval, in days. */
+export const BUDGET_DEFAULT_RESET_INTERVAL_DAYS = 30;
+
+// Message keys for the budget outcomes named in each identifier.
+export const BUDGET_MESSAGE_KEY_NEAR_LIMIT = 'BUDGET_NEAR_LIMIT';
+export const BUDGET_MESSAGE_KEY_EXCEEDED_FORCED_LOCAL = 'BUDGET_EXCEEDED_FORCED_LOCAL';
+export const BUDGET_MESSAGE_KEY_EXCEEDED_OVERRIDDEN = 'BUDGET_EXCEEDED_OVERRIDDEN';
+export const BUDGET_MESSAGE_KEY_UNKNOWN_COST_BLOCKED = 'BUDGET_UNKNOWN_COST_BLOCKED';
diff --git a/apps/claw-routing-service/src/modules/cost-budget/controllers/cost-budget.controller.ts b/apps/claw-routing-service/src/modules/cost-budget/controllers/cost-budget.controller.ts
new file mode 100644
index 00000000..4a85aa4b
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/controllers/cost-budget.controller.ts
@@ -0,0 +1,61 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+import { Body, Controller, Get, Param, Patch, Post } from '@nestjs/common';
+
+import { CostBudgetService } from '../services/cost-budget.service';
+
+/**
+ * HTTP endpoints for cost budgets, mounted under `routing/cost-budget`.
+ *
+ * Each handler forwards to the matching {@link CostBudgetService} method. Request
+ * bodies are typed `unknown` and are passed through without validation here.
+ */
+@Controller('routing/cost-budget')
+export class CostBudgetController {
+  constructor(private readonly service: CostBudgetService) {}
+
+  /** `GET me` — forwards to `CostBudgetService.getMine`. */
+  @Get('me')
+  async getMine(): Promise<unknown> {
+    return this.service.getMine();
+  }
+
+  /** `GET me/forecast` — forwards to `CostBudgetService.getMineForecast`. */
+  @Get('me/forecast')
+  async getMineForecast(): Promise<unknown> {
+    return this.service.getMineForecast();
+  }
+
+  /**
+   * `PATCH me` — forwards the raw body to `CostBudgetService.updateMine`.
+   * Declared ahead of `PATCH :id` so the literal `me` segment is matched first.
+   */
+  @Patch('me')
+  async updateMine(@Body() body: unknown): Promise<unknown> {
+    return this.service.updateMine(body);
+  }
+
+  /** `GET /` — forwards to `CostBudgetService.listAll`. */
+  @Get()
+  async listAll(): Promise<unknown> {
+    return this.service.listAll();
+  }
+
+  /** `POST /` — forwards the raw body to `CostBudgetService.create`. */
+  @Post()
+  async create(@Body() body: unknown): Promise<unknown> {
+    return this.service.create(body);
+  }
+
+  /** `PATCH :id` — forwards the id and raw body to `CostBudgetService.update`. */
+  @Patch(':id')
+  async update(@Param('id') id: string, @Body() body: unknown): Promise<unknown> {
+    return this.service.update(id, body);
+  }
+
+  /** `POST check` — forwards the raw body to `CostBudgetService.check`. */
+  @Post('check')
+  async check(@Body() body: unknown): Promise<unknown> {
+    return this.service.check(body);
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/cost-budget.module.ts b/apps/claw-routing-service/src/modules/cost-budget/cost-budget.module.ts
new file mode 100644
index 00000000..9975a680
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/cost-budget.module.ts
@@ -0,0 +1,31 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+// NEW module — NOT yet registered in app.module.ts.
+
+import { Module } from '@nestjs/common';
+
+import { CostBudgetController } from './controllers/cost-budget.controller';
+import { BudgetGateManager } from './managers/budget-gate.manager';
+import { BudgetResetManager } from './managers/budget-reset.manager';
+import { BudgetWarningManager } from './managers/budget-warning.manager';
+import { SpendTrackerManager } from './managers/spend-tracker.manager';
+import { UserCostBudgetRepository } from './repositories/user-cost-budget.repository';
+import { CostBudgetService } from './services/cost-budget.service';
+
+/**
+ * Wires the cost-budget controller, service, managers and repository together.
+ * NOTE(review): `BudgetResetManager` uses `@Cron`, so `ScheduleModule.forRoot()` presumably
+ * has to be imported by the host app before this module is registered — confirm.
+ */
+@Module({
+  controllers: [CostBudgetController],
+  providers: [
+    CostBudgetService,
+    BudgetGateManager,
+    SpendTrackerManager,
+    BudgetWarningManager,
+    BudgetResetManager,
+    UserCostBudgetRepository,
+  ],
+  exports: [CostBudgetService, BudgetGateManager, SpendTrackerManager],
+})
+export class CostBudgetModule {}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/dto/check-budget.dto.ts b/apps/claw-routing-service/src/modules/cost-budget/dto/check-budget.dto.ts
new file mode 100644
index 00000000..852d46f0
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/dto/check-budget.dto.ts
@@ -0,0 +1,13 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+import { z } from 'zod';
+
+/** Zod schema for a budget-check request. */
+export const checkBudgetSchema = z.object({
+  userId: z.string().min(1).max(200),
+  orgId: z.string().max(200).optional(),
+  estimatedCostUsd: z.number().nonnegative().max(10_000),
+});
+
+/** Validated shape produced by {@link checkBudgetSchema}. */
+export type CheckBudgetDto = z.infer<typeof checkBudgetSchema>;
diff --git a/apps/claw-routing-service/src/modules/cost-budget/dto/create-budget.dto.ts b/apps/claw-routing-service/src/modules/cost-budget/dto/create-budget.dto.ts
new file mode 100644
index 00000000..2b19e744
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/dto/create-budget.dto.ts
@@ -0,0 +1,17 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+import { z } from 'zod';
+
+import { BUDGET_DEFAULT_WARN_AT_PERCENT } from '../constants/budget.constants';
+
+/** Zod schema for creating a budget; omitted `warnAtPercent` / `overrideAllowed` take their defaults. */
+export const createBudgetSchema = z.object({
+  scope: z.enum(['USER', 'ORG']),
+  ownerId: z.string().min(1).max(200),
+  monthlyCapUsd: z.number().positive().max(1_000_000),
+  warnAtPercent: z.number().int().min(1).max(100).default(BUDGET_DEFAULT_WARN_AT_PERCENT),
+  overrideAllowed: z.boolean().default(false),
+});
+
+/** Validated shape produced by {@link createBudgetSchema} (defaults applied). */
+export type CreateBudgetDto = z.infer<typeof createBudgetSchema>;
diff --git a/apps/claw-routing-service/src/modules/cost-budget/dto/update-budget.dto.ts b/apps/claw-routing-service/src/modules/cost-budget/dto/update-budget.dto.ts
new file mode 100644
index 00000000..21555947
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/dto/update-budget.dto.ts
@@ -0,0 +1,13 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+import { z } from 'zod';
+
+/** Zod schema for a partial budget update; every field is optional. */
+export const updateBudgetSchema = z.object({
+  monthlyCapUsd: z.number().positive().max(1_000_000).optional(),
+  warnAtPercent: z.number().int().min(1).max(100).optional(),
+  overrideAllowed: z.boolean().optional(),
+});
+
+/** Validated shape produced by {@link updateBudgetSchema}. */
+export type UpdateBudgetDto = z.infer<typeof updateBudgetSchema>;
diff --git a/apps/claw-routing-service/src/modules/cost-budget/managers/budget-gate.manager.ts b/apps/claw-routing-service/src/modules/cost-budget/managers/budget-gate.manager.ts
new file mode 100644
index 00000000..9cb5fcd5
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/managers/budget-gate.manager.ts
@@ -0,0 +1,25 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+// Pre-routing budget check.
+
+import { Injectable, Logger } from '@nestjs/common';
+
+import type { BudgetCheckInput, BudgetCheckResult } from '../types/budget.types';
+
+/** Pre-routing budget gate. Scaffold only: `check` is not implemented yet. */
+@Injectable()
+export class BudgetGateManager {
+  private readonly logger = new Logger(BudgetGateManager.name);
+
+  /**
+   * Scaffold placeholder for the budget check.
+   *
+   * @param _input - Budget check input (currently unused).
+   * @throws Error always, after logging a warning, until stream R.4 is implemented.
+   */
+  async check(_input: BudgetCheckInput): Promise<BudgetCheckResult> {
+    this.logger.warn('BudgetGateManager.check: SCAFFOLD only');
+    throw new Error(
+      'SCAFFOLD-R4 — BudgetGateManager.check not implemented; see docs/15-ai-context/routing-flagship-streams/05-r4-cost-budget-intelligence.md',
+    );
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/managers/budget-reset.manager.ts b/apps/claw-routing-service/src/modules/cost-budget/managers/budget-reset.manager.ts
new file mode 100644
index 00000000..55481619
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/managers/budget-reset.manager.ts
@@ -0,0 +1,16 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+// Reset cron (scheduled hourly) — resets every UserCostBudget where resetAt <= now.
+
+import { Injectable, Logger } from '@nestjs/common';
+import { Cron, CronExpression } from '@nestjs/schedule';
+/** Resets expired budgets on a schedule. Scaffold only: `tick` is not implemented and always throws. */
+@Injectable()
+export class BudgetResetManager {
+  private readonly logger = new Logger(BudgetResetManager.name);
+  // NOTE(review): scheduled hourly, so once scheduling is active this scaffold throws on every tick — confirm before registering.
+  @Cron(CronExpression.EVERY_HOUR)
+  async tick(): Promise<void> {
+    this.logger.warn('BudgetResetManager.tick: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R4 — BudgetResetManager.tick not implemented');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/managers/budget-warning.manager.ts b/apps/claw-routing-service/src/modules/cost-budget/managers/budget-warning.manager.ts
new file mode 100644
index 00000000..7f0808e1
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/managers/budget-warning.manager.ts
@@ -0,0 +1,19 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Budget warning emitter. Scaffold only: not implemented yet. */
+@Injectable()
+export class BudgetWarningManager {
+  private readonly logger = new Logger(BudgetWarningManager.name);
+
+  /**
+   * Scaffold placeholder; all three parameters are currently unused.
+   *
+   * @throws Error always, after logging a warning.
+   */
+  async checkAndEmitWarning(_budgetId: string, _currentSpendUsd: number, _capUsd: number): Promise<void> {
+    this.logger.warn('BudgetWarningManager.checkAndEmitWarning: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — BudgetWarningManager.checkAndEmitWarning not implemented');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/managers/spend-tracker.manager.ts b/apps/claw-routing-service/src/modules/cost-budget/managers/spend-tracker.manager.ts
new file mode 100644
index 00000000..8e1e3c68
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/managers/spend-tracker.manager.ts
@@ -0,0 +1,19 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Spend tracker. Scaffold only: not implemented yet. */
+@Injectable()
+export class SpendTrackerManager {
+  private readonly logger = new Logger(SpendTrackerManager.name);
+
+  /**
+   * Scaffold placeholder; all three parameters are currently unused.
+   *
+   * @throws Error always, after logging a warning.
+   */
+  async incrementSpend(_userId: string, _orgId: string | undefined, _amountUsd: number): Promise<void> {
+    this.logger.warn('SpendTrackerManager.incrementSpend: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — SpendTrackerManager.incrementSpend not implemented');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/repositories/user-cost-budget.repository.ts b/apps/claw-routing-service/src/modules/cost-budget/repositories/user-cost-budget.repository.ts
new file mode 100644
index 00000000..d0c06196
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/repositories/user-cost-budget.repository.ts
@@ -0,0 +1,35 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+// Pure data access — uses PrismaService once UserCostBudget model is added.
+// Per repository rule: NO throws. Return null/empty until implemented.
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Data-access scaffold for user cost budgets; every method logs a warning and returns an empty value. */
+@Injectable()
+export class UserCostBudgetRepository {
+  private readonly logger = new Logger(UserCostBudgetRepository.name);
+
+  /** Scaffold: always resolves to `null`. */
+  async findActiveForOwner(_scope: string, _ownerId: string): Promise<unknown> {
+    this.logger.warn('UserCostBudgetRepository.findActiveForOwner: SCAFFOLD-R4 returns null');
+    return null;
+  }
+
+  /** Scaffold: always resolves to `null`. */
+  async create(_input: unknown): Promise<unknown> {
+    this.logger.warn('UserCostBudgetRepository.create: SCAFFOLD-R4 returns null');
+    return null;
+  }
+
+  /** Scaffold: always resolves to `null`. */
+  async update(_id: string, _input: unknown): Promise<unknown> {
+    this.logger.warn('UserCostBudgetRepository.update: SCAFFOLD-R4 returns null');
+    return null;
+  }
+
+  /** Scaffold: always resolves to an empty array. */
+  async findExpired(_now: Date): Promise<unknown[]> {
+    this.logger.warn('UserCostBudgetRepository.findExpired: SCAFFOLD-R4 returns empty');
+    return [];
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/services/cost-budget.service.ts b/apps/claw-routing-service/src/modules/cost-budget/services/cost-budget.service.ts
new file mode 100644
index 00000000..329abe39
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/services/cost-budget.service.ts
@@ -0,0 +1,47 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/**
+ * Cost-budget application service.
+ * Scaffold only: every method logs a warning and then throws.
+ */
+@Injectable()
+export class CostBudgetService {
+  private readonly logger = new Logger(CostBudgetService.name);
+
+  async getMine(): Promise<unknown> {
+    this.logger.warn('CostBudgetService.getMine: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — CostBudgetService.getMine not implemented');
+  }
+
+  async getMineForecast(): Promise<unknown> {
+    this.logger.warn('CostBudgetService.getMineForecast: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — CostBudgetService.getMineForecast not implemented');
+  }
+
+  async updateMine(_body: unknown): Promise<unknown> {
+    this.logger.warn('CostBudgetService.updateMine: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — CostBudgetService.updateMine not implemented');
+  }
+
+  async listAll(): Promise<unknown> {
+    this.logger.warn('CostBudgetService.listAll: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — CostBudgetService.listAll not implemented');
+  }
+
+  async create(_body: unknown): Promise<unknown> {
+    this.logger.warn('CostBudgetService.create: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — CostBudgetService.create not implemented');
+  }
+
+  async update(_id: string, _body: unknown): Promise<unknown> {
+    this.logger.warn('CostBudgetService.update: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — CostBudgetService.update not implemented');
+  }
+
+  async check(_body: unknown): Promise<unknown> {
+    this.logger.warn('CostBudgetService.check: SCAFFOLD-R4 only');
+    throw new Error('SCAFFOLD-R4 — CostBudgetService.check not implemented');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/cost-budget/types/budget.types.ts b/apps/claw-routing-service/src/modules/cost-budget/types/budget.types.ts
new file mode 100644
index 00000000..6c71032c
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/cost-budget/types/budget.types.ts
@@ -0,0 +1,20 @@
+// SCAFFOLD: stream R.4 (05-r4-cost-budget-intelligence)
+
+export type BudgetScope = 'USER' | 'ORG';
+export type BudgetStatus = 'OK' | 'WARN' | 'EXCEEDED';
+
+/** Input accepted by `BudgetGateManager.check`. */
+export type BudgetCheckInput = {
+  userId: string;
+  orgId?: string;
+  estimatedCostUsd: number;
+};
+
+/** Result type imported by `BudgetGateManager` for its `check` method. */
+export type BudgetCheckResult = {
+  status: BudgetStatus;
+  remainingUsd: number;
+  percentOfCap: number;
+  overrideAllowed: boolean;
+  blockingScope?: BudgetScope;
+};
diff --git a/apps/claw-routing-service/src/modules/intelligence/constants/intelligence.constants.ts b/apps/claw-routing-service/src/modules/intelligence/constants/intelligence.constants.ts
new file mode 100644
index 00000000..0848214d
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/constants/intelligence.constants.ts
@@ -0,0 +1,11 @@
+// SCAFFOLD: stream R.8 (09-r8-advanced-intelligence)
+
+export const PROMPT_LENGTH_HEADROOM_FACTOR = 1.2;
+/** Rolling window length in milliseconds (5 minutes). */
+export const LATENCY_CIRCUIT_ROLLING_WINDOW_MS = 5 * 60 * 1000;
+export const LATENCY_CIRCUIT_MIN_SAMPLES = 20;
+export const MID_STREAM_KILL_GRACE_MS = 500;
+export const CONSENSUS_MIN_MODELS = 3;
+export const CONSENSUS_AGREEMENT_THRESHOLD = 0.7;
+export const COST_QUALITY_SLIDER_DEFAULT = 0.5;
+export const FINE_TUNE_BIAS_WEIGHT_MAX = 0.3;
diff --git a/apps/claw-routing-service/src/modules/intelligence/intelligence.module.ts b/apps/claw-routing-service/src/modules/intelligence/intelligence.module.ts
new file mode 100644
index 00000000..87218660
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/intelligence.module.ts
@@ -0,0 +1,40 @@
+// SCAFFOLD: stream R.8 (09-r8-advanced-intelligence)
+// Umbrella module for 9 independent sub-features. NOT yet registered.
+
+import { Module } from '@nestjs/common';
+
+import { ConsensusModeManager } from './managers/consensus-mode.manager';
+import { CostQualitySliderManager } from './managers/cost-quality-slider.manager';
+import { EmbeddingRouterManager } from './managers/embedding-router.manager';
+import { FineTunePreferenceManager } from './managers/fine-tune-preference.manager';
+import { LatencyCircuitBreakerManager } from './managers/latency-circuit-breaker.manager';
+import { MidStreamSwitcherManager } from './managers/mid-stream-switcher.manager';
+import { MultiIntentSplitterManager } from './managers/multi-intent-splitter.manager';
+import { PromptLengthGuardManager } from './managers/prompt-length-guard.manager';
+import { RegionRouterManager } from './managers/region-router.manager';
+/** Registers and re-exports the same nine intelligence managers. */
+@Module({
+  providers: [
+    PromptLengthGuardManager,
+    LatencyCircuitBreakerManager,
+    MidStreamSwitcherManager,
+    FineTunePreferenceManager,
+    RegionRouterManager,
+    MultiIntentSplitterManager,
+    EmbeddingRouterManager,
+    ConsensusModeManager,
+    CostQualitySliderManager,
+  ],
+  exports: [
+    PromptLengthGuardManager,
+    LatencyCircuitBreakerManager,
+    MidStreamSwitcherManager,
+    FineTunePreferenceManager,
+    RegionRouterManager,
+    MultiIntentSplitterManager,
+    EmbeddingRouterManager,
+    ConsensusModeManager,
+    CostQualitySliderManager,
+  ],
+})
+export class IntelligenceModule {}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/consensus-mode.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/consensus-mode.manager.ts
new file mode 100644
index 00000000..669e2698
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/consensus-mode.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.8 (09-r8-advanced-intelligence) — fire 3 models, score agreement
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.8: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class ConsensusModeManager {
+  private readonly logger = new Logger(ConsensusModeManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('ConsensusModeManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.8 — ConsensusModeManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/cost-quality-slider.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/cost-quality-slider.manager.ts
new file mode 100644
index 00000000..0d148022
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/cost-quality-slider.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.9 (09-r8-advanced-intelligence) — apply user cost/quality slider to scoring weight
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.9: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class CostQualitySliderManager {
+  private readonly logger = new Logger(CostQualitySliderManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('CostQualitySliderManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.9 — CostQualitySliderManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/embedding-router.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/embedding-router.manager.ts
new file mode 100644
index 00000000..c9934c3d
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/embedding-router.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.7 (09-r8-advanced-intelligence) — pick embedding-specific model
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.7: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class EmbeddingRouterManager {
+  private readonly logger = new Logger(EmbeddingRouterManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('EmbeddingRouterManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.7 — EmbeddingRouterManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/fine-tune-preference.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/fine-tune-preference.manager.ts
new file mode 100644
index 00000000..136c49a6
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/fine-tune-preference.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.4 (09-r8-advanced-intelligence) — boost user fine-tunes for their domain
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.4: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class FineTunePreferenceManager {
+  private readonly logger = new Logger(FineTunePreferenceManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('FineTunePreferenceManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.4 — FineTunePreferenceManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/latency-circuit-breaker.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/latency-circuit-breaker.manager.ts
new file mode 100644
index 00000000..e663dbc8
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/latency-circuit-breaker.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.2 (09-r8-advanced-intelligence) — open circuit when p95 latency exceeds threshold
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.2: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class LatencyCircuitBreakerManager {
+  private readonly logger = new Logger(LatencyCircuitBreakerManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('LatencyCircuitBreakerManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.2 — LatencyCircuitBreakerManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/mid-stream-switcher.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/mid-stream-switcher.manager.ts
new file mode 100644
index 00000000..e56691ef
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/mid-stream-switcher.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.3 (09-r8-advanced-intelligence) — kill slow first-chunk + reroute mid-stream
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.3: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class MidStreamSwitcherManager {
+  private readonly logger = new Logger(MidStreamSwitcherManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('MidStreamSwitcherManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.3 — MidStreamSwitcherManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/multi-intent-splitter.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/multi-intent-splitter.manager.ts
new file mode 100644
index 00000000..64fd4bce
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/multi-intent-splitter.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.6 (09-r8-advanced-intelligence) — split N intents into N parallel calls
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.6: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class MultiIntentSplitterManager {
+  private readonly logger = new Logger(MultiIntentSplitterManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('MultiIntentSplitterManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.6 — MultiIntentSplitterManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/prompt-length-guard.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/prompt-length-guard.manager.ts
new file mode 100644
index 00000000..dc055a2d
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/prompt-length-guard.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.1 (09-r8-advanced-intelligence) — filter candidates with insufficient context window
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.1: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class PromptLengthGuardManager {
+  private readonly logger = new Logger(PromptLengthGuardManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('PromptLengthGuardManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.1 — PromptLengthGuardManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/managers/region-router.manager.ts b/apps/claw-routing-service/src/modules/intelligence/managers/region-router.manager.ts
new file mode 100644
index 00000000..c398ab8a
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/managers/region-router.manager.ts
@@ -0,0 +1,14 @@
+// SCAFFOLD: stream R.8.9.5 (09-r8-advanced-intelligence) — pick regional endpoint
+
+import { Injectable, Logger } from '@nestjs/common';
+
+/** Scaffold for stream R.8.9.5: `apply` logs a warning and always throws until implemented. */
+@Injectable()
+export class RegionRouterManager {
+  private readonly logger = new Logger(RegionRouterManager.name);
+
+  apply(_input: unknown): unknown {
+    this.logger.warn('RegionRouterManager.apply: SCAFFOLD only');
+    throw new Error('SCAFFOLD-R8.9.5 — RegionRouterManager.apply not implemented; see docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md');
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/intelligence/types/intelligence.types.ts b/apps/claw-routing-service/src/modules/intelligence/types/intelligence.types.ts
new file mode 100644
index 00000000..243695e3
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/intelligence/types/intelligence.types.ts
@@ -0,0 +1,23 @@
+// SCAFFOLD: stream R.8 (09-r8-advanced-intelligence)
+
+export type CostQualityWeight = {
+  qualityWeight: number;
+  costWeight: number;
+};
+
+export type CircuitBreakerTrigger = 'FAILURE_RATE' | 'LATENCY_P95';
+
+export type RegionPreferenceScope = 'USER' | 'ORG' | 'GLOBAL';
+
+export type MultiIntent = {
+  intent: string;
+  confidence: number;
+  routedProvider?: string;
+  routedModel?: string;
+};
+
+export type ConsensusResult = {
+  models: { provider: string; model: string; answer: string; confidence: number }[];
+  agreementScore: number;
+  winnerIndex: number;
+};
diff --git a/apps/claw-routing-service/src/modules/language-detection/constants/language-codes.constants.ts b/apps/claw-routing-service/src/modules/language-detection/constants/language-codes.constants.ts
new file mode 100644
index 00000000..c4e0fc3d
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/language-detection/constants/language-codes.constants.ts
@@ -0,0 +1,9 @@
+// SCAFFOLD: stream R.7 (08-r7-i18n-non-english)
+
+export const SUPPORTED_LOCALES = ['en', 'ar', 'de', 'es', 'fr', 'it', 'pt', 'ru'] as const;
+/** Set built from {@link SUPPORTED_LOCALES}; typed `Set<string>` so `has` accepts any string. */
+export const SUPPORTED_LOCALES_SET = new Set<string>(SUPPORTED_LOCALES);
+
+export const LANGUAGE_CONFIDENCE_THRESHOLD = 0.6;
+export const LANGUAGE_FALLBACK = 'en';
+export const LANGUAGE_DETECTION_MIN_CHARS = 3;
diff --git a/apps/claw-routing-service/src/modules/language-detection/constants/language-rtl.constants.ts b/apps/claw-routing-service/src/modules/language-detection/constants/language-rtl.constants.ts
new file mode 100644
index 00000000..1dd590df
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/language-detection/constants/language-rtl.constants.ts
@@ -0,0 +1,19 @@
+// SCAFFOLD: stream R.7 (08-r7-i18n-non-english)
+
+/** Lower-case language codes treated as right-to-left. */
+export const RTL_LANGUAGES = new Set<string>(['ar', 'he', 'fa', 'ur', 'ckb', 'sd', 'yi']);
+
+/**
+ * Reports whether a language code is right-to-left.
+ *
+ * Matching is case-insensitive and uses only the primary subtag, so regional
+ * tags such as `ar-EG` or `he_IL` are recognised as well as bare codes.
+ *
+ * @param lang - Language code or tag, e.g. `ar`, `AR`, `ar-EG`.
+ * @returns `true` when the primary subtag is in {@link RTL_LANGUAGES}.
+ */
+export function isRtlLanguage(lang: string): boolean {
+  // Keep only the text before the first `-` or `_` separator.
+  const [primary = ''] = lang.trim().toLowerCase().split(/[-_]/);
+  return RTL_LANGUAGES.has(primary);
+}
diff --git a/apps/claw-routing-service/src/modules/language-detection/controllers/language-detection.controller.ts b/apps/claw-routing-service/src/modules/language-detection/controllers/language-detection.controller.ts
new file mode 100644
index 00000000..f30a9bdd
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/language-detection/controllers/language-detection.controller.ts
@@ -0,0 +1,17 @@
+// SCAFFOLD: stream R.7 (08-r7-i18n-non-english)
+
+import { Body, Controller, Post } from '@nestjs/common';
+
+import { LanguageDetectionService } from '../services/language-detection.service';
+
+/** HTTP endpoint for language detection, mounted at `routing/detect-language`. */
+@Controller('routing/detect-language')
+export class LanguageDetectionController {
+  constructor(private readonly service: LanguageDetectionService) {}
+
+  /** `POST /` — forwards the raw, unvalidated body to `LanguageDetectionService.detect`. */
+  @Post()
+  async detect(@Body() body: unknown): Promise<unknown> {
+    return this.service.detect(body);
+  }
+}
diff --git a/apps/claw-routing-service/src/modules/language-detection/dto/detect-language.dto.ts b/apps/claw-routing-service/src/modules/language-detection/dto/detect-language.dto.ts
new file mode 100644
index 00000000..23ed3018
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/language-detection/dto/detect-language.dto.ts
@@ -0,0 +1,11 @@
+// SCAFFOLD: stream R.7 (08-r7-i18n-non-english)
+
+import { z } from 'zod';
+
+/** Zod schema for a language-detection request; `message` may be empty and its length is capped at 200_000. */
+export const detectLanguageSchema = z.object({
+  message: z.string().min(0).max(200_000),
+});
+
+/** Validated shape produced by {@link detectLanguageSchema}. */
+export type DetectLanguageDto = z.infer<typeof detectLanguageSchema>;
diff --git a/apps/claw-routing-service/src/modules/language-detection/language-detection.module.ts b/apps/claw-routing-service/src/modules/language-detection/language-detection.module.ts
new file mode 100644
index 00000000..817f9d15
--- /dev/null
+++ b/apps/claw-routing-service/src/modules/language-detection/language-detection.module.ts
@@ -0,0 +1,16 @@
+// SCAFFOLD: stream R.7 (08-r7-i18n-non-english)
+// NEW module — NOT yet registered.
+ +import { Module } from '@nestjs/common'; + +import { LanguageDetectionController } from './controllers/language-detection.controller'; +import { CodeMixedDetectorManager } from './managers/code-mixed-detector.manager'; +import { LanguageClassifierManager } from './managers/language-classifier.manager'; +import { LanguageDetectionService } from './services/language-detection.service'; + +@Module({ + controllers: [LanguageDetectionController], + providers: [LanguageDetectionService, LanguageClassifierManager, CodeMixedDetectorManager], + exports: [LanguageDetectionService], +}) +export class LanguageDetectionModule {} diff --git a/apps/claw-routing-service/src/modules/language-detection/managers/code-mixed-detector.manager.ts b/apps/claw-routing-service/src/modules/language-detection/managers/code-mixed-detector.manager.ts new file mode 100644 index 00000000..9684b1e2 --- /dev/null +++ b/apps/claw-routing-service/src/modules/language-detection/managers/code-mixed-detector.manager.ts @@ -0,0 +1,15 @@ +// SCAFFOLD: stream R.7 (08-r7-i18n-non-english) + +import { Injectable, Logger } from '@nestjs/common'; + +@Injectable() +export class CodeMixedDetectorManager { + private readonly logger = new Logger(CodeMixedDetectorManager.name); + + detect(_message: string, _primaryLang: string): { isCodeMixed: boolean; secondaryLang?: string } { + this.logger.warn('CodeMixedDetectorManager.detect: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R7 — CodeMixedDetectorManager.detect not implemented; see docs/15-ai-context/routing-flagship-streams/08-r7-i18n-non-english.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/language-detection/managers/language-classifier.manager.ts b/apps/claw-routing-service/src/modules/language-detection/managers/language-classifier.manager.ts new file mode 100644 index 00000000..fda68d23 --- /dev/null +++ b/apps/claw-routing-service/src/modules/language-detection/managers/language-classifier.manager.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: 
// SCAFFOLD: stream R.7 (08-r7-i18n-non-english)

/** Result shape returned by the language classifier for a single message. */
export type LanguageDetectionResult = {
  detectedLanguage: string; // ISO-639-1, e.g. 'en', 'ar', 'es'
  languageConfidence: number; // 0..1
  isCodeMixed: boolean;
  secondaryLanguage?: string;
  secondaryConfidence?: number;
  isRtl: boolean;
};

/**
 * Numeric strength per language code, e.g. `{ en: 1.0, ar: 0.92, ... }`.
 *
 * `Record` is generic and does not compile without both type arguments.
 */
export type LanguageStrength = Record<string, number>;
a/apps/claw-routing-service/src/modules/modality-detection/constants/embedding-keywords.constants.ts b/apps/claw-routing-service/src/modules/modality-detection/constants/embedding-keywords.constants.ts new file mode 100644 index 00000000..29f7104b --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/constants/embedding-keywords.constants.ts @@ -0,0 +1,14 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +export const EMBEDDING_VERBS = [ + 'embed', + 'vectorize', + 'create embedding', + 'generate embedding', + 'store in vector db', + 'retrieve similar', + 'find similar', + 'semantic search', + 'nearest neighbor', + 'index this', +]; diff --git a/apps/claw-routing-service/src/modules/modality-detection/constants/image-mime.constants.ts b/apps/claw-routing-service/src/modules/modality-detection/constants/image-mime.constants.ts new file mode 100644 index 00000000..3e87c472 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/constants/image-mime.constants.ts @@ -0,0 +1,13 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +export const IMAGE_MIME_TYPES = [ + 'image/png', + 'image/jpeg', + 'image/gif', + 'image/webp', + 'image/bmp', + 'image/tiff', + 'image/svg+xml', +]; + +export const IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.tif', '.tiff', '.svg']; diff --git a/apps/claw-routing-service/src/modules/modality-detection/constants/pdf-mime.constants.ts b/apps/claw-routing-service/src/modules/modality-detection/constants/pdf-mime.constants.ts new file mode 100644 index 00000000..56aeb5d9 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/constants/pdf-mime.constants.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +export const PDF_MIME_TYPES = ['application/pdf']; +export const PDF_EXTENSIONS = ['.pdf']; +export const PDF_SUMMARIZE_VERBS = [ + 'summarize', + 'summary', + 'explain', + 'extract', + 'q&a', 
+ 'tl;dr', + 'tldr', + 'key points', + 'main ideas', + 'what does this pdf', + 'read this pdf', +]; diff --git a/apps/claw-routing-service/src/modules/modality-detection/constants/spreadsheet-mime.constants.ts b/apps/claw-routing-service/src/modules/modality-detection/constants/spreadsheet-mime.constants.ts new file mode 100644 index 00000000..1cb29a0b --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/constants/spreadsheet-mime.constants.ts @@ -0,0 +1,10 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +export const SPREADSHEET_MIME_TYPES = [ + 'application/vnd.ms-excel', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.oasis.opendocument.spreadsheet', + 'text/csv', +]; + +export const SPREADSHEET_EXTENSIONS = ['.xlsx', '.xls', '.csv', '.ods']; diff --git a/apps/claw-routing-service/src/modules/modality-detection/constants/tool-calling-keywords.constants.ts b/apps/claw-routing-service/src/modules/modality-detection/constants/tool-calling-keywords.constants.ts new file mode 100644 index 00000000..157360ba --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/constants/tool-calling-keywords.constants.ts @@ -0,0 +1,20 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +export const TOOL_CALLING_VERBS = [ + 'book', + 'schedule', + 'send email', + 'send a message', + 'call function', + 'invoke', + 'run command', + 'execute', + 'create ticket', + 'open pr', + 'open pull request', + 'create issue', + 'post to slack', + 'add to calendar', + 'set reminder', + 'create meeting', +]; diff --git a/apps/claw-routing-service/src/modules/modality-detection/constants/video-mime.constants.ts b/apps/claw-routing-service/src/modules/modality-detection/constants/video-mime.constants.ts new file mode 100644 index 00000000..a28be5a8 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/constants/video-mime.constants.ts @@ -0,0 +1,13 @@ +// 
// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection)

// NOTE: every pattern below except YOUTUBE_HOST_REGEX carries the `g` flag, so
// the shared RegExp object keeps `lastIndex` between `test()` / `exec()` calls.
// Prefer `String.prototype.match` / `matchAll`, or reset `lastIndex` first.

/** Anchored check that a string starts with a YouTube origin (`youtube.com`, `youtu.be`, `m.youtube.com`). */
export const YOUTUBE_HOST_REGEX = /^https?:\/\/(www\.)?(youtube\.com|youtu\.be|m\.youtube\.com)\//iu;

/**
 * Finds `youtube.com/watch` URLs carrying a `v=` video id.
 *
 * Other query parameters may precede `v` (e.g. `watch?feature=share&v=ID`).
 * The prefix group is lazy, so URLs that already start with `?v=` match
 * exactly the same text as before; the match still ends right after the id.
 */
export const YOUTUBE_WATCH_REGEX =
  /https?:\/\/(?:www\.|m\.)?youtube\.com\/watch\?(?:[^\s#&]+&)*?v=[\w-]+/giu;

/** Finds `youtu.be/<id>` short links. */
export const YOUTUBE_SHORT_REGEX = /https?:\/\/youtu\.be\/[\w-]+/giu;

/** Finds `youtube.com/shorts/<id>` links. */
export const YOUTUBE_SHORTS_REGEX = /https?:\/\/(?:www\.)?youtube\.com\/shorts\/[\w-]+/giu;

/** Finds `youtube.com/playlist?list=<id>` links. */
export const YOUTUBE_PLAYLIST_REGEX = /https?:\/\/(?:www\.)?youtube\.com\/playlist\?list=[\w-]+/giu;

/** Lower-case phrases listed as summarize-style requests about a video. */
export const YOUTUBE_SUMMARIZE_VERBS = [
  'summarize',
  'summary',
  'tl;dr',
  'transcript',
  'what does this video say',
  'explain this video',
  'key points',
  'highlights',
];
z.object({ + message: z.string().min(0).max(200_000), + attachments: z.array(attachmentMetaSchema).max(50).default([]), + clientStreamingExpected: z.boolean().optional(), +}); + +export type DetectModalityDto = z.infer; diff --git a/apps/claw-routing-service/src/modules/modality-detection/managers/attachment-intent.manager.ts b/apps/claw-routing-service/src/modules/modality-detection/managers/attachment-intent.manager.ts new file mode 100644 index 00000000..c7844841 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/managers/attachment-intent.manager.ts @@ -0,0 +1,20 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +import { Injectable, Logger } from '@nestjs/common'; + +import type { + AttachmentIntentResult, + AttachmentMeta, +} from '../types/modality-detection.types'; + +@Injectable() +export class AttachmentIntentManager { + private readonly logger = new Logger(AttachmentIntentManager.name); + + detect(_message: string, _attachments: AttachmentMeta[]): AttachmentIntentResult { + this.logger.warn('AttachmentIntentManager.detect: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R2 — AttachmentIntentManager.detect not implemented; see docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/modality-detection/managers/embedding-intent.manager.ts b/apps/claw-routing-service/src/modules/modality-detection/managers/embedding-intent.manager.ts new file mode 100644 index 00000000..902ed55c --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/managers/embedding-intent.manager.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +import { Injectable, Logger } from '@nestjs/common'; + +import type { EmbeddingIntentResult } from '../types/modality-detection.types'; + +@Injectable() +export class EmbeddingIntentManager { + private readonly logger = new Logger(EmbeddingIntentManager.name); + 
+ detect(_message: string): EmbeddingIntentResult { + this.logger.warn('EmbeddingIntentManager.detect: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R2 — EmbeddingIntentManager.detect not implemented; see docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/modality-detection/managers/streaming-intent.manager.ts b/apps/claw-routing-service/src/modules/modality-detection/managers/streaming-intent.manager.ts new file mode 100644 index 00000000..a2ecccf3 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/managers/streaming-intent.manager.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +import { Injectable, Logger } from '@nestjs/common'; + +import type { StreamingIntentResult } from '../types/modality-detection.types'; + +@Injectable() +export class StreamingIntentManager { + private readonly logger = new Logger(StreamingIntentManager.name); + + detect(_clientStreamingExpected: boolean | undefined): StreamingIntentResult { + this.logger.warn('StreamingIntentManager.detect: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R2 — StreamingIntentManager.detect not implemented; see docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/modality-detection/managers/tool-calling-intent.manager.ts b/apps/claw-routing-service/src/modules/modality-detection/managers/tool-calling-intent.manager.ts new file mode 100644 index 00000000..545bc5a9 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/managers/tool-calling-intent.manager.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +import { Injectable, Logger } from '@nestjs/common'; + +import type { ToolCallingIntentResult } from '../types/modality-detection.types'; + +@Injectable() +export class ToolCallingIntentManager { + private 
readonly logger = new Logger(ToolCallingIntentManager.name); + + detect(_message: string): ToolCallingIntentResult { + this.logger.warn('ToolCallingIntentManager.detect: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R2 — ToolCallingIntentManager.detect not implemented; see docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/modality-detection/managers/url-intent.manager.ts b/apps/claw-routing-service/src/modules/modality-detection/managers/url-intent.manager.ts new file mode 100644 index 00000000..ea4f2cb9 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/managers/url-intent.manager.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) + +import { Injectable, Logger } from '@nestjs/common'; + +import type { UrlIntentResult } from '../types/modality-detection.types'; + +@Injectable() +export class UrlIntentManager { + private readonly logger = new Logger(UrlIntentManager.name); + + detect(_message: string): UrlIntentResult { + this.logger.warn('UrlIntentManager.detect: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R2 — UrlIntentManager.detect not implemented; see docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/modality-detection/modality-detection.module.ts b/apps/claw-routing-service/src/modules/modality-detection/modality-detection.module.ts new file mode 100644 index 00000000..d674f068 --- /dev/null +++ b/apps/claw-routing-service/src/modules/modality-detection/modality-detection.module.ts @@ -0,0 +1,26 @@ +// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection) +// NEW module — NOT yet imported by app.module.ts. Register only after activation. 
// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection)

/**
 * Service behind `ModalityDetectionController`.
 *
 * Scaffold only: the five intent managers are injected but not yet used, and
 * `detect` always fails.
 */
@Injectable()
export class ModalityDetectionService {
  private readonly logger = new Logger(ModalityDetectionService.name);

  constructor(
    private readonly urlIntent: UrlIntentManager,
    private readonly attachmentIntent: AttachmentIntentManager,
    private readonly toolCallingIntent: ToolCallingIntentManager,
    private readonly streamingIntent: StreamingIntentManager,
    private readonly embeddingIntent: EmbeddingIntentManager,
  ) {}

  /**
   * Placeholder for modality detection.
   *
   * The return type needs its type argument: a bare `Promise` does not
   * compile, and the controller forwards this value as a
   * `ModalityDetectionResult`.
   *
   * @param _input Parsed request; currently ignored.
   * @returns Never resolves; the returned promise always rejects.
   * @throws Error with the `SCAFFOLD-R2` marker on every call.
   */
  async detect(_input: DetectModalityDto): Promise<ModalityDetectionResult> {
    // Reference each injected manager so the unused-private-check passes
    // until the real implementation wires them up.
    void this.urlIntent;
    void this.attachmentIntent;
    void this.toolCallingIntent;
    void this.streamingIntent;
    void this.embeddingIntent;

    this.logger.warn('ModalityDetectionService.detect: SCAFFOLD only');
    throw new Error(
      'SCAFFOLD-R2 — ModalityDetectionService.detect not implemented; see docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md',
    );
  }
}
// SCAFFOLD: stream R.2 (03-r2-multimodal-intent-detection)

/**
 * Returns the lower-cased extension of `filename`, including the leading dot.
 *
 * Follows `path.extname`: only the last extension is returned
 * (`a.tar.gz` -> `.gz`) and a name without one yields `''`.
 */
export function getExtension(filename: string): string {
  return extname(filename).toLowerCase();
}

/**
 * Tells whether the extension of `filename` is one of `extensions`.
 *
 * @param filename File name or path to inspect.
 * @param extensions Candidates; expected lower-case with a leading dot, because
 *   the file's extension is lower-cased before the lookup.
 */
export function matchesAny(filename: string, extensions: readonly string[]): boolean {
  const ext = getExtension(filename);
  return extensions.includes(ext);
}

/**
 * Tells whether an attachment matches by MIME type or, failing that, by
 * file extension.
 *
 * Only the `type/subtype` part of `mimeType` is compared: parameters such as
 * `; charset=utf-8` and surrounding whitespace are dropped first, so
 * `Text/CSV; charset=utf-8` matches `text/csv`.
 *
 * @param mimeType Declared MIME type, optionally with parameters.
 * @param filename File name used for the extension fallback.
 * @param mimeTypes Accepted MIME types, lower-case and without parameters.
 * @param extensions Accepted extensions, lower-case with a leading dot.
 */
export function mimeOrExtensionMatches(
  mimeType: string,
  filename: string,
  mimeTypes: readonly string[],
  extensions: readonly string[],
): boolean {
  // Everything after the first ';' is a parameter list, not part of the type.
  const [rawEssence = ''] = mimeType.split(';', 1);
  const essence = rawEssence.trim().toLowerCase();
  if (mimeTypes.includes(essence)) return true;
  return matchesAny(filename, extensions);
}
// SCAFFOLD: stream R.5 (06-r5-operator-playground)

/**
 * Zod schema for a playground evaluation request.
 *
 * `attachments` defaults to `[]`, `userMode` to `'AUTO'`, and both comparison
 * flags to `true`, so only `message` is required in the input.
 * NOTE(review): no visible caller parses with this schema yet — the controller
 * forwards the raw body to the service.
 */
export const playgroundEvaluateSchema = z.object({
  message: z.string().min(1).max(200_000),
  attachments: z
    .array(
      z.object({
        fileId: z.string().min(1).max(200),
        filename: z.string().min(1).max(500),
        mimeType: z.string().min(1).max(200),
        sizeBytes: z.number().int().nonnegative().max(5_000_000_000),
      }),
    )
    .max(50)
    .default([]),
  userMode: z
    .enum(['AUTO', 'MANUAL_MODEL', 'LOCAL_ONLY', 'PRIVACY_FIRST', 'LOW_LATENCY', 'HIGH_REASONING', 'COST_SAVER'])
    .default('AUTO'),
  compareWithV2: z.boolean().default(true),
  compareWithOllamaRouter: z.boolean().default(true),
});

/**
 * Parsed (post-default) shape of a playground evaluation request.
 *
 * `z.infer` is generic; it must be applied to `typeof playgroundEvaluateSchema`
 * to compile and to keep the DTO in sync with the schema.
 */
export type PlaygroundEvaluateDto = z.infer<typeof playgroundEvaluateSchema>;
NOT yet registered. + +import { Module } from '@nestjs/common'; + +import { PlaygroundController } from './controllers/playground.controller'; +import { PlaygroundService } from './services/playground.service'; + +@Module({ + controllers: [PlaygroundController], + providers: [PlaygroundService], + exports: [PlaygroundService], +}) +export class PlaygroundModule {} diff --git a/apps/claw-routing-service/src/modules/playground/services/playground.service.ts b/apps/claw-routing-service/src/modules/playground/services/playground.service.ts new file mode 100644 index 00000000..56c189b4 --- /dev/null +++ b/apps/claw-routing-service/src/modules/playground/services/playground.service.ts @@ -0,0 +1,15 @@ +// SCAFFOLD: stream R.5 (06-r5-operator-playground) + +import { Injectable, Logger } from '@nestjs/common'; + +@Injectable() +export class PlaygroundService { + private readonly logger = new Logger(PlaygroundService.name); + + async evaluate(_body: unknown): Promise { + this.logger.warn('PlaygroundService.evaluate: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R5 — PlaygroundService.evaluate not implemented; see docs/15-ai-context/routing-flagship-streams/06-r5-operator-playground.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/playground/types/playground.types.ts b/apps/claw-routing-service/src/modules/playground/types/playground.types.ts new file mode 100644 index 00000000..9665caa1 --- /dev/null +++ b/apps/claw-routing-service/src/modules/playground/types/playground.types.ts @@ -0,0 +1,25 @@ +// SCAFFOLD: stream R.5 (06-r5-operator-playground) + +export type ScoreBreakdown = { + provider: string; + model: string; + scores: { + quality: number; + cost: number; + latency: number; + privacy: number; + familiarity: number; + }; + totalScore: number; + wasChosen: boolean; +}; + +export type PlaygroundEvaluationResult = { + v1Decision: unknown; + v2Decision?: unknown; + ollamaRouterDecision?: unknown; + scoreBreakdown: ScoreBreakdown[]; + candidateList: 
unknown[]; + modalityResult?: unknown; + workflowChoice?: unknown; +}; diff --git a/apps/claw-routing-service/src/modules/route-evaluator/constants/canary.constants.ts b/apps/claw-routing-service/src/modules/route-evaluator/constants/canary.constants.ts new file mode 100644 index 00000000..f941c56d --- /dev/null +++ b/apps/claw-routing-service/src/modules/route-evaluator/constants/canary.constants.ts @@ -0,0 +1,16 @@ +// SCAFFOLD: stream R.1/R.3 (02-r1r3-v2-evaluator-canary) + +export const CANARY_HASH_SALT = 'routing-v2-canary'; +export const CANARY_ROLLING_WINDOW_REQUESTS = 100; +export const CANARY_ROLLING_WINDOW_MS = 5 * 60 * 1000; +export const CANARY_WARMUP_GRACE_REQUESTS = 50; + +export const CANARY_FALLBACK_REASON_INVALID_SCHEMA = 'invalid_schema'; +export const CANARY_FALLBACK_REASON_SAFETY = 'safety'; +export const CANARY_FALLBACK_REASON_GUARDRAIL_BREACH = 'guardrail_breach'; +export const CANARY_FALLBACK_REASON_V2_THREW = 'v2_threw'; + +export const CANARY_GUARDRAIL_METRIC_REGRESSION = 'regression'; +export const CANARY_GUARDRAIL_METRIC_COST_INCREASE = 'cost_increase'; +export const CANARY_GUARDRAIL_METRIC_CONFIDENCE_DROP = 'confidence_drop'; +export const CANARY_GUARDRAIL_METRIC_FAILURE_RATE = 'failure_rate'; diff --git a/apps/claw-routing-service/src/modules/route-evaluator/managers/canary-bucket.manager.ts b/apps/claw-routing-service/src/modules/route-evaluator/managers/canary-bucket.manager.ts new file mode 100644 index 00000000..f95aa516 --- /dev/null +++ b/apps/claw-routing-service/src/modules/route-evaluator/managers/canary-bucket.manager.ts @@ -0,0 +1,20 @@ +// SCAFFOLD: stream R.1/R.3 (02-r1r3-v2-evaluator-canary) — replace with real implementation before activation. +// See docs/15-ai-context/routing-flagship-streams/02-r1r3-v2-evaluator-canary.md +// +// NOT registered in route-evaluator.module.ts — discoverable but inert. 
+ +import { Injectable, Logger } from '@nestjs/common'; + +import type { CanaryBucketInput, CanaryBucketResult } from '../types/canary.types'; + +@Injectable() +export class CanaryBucketManager { + private readonly logger = new Logger(CanaryBucketManager.name); + + isV2Bucket(_input: CanaryBucketInput): CanaryBucketResult { + this.logger.warn('CanaryBucketManager.isV2Bucket: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R1/R3 — CanaryBucketManager.isV2Bucket not implemented; see docs/15-ai-context/routing-flagship-streams/02-r1r3-v2-evaluator-canary.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/route-evaluator/types/canary.types.ts b/apps/claw-routing-service/src/modules/route-evaluator/types/canary.types.ts new file mode 100644 index 00000000..83fe1ea8 --- /dev/null +++ b/apps/claw-routing-service/src/modules/route-evaluator/types/canary.types.ts @@ -0,0 +1,25 @@ +// SCAFFOLD: stream R.1/R.3 (02-r1r3-v2-evaluator-canary) + +export type CanaryBucketInput = { + userId: string; + orgId?: string; + canaryPercent: number; + allowlist?: string[]; + denylist?: string[]; +}; + +export type CanaryBucketResult = { + isV2: boolean; + bucketReason: 'percent' | 'allowlist' | 'denylist' | 'rollback_switch_active' | 'flag_off'; +}; + +export type DecisionComparisonInput = { + v1Decision: { provider: string; model: string; confidence: number; costClass: string }; + v2Decision: { provider: string; model: string; confidence: number; costClass: string }; +}; + +export type DecisionComparisonResult = { + sameDecision: boolean; + confidenceDelta: number; + costClassChange: 'cheaper' | 'same' | 'more_expensive'; +}; diff --git a/apps/claw-routing-service/src/modules/route-evaluator/utilities/canary-hash.utility.ts b/apps/claw-routing-service/src/modules/route-evaluator/utilities/canary-hash.utility.ts new file mode 100644 index 00000000..d5c48203 --- /dev/null +++ b/apps/claw-routing-service/src/modules/route-evaluator/utilities/canary-hash.utility.ts @@ -0,0 
+1,22 @@ +// SCAFFOLD: stream R.1/R.3 (02-r1r3-v2-evaluator-canary) +// Pure stable bucketing — same input always yields same bucket. + +import { createHash } from 'node:crypto'; + +import { CANARY_HASH_SALT } from '../constants/canary.constants'; + +export function hashToBucket(userId: string, orgId: string | undefined): number { + const input = `${userId}|${orgId ?? ''}|${CANARY_HASH_SALT}`; + const hex = createHash('sha256').update(input).digest('hex').slice(0, 8); + return Number.parseInt(hex, 16) % 100; +} + +export function isInCanaryBucket( + userId: string, + orgId: string | undefined, + canaryPercent: number, +): boolean { + if (canaryPercent <= 0) return false; + if (canaryPercent >= 100) return true; + return hashToBucket(userId, orgId) < canaryPercent; +} diff --git a/apps/claw-routing-service/src/modules/route-evaluator/utilities/decision-comparator.utility.ts b/apps/claw-routing-service/src/modules/route-evaluator/utilities/decision-comparator.utility.ts new file mode 100644 index 00000000..a6b55ff3 --- /dev/null +++ b/apps/claw-routing-service/src/modules/route-evaluator/utilities/decision-comparator.utility.ts @@ -0,0 +1,9 @@ +// SCAFFOLD: stream R.1/R.3 (02-r1r3-v2-evaluator-canary) + +import type { DecisionComparisonInput, DecisionComparisonResult } from '../types/canary.types'; + +export function compareDecisions(_input: DecisionComparisonInput): DecisionComparisonResult { + throw new Error( + 'SCAFFOLD-R1/R3 — compareDecisions not implemented; see docs/15-ai-context/routing-flagship-streams/02-r1r3-v2-evaluator-canary.md', + ); +} diff --git a/apps/claw-routing-service/src/modules/routing/constants/learned-bias.constants.ts b/apps/claw-routing-service/src/modules/routing/constants/learned-bias.constants.ts new file mode 100644 index 00000000..8e3f4d80 --- /dev/null +++ b/apps/claw-routing-service/src/modules/routing/constants/learned-bias.constants.ts @@ -0,0 +1,15 @@ +// SCAFFOLD: stream R.1 (01-r1-learning-loop) + +export const 
LEARNED_BIAS_REASON_THUMBS_UP_HISTORY = 'learned_bias_thumbs_up_history'; +export const LEARNED_BIAS_REASON_THUMBS_DOWN_HISTORY = 'learned_bias_thumbs_down_history'; +export const LEARNED_BIAS_REASON_TOPIC_PROFILE = 'learned_bias_topic_profile'; +export const LEARNED_BIAS_REASON_SAMPLE_TOO_SMALL = 'learned_bias_sample_too_small'; +export const LEARNED_BIAS_REASON_NO_DATA = 'learned_bias_no_data'; +export const LEARNED_BIAS_REASON_PRIVACY_OVERRIDE = 'learned_bias_privacy_override_beat_bias'; +export const LEARNED_BIAS_REASON_CAPABILITY_OVERRIDE = 'learned_bias_capability_override_beat_bias'; +export const LEARNED_BIAS_REASON_MANUAL_OVERRIDE = 'learned_bias_manual_user_model_beat_bias'; +export const LEARNED_BIAS_REASON_THREAD_STICKY = 'learned_bias_thread_sticky'; +export const LEARNED_BIAS_REASON_THREAD_STICKY_UNHEALTHY = 'learned_bias_sticky_unhealthy_fallthrough'; + +export const LEARNED_BIAS_CACHE_TTL_MS = 5 * 60 * 1000; +export const LEARNED_BIAS_FETCH_LIMIT = 50; diff --git a/apps/claw-routing-service/src/modules/routing/managers/learned-bias.manager.ts b/apps/claw-routing-service/src/modules/routing/managers/learned-bias.manager.ts new file mode 100644 index 00000000..c01e6a10 --- /dev/null +++ b/apps/claw-routing-service/src/modules/routing/managers/learned-bias.manager.ts @@ -0,0 +1,20 @@ +// SCAFFOLD: stream R.1 (01-r1-learning-loop) — replace this stub with real implementation before activation. +// See docs/15-ai-context/routing-flagship-streams/01-r1-learning-loop.md +// +// NOT registered in routing.module.ts — discoverable but inert. 
+ +import { Injectable, Logger } from '@nestjs/common'; + +import type { BiasInput, BiasOutput } from '../types/learned-bias.types'; + +@Injectable() +export class LearnedBiasManager { + private readonly logger = new Logger(LearnedBiasManager.name); + + async applyBias(_input: BiasInput): Promise<BiasOutput> { + this.logger.warn('LearnedBiasManager.applyBias: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R1 — LearnedBiasManager.applyBias not implemented; see docs/15-ai-context/routing-flagship-streams/01-r1-learning-loop.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/routing/types/calibration.types.ts b/apps/claw-routing-service/src/modules/routing/types/calibration.types.ts new file mode 100644 index 00000000..1411a8f8 --- /dev/null +++ b/apps/claw-routing-service/src/modules/routing/types/calibration.types.ts @@ -0,0 +1,14 @@ +// SCAFFOLD: stream R.1 (01-r1-learning-loop) + +export type CalibrationSignal = + | 'EXACT_KEYWORD' + | 'VERB_NOUN_COMBO' + | 'CATEGORY_KEYWORD' + | 'HEURISTIC_FALLBACK' + | 'PRIVACY_ENFORCED'; + +export type CalibrationSample = { + signal: CalibrationSignal; + wasCorrect: boolean; + occurredAt: Date; +}; diff --git a/apps/claw-routing-service/src/modules/routing/types/learned-bias.types.ts b/apps/claw-routing-service/src/modules/routing/types/learned-bias.types.ts new file mode 100644 index 00000000..20fd02ca --- /dev/null +++ b/apps/claw-routing-service/src/modules/routing/types/learned-bias.types.ts @@ -0,0 +1,40 @@ +// SCAFFOLD: stream R.1 (01-r1-learning-loop) + +import type { DomainTag, PrivacyClass } from '../../../generated/prisma'; + +export type CandidateModel = { + provider: string; + model: string; + score: number; +}; + +export type BiasInput = { + userId: string; + domain: DomainTag; + taskFamily: string; + candidates: CandidateModel[]; + privacyConstraint: PrivacyClass; + threadId?: string; +}; + +export type AppliedBiasMetadata = { + learnedScoreRowIds: string[]; + topicProfileRowId?: string; + sampleSize:
number; + biasWeight: number; + reasonTag: string; +}; + +export type BiasOutput = { + candidates: CandidateModel[]; + appliedBias: AppliedBiasMetadata; +}; + +// Per-utility input shape (moved out of learned-bias-applier.utility.ts +// to satisfy the no-inline-type-alias rule for utility files). +export type LearnedSignal = { + provider: string; + model: string; + scoreDelta: number; + sampleSize: number; +}; diff --git a/apps/claw-routing-service/src/modules/routing/utilities/confidence-calibrator.utility.ts b/apps/claw-routing-service/src/modules/routing/utilities/confidence-calibrator.utility.ts new file mode 100644 index 00000000..d10c2ed4 --- /dev/null +++ b/apps/claw-routing-service/src/modules/routing/utilities/confidence-calibrator.utility.ts @@ -0,0 +1,15 @@ +// SCAFFOLD: stream R.1 (01-r1-learning-loop) +// Rolling 30-day hit-rate calibration for routing-signal confidence constants. + +import type { CalibrationSample, CalibrationSignal } from '../types/calibration.types'; + +export function calibrateConfidence( + _signal: CalibrationSignal, + _samples: CalibrationSample[], + _windowDays: number, + _fallbackConstant: number, +): number { + throw new Error( + 'SCAFFOLD-R1 — calibrateConfidence not implemented; see docs/15-ai-context/routing-flagship-streams/01-r1-learning-loop.md', + ); +} diff --git a/apps/claw-routing-service/src/modules/routing/utilities/learned-bias-applier.utility.ts b/apps/claw-routing-service/src/modules/routing/utilities/learned-bias-applier.utility.ts new file mode 100644 index 00000000..d07ee3b9 --- /dev/null +++ b/apps/claw-routing-service/src/modules/routing/utilities/learned-bias-applier.utility.ts @@ -0,0 +1,15 @@ +// SCAFFOLD: stream R.1 (01-r1-learning-loop) +// Pure utility — extracts the bias math out of the manager for unit testability. 
+ +import type { CandidateModel, LearnedSignal } from '../types/learned-bias.types'; + +export function applyLearnedBias( + _candidates: CandidateModel[], + _signals: LearnedSignal[], + _weightMax: number, + _minSampleSize: number, +): CandidateModel[] { + throw new Error( + 'SCAFFOLD-R1 — applyLearnedBias not implemented; see docs/15-ai-context/routing-flagship-streams/01-r1-learning-loop.md', + ); +} diff --git a/apps/claw-routing-service/src/modules/workflows/constants/workflow-priority.constants.ts b/apps/claw-routing-service/src/modules/workflows/constants/workflow-priority.constants.ts new file mode 100644 index 00000000..802e55e4 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/constants/workflow-priority.constants.ts @@ -0,0 +1,25 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) +// When multiple workflow handlers match, this ordering decides the winner. + +import { WorkflowKind } from '../../../generated/prisma'; + +export const WORKFLOW_PRIORITY: WorkflowKind[] = [ + WorkflowKind.JUDGE_PIPELINE, + WorkflowKind.COMPARE_ENSEMBLE, + WorkflowKind.PDF_EXTRACTION, + WorkflowKind.YOUTUBE_TRANSCRIPT, + WorkflowKind.VIDEO_ANALYSIS, + WorkflowKind.AUDIO_TRANSCRIBE, + WorkflowKind.IMAGE_ANALYSIS, + WorkflowKind.CODE_REVIEW, + WorkflowKind.SEARCH_FIRST, + WorkflowKind.EXTRACT_FIRST, + WorkflowKind.FILE_GENERATION, + WorkflowKind.IMAGE_GENERATION, + WorkflowKind.DIRECT_LLM, +]; + +export const WORKFLOW_FALLBACK_REASON_DISABLED = 'workflow_disabled_fallthrough'; +export const WORKFLOW_FALLBACK_REASON_DEP_UNHEALTHY = 'workflow_dependency_unhealthy'; +export const WORKFLOW_FALLBACK_REASON_EXTRACTION_FAILED = 'workflow_extraction_failed'; +export const WORKFLOW_FALLBACK_REASON_NO_MATCH = 'workflow_no_match_using_direct_llm'; diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/audio-transcribe.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/audio-transcribe.handler.ts new file mode 100644 
index 00000000..41c120a2 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/audio-transcribe.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class AudioTranscribeHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.AUDIO_TRANSCRIBE; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — AudioTranscribeHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — AudioTranscribeHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/code-review.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/code-review.handler.ts new file mode 100644 index 00000000..b8a33e20 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/code-review.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class CodeReviewHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.CODE_REVIEW; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — CodeReviewHandler.canHandle not implemented'); + } + plan(_c: 
WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — CodeReviewHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/compare-ensemble.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/compare-ensemble.handler.ts new file mode 100644 index 00000000..42f94e5f --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/compare-ensemble.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class CompareEnsembleHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.COMPARE_ENSEMBLE; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — CompareEnsembleHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — CompareEnsembleHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/direct-llm.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/direct-llm.handler.ts new file mode 100644 index 00000000..49266858 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/direct-llm.handler.ts @@ -0,0 +1,15 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from 
'../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class DirectLlmHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.DIRECT_LLM; + canHandle(_c: WorkflowExecutionContext): boolean { return true; } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — DirectLlmHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0.5; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/extract-first.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/extract-first.handler.ts new file mode 100644 index 00000000..35747a57 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/extract-first.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class ExtractFirstHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.EXTRACT_FIRST; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — ExtractFirstHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — ExtractFirstHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/file-generation.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/file-generation.handler.ts new file mode 100644 index 00000000..dca38128 --- /dev/null +++ 
b/apps/claw-routing-service/src/modules/workflows/managers/handlers/file-generation.handler.ts @@ -0,0 +1,19 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +// NOTE: File generation is wired today in routing.manager.ts (Stage 3 of AUTO pipeline). +// This handler will replace that direct call when the workflow orchestrator goes live. +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class FileGenerationHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.FILE_GENERATION; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — FileGenerationHandler.canHandle not implemented; delegates to existing file-gen detection'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — FileGenerationHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/image-analysis.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/image-analysis.handler.ts new file mode 100644 index 00000000..2129d5d8 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/image-analysis.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class ImageAnalysisHandler implements IWorkflowHandler { + readonly 
kind = WorkflowKind.IMAGE_ANALYSIS; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — ImageAnalysisHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — ImageAnalysisHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/image-generation.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/image-generation.handler.ts new file mode 100644 index 00000000..027f07f6 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/image-generation.handler.ts @@ -0,0 +1,19 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +// NOTE: Image generation is wired today in routing.manager.ts (Stage 2 of AUTO pipeline). +// This handler will replace that direct call when the workflow orchestrator goes live. 
+import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class ImageGenerationHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.IMAGE_GENERATION; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — ImageGenerationHandler.canHandle not implemented; delegates to existing ImageDetectionManager'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — ImageGenerationHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/judge-pipeline.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/judge-pipeline.handler.ts new file mode 100644 index 00000000..b7e2a130 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/judge-pipeline.handler.ts @@ -0,0 +1,18 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +// HIGHEST priority — used for medical/legal/high-risk domains. 
+import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class JudgePipelineHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.JUDGE_PIPELINE; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — JudgePipelineHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — JudgePipelineHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/pdf-extraction.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/pdf-extraction.handler.ts new file mode 100644 index 00000000..6f718eab --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/pdf-extraction.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class PdfExtractionHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.PDF_EXTRACTION; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — PdfExtractionHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — PdfExtractionHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git 
a/apps/claw-routing-service/src/modules/workflows/managers/handlers/search-first.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/search-first.handler.ts new file mode 100644 index 00000000..81d49dd6 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/search-first.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class SearchFirstHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.SEARCH_FIRST; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — SearchFirstHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — SearchFirstHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/video-analysis.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/video-analysis.handler.ts new file mode 100644 index 00000000..5ebd0f08 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/video-analysis.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class VideoAnalysisHandler implements IWorkflowHandler { + 
readonly kind = WorkflowKind.VIDEO_ANALYSIS; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — VideoAnalysisHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — VideoAnalysisHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/handlers/youtube-transcript.handler.ts b/apps/claw-routing-service/src/modules/workflows/managers/handlers/youtube-transcript.handler.ts new file mode 100644 index 00000000..5c1cebd1 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/handlers/youtube-transcript.handler.ts @@ -0,0 +1,17 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) — handler stub +import { Injectable } from '@nestjs/common'; +import { WorkflowKind } from '../../../../generated/prisma'; +import type { IWorkflowHandler } from '../../types/workflow-handler.interface'; +import type { WorkflowExecutionContext, WorkflowPlan } from '../../types/workflow-execution.types'; + +@Injectable() +export class YoutubeTranscriptHandler implements IWorkflowHandler { + readonly kind = WorkflowKind.YOUTUBE_TRANSCRIPT; + canHandle(_c: WorkflowExecutionContext): boolean { + throw new Error('SCAFFOLD-R3 — YoutubeTranscriptHandler.canHandle not implemented'); + } + plan(_c: WorkflowExecutionContext): WorkflowPlan { + throw new Error('SCAFFOLD-R3 — YoutubeTranscriptHandler.plan not implemented'); + } + confidence(_c: WorkflowExecutionContext): number { return 0; } +} diff --git a/apps/claw-routing-service/src/modules/workflows/managers/workflow-orchestrator.manager.ts b/apps/claw-routing-service/src/modules/workflows/managers/workflow-orchestrator.manager.ts new file mode 100644 index 00000000..23965eab --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/managers/workflow-orchestrator.manager.ts @@ -0,0 +1,19 @@ +// 
SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) +// Top-level coordinator — picks the right workflow handler based on context. +// NOT yet wired into RoutingManager.handleAuto(). + +import { Injectable, Logger } from '@nestjs/common'; + +import type { WorkflowChoice, WorkflowExecutionContext } from '../types/workflow-execution.types'; + +@Injectable() +export class WorkflowOrchestratorManager { + private readonly logger = new Logger(WorkflowOrchestratorManager.name); + + pickWorkflow(_context: WorkflowExecutionContext): WorkflowChoice { + this.logger.warn('WorkflowOrchestratorManager.pickWorkflow: SCAFFOLD only'); + throw new Error( + 'SCAFFOLD-R3 — WorkflowOrchestratorManager.pickWorkflow not implemented; see docs/15-ai-context/routing-flagship-streams/04-r3-workflow-orchestrator.md', + ); + } +} diff --git a/apps/claw-routing-service/src/modules/workflows/types/workflow-execution.types.ts b/apps/claw-routing-service/src/modules/workflows/types/workflow-execution.types.ts new file mode 100644 index 00000000..19cfca37 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/types/workflow-execution.types.ts @@ -0,0 +1,55 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) + +import type { WorkflowKind } from '../../../generated/prisma'; +import type { + AttachmentMeta, + DetectedUrl, + ModalityDetectionResult, +} from '../../modality-detection/types/modality-detection.types'; + +export type WorkflowExecutionContext = { + message: string; + threadId?: string; + userId: string; + modalityResult: ModalityDetectionResult; + attachments: AttachmentMeta[]; + urls: DetectedUrl[]; +}; + +export type WorkflowStep = + | { + type: 'llm_call'; + provider: string; + model: string; + promptTemplate: string; + inputFromStepId?: string; + } + | { + type: 'extract'; + extractor: + | 'pdf' + | 'spreadsheet' + | 'youtube_transcript' + | 'web_scrape' + | 'audio_transcribe' + | 'video_extract'; + sourceFileId?: string; + sourceUrl?: string; + } + | { type: 'search'; 
query: string } + | { type: 'judge'; primaryStepId: string; criticStepId: string }; + +export type WorkflowPlan = { + kind: WorkflowKind; + steps: WorkflowStep[]; + estimatedDurationMs: number; + estimatedCostUsd: number; +}; + +export type WorkflowChoice = { + kind: WorkflowKind; + confidence: number; + plan: WorkflowPlan; + reasonTags: string[]; + fallbackUsed: boolean; +}; diff --git a/apps/claw-routing-service/src/modules/workflows/types/workflow-handler.interface.ts b/apps/claw-routing-service/src/modules/workflows/types/workflow-handler.interface.ts new file mode 100644 index 00000000..b24a1121 --- /dev/null +++ b/apps/claw-routing-service/src/modules/workflows/types/workflow-handler.interface.ts @@ -0,0 +1,11 @@ +// SCAFFOLD: stream R.3 (04-r3-workflow-orchestrator) + +import type { WorkflowKind } from '../../../generated/prisma'; +import type { WorkflowExecutionContext, WorkflowPlan } from './workflow-execution.types'; + +export interface IWorkflowHandler { + readonly kind: WorkflowKind; + canHandle(context: WorkflowExecutionContext): boolean; + plan(context: WorkflowExecutionContext): WorkflowPlan; + confidence(context: WorkflowExecutionContext): number; +} diff --git a/docs/15-ai-context/routing-flagship-streams/00-master-plan.md b/docs/15-ai-context/routing-flagship-streams/00-master-plan.md new file mode 100644 index 00000000..a27c3145 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/00-master-plan.md @@ -0,0 +1,114 @@ +# 00 — Master Routing Flagship Plan + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/00_MASTER_routing_implementation_flagship.md` + +## Mission + +Turn the routing audit into an implementation-grade flagship plan that makes ClawAI's router smarter, multimodal, learning-aware, workflow-aware, budget-aware, transparent, and production-grade. 
+ +## Implementation roadmap (release slices) + +| Release | Streams | Goal | Acceptance | +|---------|---------|------|------------| +| **R1** | 01, 11.1, 11.2 | Closing the learning loop + visibility | LearnedScore biases hot path; explanation surfaces in chat; workflow kind on every decision | +| **R2** | 02, 06, 10 | Trust + safety | v2 canary 5% with rollback; playground UI; 500-prompt regression suite in CI | +| **R3** | 03, 11.4, 11.5, 11.10 | Multimodal detection | YouTube/PDF/video/audio/spreadsheet/URL/lang detection; modality stored on decision | +| **R4** | 04 | Workflows go live | JUDGE_PIPELINE, COMPARE_ENSEMBLE, SEARCH_FIRST, EXTRACT_FIRST, PDF_EXTRACTION, YOUTUBE_TRANSCRIPT, CODE_REVIEW all execute | +| **R5** | 05, 07 | Cost + multi-tenant | Per-user + per-org budgets; org-scoped policies; allow/deny lists | +| **R6** | 08, 09 | Intelligence + i18n | Language-aware routing; prompt-length filter; mid-stream switch; embedding routing; consensus; cost/quality slider | + +## Dependency graph + +``` + ┌────────────────────────┐ + │ 01 learning loop │◄────────────────┐ + └───────┬────────────────┘ │ + │ │ + ┌───────▼────────────────┐ │ + │ 02 v2 canary │ │ + └───────┬────────────────┘ │ + │ │ + ┌───────▼────────────────┐ ┌────────────┴───┐ + │ 04 workflows live │◄──┤ 03 modality │ + └───────┬────────────────┘ └────────────────┘ + │ + ┌───────────┼───────────┐ + ▼ ▼ ▼ + ┌────────┐ ┌─────────┐ ┌──────────┐ + │ 05 cost │ │ 06 playg │ │ 07 fleet │ + └────────┘ └─────────┘ └──────────┘ + │ + ┌───────▼────────────────┐ + │ 08 i18n 09 advanced │ + └────────────────────────┘ + │ + ┌───────▼────────────────┐ + │ 10 quality (always-on) │ + └────────────────────────┘ +``` + +## Owners / agents needed + +| Domain | Owner agent | +|--------|-------------| +| Backend manager/service/repository | principal backend architect agent | +| Prisma migrations + seeders | principal data architect agent | +| Frontend pages/components/hooks | principal frontend architect agent | 
+| Scoring/learning algorithms | principal AI routing scientist agent | +| Workflow orchestration | principal LLM orchestration architect agent | +| QA / replay / regression | principal QA agency agent | +| Docs / runbooks | principal documentation owner agent | +| Release gates / canary / rollback | principal DevOps agent | + +## Migration order (Prisma) + +1. **R.1** — no new tables; reads existing `RouterLearnedScore`/`RouterTopicProfile`. +2. **R.4** — add `UserCostBudget` + index. +3. **R.6** — add nullable `orgId` to `RoutingPolicy` + composite index `(orgId, isActive, priority)`. +4. **R.7** — extend `RoutingDecision` with `detectedLanguage` + `languageConfidence` columns. +5. **R.2/R.3** — extend `RoutingDecision` with `detectedModalities Json` + `selectedWorkflow WorkflowKind` columns. +6. **R.8** — `RouterRegionPreference`, `UserCostQualitySlider`, `UserFineTunePreference` (one migration per sub-feature). + +## Test strategy + +Per-stream test plan in each `0X-name.md`. Cross-cutting: + +- **Unit:** every manager method (≥80% coverage). Jest + ts-jest. +- **API:** `qa/test-routing-r<N>-<name>.sh` per stream. Hits live backend, asserts DB rows + response shape. +- **Integration:** `qa/test-routing-integration-flagship.sh` — fires 50 prompts through R.1+R.2+R.3+R.4 pipeline, verifies decision lifecycle. +- **Regression:** `qa/test-routing-regression-500.sh` (Stream 10) — runs the 500-prompt validation set on every CI run. +- **Load:** `qa/test-routing-load.sh` (Stream 10) — 100 req/s × 10 min, p95 routing decision < 50 ms. +- **Playwright:** `apps/claw-frontend/e2e/routing-*.spec.ts` per UI stream (06, 11.3, 11.9). 
+ +## Rollback strategy + +Every stream activation sits behind a feature flag in `.env`: + +``` +ROUTING_R1_LEARNED_BIAS_ENABLED=false +ROUTING_R2_MODALITY_DETECTION_ENABLED=false +ROUTING_R3_WORKFLOWS_ENABLED=false +ROUTING_R4_COST_BUDGET_ENABLED=false +ROUTING_R6_MULTI_TENANT_ENABLED=false +ROUTING_R7_LANGUAGE_DETECTION_ENABLED=false +ROUTING_R8_<FEATURE>_ENABLED=false +ROUTING_V2_CANARY_PERCENT=0 +ROUTING_V2_PRIMARY_ENABLED=false +ROUTING_V2_ROLLBACK_SWITCH=true +``` + +Flip flag → rollback in seconds. No data migration required to roll back a stream. + +## Business positioning (1 sentence) + +> "ClawAI isn't another AI wrapper — it's the intelligence layer that picks the right model and workflow for the task, with cost, privacy, latency, modality, domain awareness, learned feedback, and full transparency." + +## Blockers identified + +| # | Blocker | Stream | Resolution | +|---|---------|--------|------------| +| B1 | `chat-service` is the consumer of `/routing/evaluate`; activating R.1 requires no chat changes (it's transparent), but **R.2/R.3 require chat to pass attachment+URL metadata in the routing context** | 02, 03 | Add `RoutingContext.attachments` + `RoutingContext.urls` upstream in chat-service | +| B2 | `connector-service` must expose `freeTierRemaining` for R.4 free-tier awareness; not implemented today | 05 | New connector field + sync event | +| B3 | `WorkflowKind.JUDGE_PIPELINE` requires the chat execution layer to support multi-step LLM calls; currently single-call | 04 | Refactor `ChatExecutionManager` to support workflow handoff | +| B4 | Multi-tenant requires `User.orgId` end-to-end (auth-service); today users have no org | 07 | Add `Organization` + `OrganizationMember` to auth-service first (same model exists in agent-service but isolated) | +| B5 | Region routing needs Bedrock multi-region connector support | 09.5 | New `ConnectorRegion` table in connector-service | diff --git a/docs/15-ai-context/routing-flagship-streams/01-r1-learning-loop.md 
b/docs/15-ai-context/routing-flagship-streams/01-r1-learning-loop.md new file mode 100644 index 00000000..db16bfb3 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/01-r1-learning-loop.md @@ -0,0 +1,204 @@ +# Stream 01 — R.1 Close the Learning Loop + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/01_R1_close_learning_loop.md` + +## Mission + +Make the data already collected by `RouterLearnedScore`, `RouterTopicProfile`, `RoutingOutcomeRecord`, and `RoutingFeedbackRecord` actually influence future routing decisions in the v1 hot path. + +## Current state (from audit) + +- `RouterEducationManager.ingestExecutionOutcome()` writes `RouterModelProfile` + `RouterLearnedScore` rows on `message.completed`. +- `RouterEducationManager.ingestFeedbackSignal()` writes the same on `message.feedback_set`. +- `LearningLoopManager` produces `RouterLearnedScore` via `bounded-adjust.utility.ts`. +- **`RoutingManager.handleAuto()` never reads any of it.** Hot path is keyword-based + Ollama-router only. +- v2 evaluator (`route-evaluator/`) consumes the learned data but runs in shadow mode. 
+ +## Files to add (scaffold included in this branch) + +``` +apps/claw-routing-service/src/modules/routing/ +├── managers/ +│ └── learned-bias.manager.ts (NEW — new manager) +├── utilities/ +│ ├── learned-bias-applier.utility.ts (NEW) +│ └── confidence-calibrator.utility.ts (NEW) +├── types/ +│ ├── learned-bias.types.ts (NEW) +│ └── thread-sticky-route.types.ts (NEW) +└── constants/ + └── learned-bias.constants.ts (NEW) +``` + +## Files to modify (NOT scaffolded — needs careful integration) + +``` +apps/claw-routing-service/src/modules/routing/managers/routing.manager.ts + → handleAuto(): after category detection + before final selection, call LearnedBiasManager.applyBias() + → handleAuto(): if thread sticky route enabled + thread has prior decision, prefer same provider+model + → buildExplanation(): include learnedBiasReason in reasonTags + +apps/claw-routing-service/src/modules/routing/services/routing.service.ts + → publish new "routing.learned_bias_applied" event when bias changes a decision + +apps/claw-routing-service/src/modules/routing/repositories/routing-decisions.repository.ts + → findLastDecisionForThread(threadId): used by sticky route + +apps/claw-routing-service/src/app/config/app.config.ts + → add zod-validated env block for ROUTING_R1_* +``` + +## API contract — LearnedBiasManager + +```typescript +// apps/claw-routing-service/src/modules/routing/managers/learned-bias.manager.ts + +export type BiasInput = { + userId: string; + domain: DomainTag; + taskFamily: string; + candidates: CandidateModel[]; + privacyConstraint: PrivacyClass; + threadId?: string; +}; + +export type BiasOutput = { + candidates: CandidateModel[]; // re-scored + appliedBias: { + learnedScoreRows: string[]; // ids of rows that influenced the decision + topicProfileRow?: string; + sampleSize: number; + biasWeight: number; + reasonTag: string; // 'learned_bias_thumbs_up_history' | 'sample_too_small_no_bias' | etc. 
+ }; +}; + +@Injectable() +export class LearnedBiasManager { + async applyBias(input: BiasInput): Promise<BiasOutput>; +} +``` + +## Bounded-bias algorithm + +``` +1. Fetch RouterLearnedScore rows for (userId, domain) — limit 50 most recent. +2. Fetch RouterTopicProfile row for (userId, taskFamily). +3. For each candidate model: + - Look up score adjustment Δ from learned rows + topic profile. + - Clamp Δ to ±ROUTING_R1_LEARNED_BIAS_WEIGHT_MAX (default 0.3). + - If sample size < ROUTING_R1_MIN_SAMPLE_SIZE (default 10): use Δ × (sampleSize/min). + - Add Δ to candidate.score. +4. Re-rank candidates. +5. NEVER promote a candidate that fails privacyConstraint (LOCAL_ONLY/LOCAL_PREFERRED). +6. NEVER promote a candidate that lacks required capability (vision/tools/streaming). +7. Return re-ranked list + appliedBias metadata. +``` + +## Confidence calibration + +``` +1. Rolling window: last ROUTING_R1_CONFIDENCE_CALIBRATION_WINDOW_DAYS (default 30) days. +2. For each routing signal (EXACT_KEYWORD, VERB_NOUN_COMBO, CATEGORY_KEYWORD, HEURISTIC_FALLBACK, PRIVACY_ENFORCED): + - Compute hit rate = correct decisions / total decisions with that signal. +3. Replace constants with rolling values. +4. Snapshot weekly into RoutingCalibrationSnapshot. +5. Compare to baseline; alert if drop > drift threshold (Stream 09 / R.9). +``` + +## Per-thread sticky route + +``` +If ROUTING_R1_STICKY_THREAD_ROUTE_ENABLED=true: + → Find last 5 RoutingDecision rows for context.threadId. + → If all 5 picked same (provider, model) AND no privacy/capability change triggered → use same. + → Otherwise fall through to AUTO. 
+``` + +## Acceptance criteria + +| # | Test | Expected | +|---|---------------------------------------------------------------------------------------------|-------------------------------------------------------------------------| +| 1 | User thumbs-downs gpt-4o-mini for 12 legal questions; thumbs-ups claude-opus-4 for 8 | Next legal question routes to claude-opus-4 even if keyword classifier first picks mini | +| 2 | Same as above with only 3 thumbs-downs | Sample too small; bias dampened; reasonTag includes `sample_too_small` | +| 3 | Privacy-keyword message + learned bias toward cloud model | Local model wins; reasonTag includes `privacy_override_beats_learned` | +| 4 | User on MANUAL_MODEL=gpt-4o; learned bias toward claude-sonnet-4 | gpt-4o wins; learned bias not applied | +| 5 | Thread has 5 prior decisions all on claude-sonnet-4; new message on same topic | Sticky route picks claude-sonnet-4; reasonTag includes `thread_sticky` | +| 6 | Sticky route would pick a now-unhealthy model | Falls through to AUTO; reasonTag includes `sticky_unhealthy_fallthrough`| +| 7 | RoutingDecision.explanation contains "learned bias adjusted score from X to Y" | Visible in `/api/v1/routing/decisions/:threadId` response | +| 8 | Feature flag off | Hot path unchanged; no learned bias applied | + +## Tests + +``` +apps/claw-routing-service/src/modules/routing/managers/__tests__/learned-bias.manager.spec.ts + - applies positive bias from thumbs-ups history + - applies negative bias from thumbs-downs history + - clamps bias to ±weightMax + - dampens bias when sample size below min + - never overrides privacy constraint + - never overrides capability requirement + - never overrides manual user model + - calibration uses rolling 30-day window + +apps/claw-routing-service/src/modules/routing/utilities/__tests__/confidence-calibrator.utility.spec.ts + - hit rate calculated correctly + - default to constants when window empty + - signals tracked independently + 
+apps/claw-routing-service/src/modules/routing/managers/__tests__/routing.manager.sticky-route.spec.ts + - sticky route picks last decision when 5/5 same + - sticky route falls through on unhealthy model + - sticky route disabled by flag + +qa/test-routing-r1-learning-loop.sh + - Live API test: seed 12 fake feedback rows, fire routing request, verify response includes learned bias reason tag +``` + +## Observability + +New log lines (all `info` level, structured): + +``` +routing.learned_bias_applied candidates=N userId=X domain=Y biasWeight=Z reasonTag=... +routing.sticky_thread_applied threadId=X provider=Y model=Z +routing.confidence_calibrated signal=EXACT_KEYWORD windowDays=30 hitRate=0.93 +routing.privacy_override_beat_bias userId=X originalCandidate=Y replacedWith=Z +``` + +New RabbitMQ events: + +``` +routing.learned_bias_applied { userId, domain, biasWeight, beforeScore, afterScore } +routing.confidence_recalibrated { signal, oldValue, newValue, windowDays } +``` + +## Rollback + +```bash +echo 'ROUTING_R1_LEARNED_BIAS_ENABLED=false' >> .env +./scripts/claw.sh restart routing-service +``` + +Hot path reverts to keyword-only routing. Existing decisions in DB retain `learnedBiasReason` field (nullable). + +## Risks + +| # | Risk | Mitigation | +|---|------|------------| +| 1 | Learned bias amplifies user mistakes (e.g. 
user incorrectly thumbs-downs all coding answers) | Cap at ±0.3 weight; require min sample size; visible in explanation | +| 2 | Bias data leaks via reasonTag in chat UI ("we picked X because you hated Y") | reasonTag goes to admin replay only; user-facing explanation is generic | +| 3 | Hot path latency increases due to extra DB queries | Cache learned-bias rows per (userId, domain) for 5 min | +| 4 | First-week of activation: no bias data yet | Falls back to existing keyword routing — safe-by-default | +| 5 | Sticky route gets stuck on a broken model | Sticky checks isHealthy; sticky disabled when circuit breaker open | + +## Implementation slices (PRs) + +1. **Slice 1.1:** scaffold types + constants + LearnedBiasManager skeleton (this branch) + unit tests for applier +2. **Slice 1.2:** wire LearnedBiasManager.applyBias() call into `RoutingManager.handleAuto()` behind `ROUTING_R1_LEARNED_BIAS_ENABLED` flag; integration test +3. **Slice 1.3:** confidence calibrator + replace constants in routing.constants.ts; weekly snapshot cron +4. **Slice 1.4:** per-thread sticky route + tests +5. **Slice 1.5:** explanation surface in `RoutingDecision.explanation` field; surface in `/routing` admin UI +6. **Slice 1.6:** RabbitMQ events + audit logs + +Each slice ships behind the flag — activation = flip flag in `.env`. 
diff --git a/docs/15-ai-context/routing-flagship-streams/02-r1r3-v2-evaluator-canary.md b/docs/15-ai-context/routing-flagship-streams/02-r1r3-v2-evaluator-canary.md new file mode 100644 index 00000000..71eddec1 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/02-r1r3-v2-evaluator-canary.md @@ -0,0 +1,145 @@ +# Stream 02 — R.1/R.3 Promote v2 Evaluator (Canary) + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/02_R1_R3_promote_v2_evaluator_canary.md` + +## Mission + +Take the existing route-evaluator-v2 from shadow-only to canary primary for a small percentage of traffic, with guardrails, comparison telemetry, and instant rollback. + +## Current state (from audit) + +- `route-evaluator.controller.ts` (`/routing/evaluate-v2`) exists with full `RoutingDecisionV2` schema. +- `evaluate-shadow.controller.ts` runs v2 alongside v1 — zero impact on response. +- chat-service still consumes `/routing/evaluate` (v1). + +## Files to add (scaffold included) + +``` +apps/claw-routing-service/src/modules/route-evaluator/ +├── managers/ +│ └── canary-bucket.manager.ts (NEW) +├── utilities/ +│ ├── canary-hash.utility.ts (NEW — stable per-user/org bucketing) +│ └── decision-comparator.utility.ts (NEW) +├── types/ +│ └── canary.types.ts (NEW) +└── constants/ + └── canary.constants.ts (NEW) +``` + +## Files to modify (NOT scaffolded) + +``` +apps/claw-chat-service/src/modules/chat/managers/chat-execution.manager.ts + → call /routing/evaluate-v2 when CanaryBucketManager.isV2Bucket(userId, threadId) returns true + +apps/claw-routing-service/src/modules/route-evaluator/controllers/route-evaluator.controller.ts + → primary mode: POST /evaluate-v2 returns the v2 decision; on safety/regression breach, fall back to v1 + +apps/claw-routing-service/src/app/config/app.config.ts + → ROUTING_V2_PRIMARY_ENABLED, ROUTING_V2_CANARY_PERCENT, ROUTING_V2_ROLLBACK_SWITCH, threshold envs +``` + +## Canary bucketing + +```typescript +// SHA-256(userId + orgId + 
"routing-v2-canary") % 100 < ROUTING_V2_CANARY_PERCENT +// Stable: same user always lands in same bucket across requests. +// Org-scoped: all users in an org can be canaried together. +// Override: ROUTING_V2_CANARY_USER_ALLOWLIST=u1,u2,u3 (force-in for testing) +// ROUTING_V2_CANARY_USER_DENYLIST=u4,u5 (force-out, e.g. CEO) +``` + +## Guardrails + +Comparator runs on every canary decision: + +| Metric | Threshold env | Default | Action if breached | +|----------------------------|----------------------------------------------------|---------|--------------------------| +| regression % | `ROUTING_V2_REGRESSION_THRESHOLD_PERCENT` | 1 | Auto-disable canary | +| cost increase % | `ROUTING_V2_COST_INCREASE_THRESHOLD_PERCENT` | 10 | Auto-disable canary | +| confidence drop | `ROUTING_V2_CONFIDENCE_DROP_THRESHOLD` | 0.1 | Warn, log audit | +| failure rate % | `ROUTING_V2_FAILURE_RATE_THRESHOLD_PERCENT` | 2 | Auto-disable canary | + +Rolling window: 100 requests OR 5 minutes (whichever first). 
+ +## Acceptance criteria + +| # | Test | Expected | +|---|---------------------------------------------------------------------------------------------|-------------------------------------------------------------------------| +| 1 | `ROUTING_V2_CANARY_PERCENT=5` + 1000 requests | ~50 requests hit v2; same userId always same bucket | +| 2 | v2 returns invalid `RoutingDecisionV2` (schema fail) | Fall back to v1 silently; log warn; counter incremented | +| 3 | v2 disagrees with v1 + post-execution outcome is `BAD_REGRESSION` | Comparator increments regression count; if rolling rate > threshold, canary disabled automatically | +| 4 | `ROUTING_V2_ROLLBACK_SWITCH=true` set | All canary traffic routes through v1; canary effectively disabled | +| 5 | Comparison dashboard at `/routing/canary-comparison` | Side-by-side: v1 picked X (cost $0.001), v2 picked Y (cost $0.0008), outcome= QUALITY_WIN | +| 6 | Promote suspicious case from canary → regression fixture | Same flow as existing replay-lab promotion | +| 7 | Feature flag fully off (`ROUTING_V2_PRIMARY_ENABLED=false`) | v2 stays in shadow mode (old behavior); zero impact | + +## Tests + +``` +apps/claw-routing-service/src/modules/route-evaluator/managers/__tests__/canary-bucket.manager.spec.ts + - bucket hash is stable per user + - bucket respects canary percent + - allowlist forces inclusion + - denylist forces exclusion + - org-bucket includes all org members + +apps/claw-routing-service/src/modules/route-evaluator/utilities/__tests__/decision-comparator.utility.spec.ts + - detects regression when v1 was correct + v2 picked worse model + - detects cost increase + - detects confidence drop + - detects rolling-window breach + +apps/claw-chat-service/src/modules/chat/managers/__tests__/chat-execution.manager.canary.spec.ts + - chat falls back to v1 when v2 invalid + - chat respects canary flag + +qa/test-routing-r2-v2-canary.sh + - enable canary at 100% in a test env + - fire 10 routing requests, assert all go to v2 + - 
inject a fake "v2 returns invalid decision" → assert v1 fallback + - set ROUTING_V2_ROLLBACK_SWITCH=true → assert all subsequent requests bypass v2 +``` + +## Observability + +``` +routing.canary.bucketed userId=X bucket=v2|v1 percent=5 +routing.canary.v2_succeeded userId=X v1Provider=A v2Provider=B sameDecision=false +routing.canary.v2_fellback userId=X reason=invalid_schema|safety|breach +routing.canary.guardrail_tripped metric=regression|cost|confidence|failure rate=X threshold=Y → AUTO-DISABLE +``` + +RabbitMQ: + +``` +routing.v2.canary_started { userId, bucket } +routing.v2.canary_succeeded { userId, v1Decision, v2Decision } +routing.v2.canary_fellback { userId, reason } +routing.v2.canary_disabled { metric, rate, threshold } +``` + +## Dashboard (admin) + +New page `/routing/canary-comparison`: +- Live counter: canary % active, request count, regression %, cost delta %, failure % +- Recent disagreements table: v1 chose X / v2 chose Y / outcome / cost delta +- "Disable canary" big red button (sets `ROUTING_V2_ROLLBACK_SWITCH=true` via admin API) + +## Rollback + +Two layers: +1. Flip `ROUTING_V2_ROLLBACK_SWITCH=true` — every request bypasses v2 within milliseconds. +2. Set `ROUTING_V2_CANARY_PERCENT=0` — canary is disabled for new requests. + +Automatic rollback fires when a guardrail trips. Manual rollback is available via the admin UI button. 
+ +## Risks + +| # | Risk | Mitigation | +|---|------|------------| +| 1 | v2 returns subtly worse decisions that aren't caught by guardrails | Run shadow comparison for 7 days BEFORE enabling primary canary; require ≤0.5% disagreement | +| 2 | Bucketing leaks state (same user gets different bucket on retry) | Hash is pure stateless function of (userId, orgId); test asserts stability | +| 3 | Guardrail false-positive trips canary on benign traffic | Rolling-window minimum 100 requests; warmup grace period of 50 requests after enable | +| 4 | Comparator latency drags hot path | Comparator runs async on `message.completed` event, not inline | diff --git a/docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md b/docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md new file mode 100644 index 00000000..c03960a5 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/03-r2-multimodal-intent-detection.md @@ -0,0 +1,182 @@ +# Stream 03 — R.2 Multimodal Intent Detection + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/03_R2_multimodal_intent_detection.md` + +## Mission + +Stop pretending the router only sees text. Detect attachments / URLs / file types / streaming/tool-calling needs in the message context and tag the RoutingDecision with one or more `ModalityKind` values + a hinted `WorkflowKind`. 
+ +## Detection coverage + +| Modality | Detector | WorkflowKind hint | +| ------------------------- | --------------------------------------------------------------------------------------------------------- | ------------------------ | +| `YOUTUBE_INPUT` | URL regex: `(youtube\.com/watch|youtu\.be/|youtube\.com/shorts/|youtube\.com/playlist)` | `YOUTUBE_TRANSCRIPT` | +| `PDF_INPUT` | Attachment MIME `application/pdf` + verb (`summarize|explain|extract|q&a`) | `PDF_EXTRACTION` | +| `VIDEO_INPUT` | Attachment MIME `video/*` OR extension `.mp4|.mov|.webm|.avi|.mkv` | `VIDEO_ANALYSIS` | +| `AUDIO_INPUT` | Attachment MIME `audio/*` OR extension `.wav|.mp3|.m4a|.ogg|.flac` | `AUDIO_TRANSCRIBE` | +| `SPREADSHEET_INPUT` | Attachment MIME `application/vnd.ms-excel|application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` OR ext `.xlsx|.xls|.csv|.ods` | `EXTRACT_FIRST` | +| `WEB_INPUT` | Non-YouTube URL + verb (`summarize|read|fetch|what does X say`) | `SEARCH_FIRST` | +| `IMAGE_INPUT` | Attachment MIME `image/*` OR extension `.png|.jpg|.jpeg|.gif|.webp|.bmp` | `IMAGE_ANALYSIS` | +| `TOOL_CALLING` | Verb intent: `book|schedule|send email|call function|invoke tool|run command` | n/a — filter on capability flag | +| `STREAMING` (client need) | Request header `Accept: text/event-stream` OR query `?stream=true` | n/a — filter on capability flag | +| `EMBEDDING` | Verb: `embed|vectorize|create embedding|store in vector db|retrieve similar` | n/a — separate router branch | + +## Files to add (scaffold included) + +``` +apps/claw-routing-service/src/modules/modality-detection/ (NEW MODULE) +├── modality-detection.module.ts +├── controllers/ +│ └── modality-detection.controller.ts (POST /routing/detect-modality) +├── services/ +│ └── modality-detection.service.ts +├── managers/ +│ ├── url-intent.manager.ts (YouTube + Web URL) +│ ├── attachment-intent.manager.ts (PDF/video/audio/spreadsheet/image) +│ ├── tool-calling-intent.manager.ts +│ ├── streaming-intent.manager.ts +│ 
└── embedding-intent.manager.ts +├── dto/ +│ └── detect-modality.dto.ts +├── types/ +│ └── modality-detection.types.ts +├── constants/ +│ ├── youtube-url.constants.ts +│ ├── web-url.constants.ts +│ ├── pdf-mime.constants.ts +│ ├── video-mime.constants.ts +│ ├── audio-mime.constants.ts +│ ├── spreadsheet-mime.constants.ts +│ ├── image-mime.constants.ts +│ ├── tool-calling-keywords.constants.ts +│ └── embedding-keywords.constants.ts +└── utilities/ + └── extension-detector.utility.ts +``` + +## Detection contract + +```typescript +export type ModalityDetectionInput = { + message: string; + attachments: AttachmentMeta[]; + clientStreamingExpected?: boolean; +}; + +export type AttachmentMeta = { + fileId: string; + filename: string; + mimeType: string; + sizeBytes: number; +}; + +export type ModalityDetectionResult = { + detectedModalities: ModalityKind[]; + workflowHint?: WorkflowKind; + workflowConfidence: number; // 0..1 + reasonTags: string[]; + fileMetadata: AttachmentMeta[]; + urlMetadata: { url: string; kind: 'youtube' | 'web' }[]; + fallback?: { reason: string; tag: string }; +}; +``` + +## Acceptance criteria + +| # | Test | Expected | +|---|----------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------| +| 1 | Message contains `https://youtube.com/watch?v=abc123` + "summarize" | `detectedModalities=[TEXT, YOUTUBE_INPUT]`, `workflowHint=YOUTUBE_TRANSCRIPT` | +| 2 | Attachment with mime `application/pdf` + "extract the key points" | `detectedModalities=[TEXT, PDF_INPUT]`, `workflowHint=PDF_EXTRACTION` | +| 3 | Attachment mime `audio/mpeg` | `[TEXT, AUDIO_INPUT]`, `workflowHint=AUDIO_TRANSCRIBE` | +| 4 | Attachment mime `video/mp4` | `[TEXT, VIDEO_INPUT]`, `workflowHint=VIDEO_ANALYSIS` | +| 5 | Attachment mime `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` | `[TEXT, SPREADSHEET_INPUT]`, `workflowHint=EXTRACT_FIRST` | +| 6 | URL 
`https://example.com/blog` + "summarize this article" | `[TEXT, WEB_INPUT]`, `workflowHint=SEARCH_FIRST` | +| 7 | Message "book me a meeting tomorrow at 3pm" | `[TEXT, TOOL_CALLING]`, no workflowHint | +| 8 | Request with `Accept: text/event-stream` | `[TEXT, STREAMING]`, no workflowHint | +| 9 | Message "embed this paragraph and store in vector db" | `[TEXT, EMBEDDING]`, no workflowHint | +| 10 | Multiple attachments: 1 PDF + 1 audio + 1 image | `[TEXT, PDF_INPUT, AUDIO_INPUT, IMAGE_INPUT]`; workflowHint=highest-priority (PDF_EXTRACTION) | +| 11 | Malicious URL: `javascript:alert(1)` or non-http scheme | Stripped from detection; reasonTag `unsafe_url_ignored` | +| 12 | YouTube URL but `ROUTING_R2_YOUTUBE_DETECTION_ENABLED=false` | Falls through; not flagged | +| 13 | All flags off (`ROUTING_R2_MODALITY_DETECTION_ENABLED=false`) | Endpoint returns empty modalities; hot path uses v1 behavior | +| 14 | Non-English prompt with file ("الرجاء تلخيص ملف PDF" + PDF attachment) | Detects `PDF_INPUT` regardless of message language | + +## Endpoint contract + +```http +POST /api/v1/routing/detect-modality + +{ + "message": "Please summarize https://youtu.be/abc123", + "attachments": [], + "clientStreamingExpected": false +} + +Response 200: +{ + "detectedModalities": ["TEXT", "YOUTUBE_INPUT"], + "workflowHint": "YOUTUBE_TRANSCRIPT", + "workflowConfidence": 0.95, + "reasonTags": ["youtube_url_with_summarize_verb"], + "fileMetadata": [], + "urlMetadata": [{"url": "https://youtu.be/abc123", "kind": "youtube"}] +} +``` + +## Tests + +``` +apps/claw-routing-service/src/modules/modality-detection/managers/__tests__/url-intent.manager.spec.ts + - youtube.com/watch detected + - youtu.be detected + - youtube.com/shorts detected + - playlist detected + - non-youtube URL with summarize verb → web_input + - javascript: URL stripped + - data: URL stripped + +apps/claw-routing-service/src/modules/modality-detection/managers/__tests__/attachment-intent.manager.spec.ts + - PDF + summarize 
verb → PDF_EXTRACTION + - PDF without verb → PDF_INPUT only (no workflow hint) + - audio/mp3 → AUDIO_TRANSCRIBE + - video/mp4 → VIDEO_ANALYSIS + - .xlsx mime → SPREADSHEET_INPUT + - .png mime → IMAGE_INPUT + - multiple attachments → multiple modalities + +qa/test-routing-r2-modality-detection.sh + - Hit POST /routing/detect-modality with each MIME type + - Assert response shape +``` + +## Wiring with Stream 04 (workflow orchestrator) + +After this stream is wired, `RoutingManager.handleAuto()` calls: + +```typescript +const modalityResult = await this.modalityDetectionService.detect({ + message: context.message, + attachments: context.attachments, +}); + +if (modalityResult.workflowHint && workflowsEnabled(modalityResult.workflowHint)) { + return this.workflowOrchestratorManager.orchestrate({ + workflow: modalityResult.workflowHint, + context, + modalityResult, + }); +} +// otherwise fall through to existing AUTO pipeline +``` + +## Rollback + +`ROUTING_R2_MODALITY_DETECTION_ENABLED=false` → endpoint returns empty; hot path unaffected. Per-modality flags allow partial activation (e.g. enable YouTube detection but not video). + +## Risks + +| # | Risk | Mitigation | +|---|------|------------| +| 1 | False-positive URL detection (e.g. 
`youtube.com.malicious.io`) | Strict regex anchored at host start; only valid scheme `https?:` | +| 2 | Attachment metadata not passed by chat-service in `RoutingContext` | **Blocker B1** in master plan — chat-service change required first | +| 3 | Detection adds latency to hot path | Detector is pure (no I/O); regex + array scans; <1ms total | +| 4 | Workflow hint suggested but workflow disabled | Hot path falls through to v1 routing; reasonTag `workflow_disabled_fallthrough` | diff --git a/docs/15-ai-context/routing-flagship-streams/04-r3-workflow-orchestrator.md b/docs/15-ai-context/routing-flagship-streams/04-r3-workflow-orchestrator.md new file mode 100644 index 00000000..a18499e4 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/04-r3-workflow-orchestrator.md @@ -0,0 +1,236 @@ +# Stream 04 — R.3 Workflow Orchestrator Goes Live + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/04_R3_workflow_orchestrator_goes_live.md` + +## Mission + +Make all 13 `WorkflowKind` values mean real executable behavior. Today `RoutingManager.handleAuto()` always returns `DIRECT_LLM` implicitly; this stream lights up the other 12 workflows behind per-workflow feature flags. 
+ +## The 13 workflows + +| Workflow | Trigger | Status today | Activation flag | +| -------------------- | ----------------------------------------------------------------------------- | ------------ | -------------------------------------------------------- | +| `DIRECT_LLM` | Default — fallthrough when no other workflow matches | wired | always on | +| `SEARCH_FIRST` | Time-sensitive verbs (latest, today, current, news, regulation, pricing) | not wired | `ROUTING_R3_WORKFLOW_SEARCH_FIRST_ENABLED` | +| `EXTRACT_FIRST` | Attachment with structured-data intent (spreadsheet/CSV/JSON) | not wired | `ROUTING_R3_WORKFLOW_EXTRACT_FIRST_ENABLED` | +| `PDF_EXTRACTION` | `PDF_INPUT` modality + summarize/explain/extract verb | not wired | `ROUTING_R3_WORKFLOW_PDF_EXTRACTION_ENABLED` | +| `YOUTUBE_TRANSCRIPT` | `YOUTUBE_INPUT` modality | not wired | `ROUTING_R3_WORKFLOW_YOUTUBE_TRANSCRIPT_ENABLED` | +| `IMAGE_ANALYSIS` | `IMAGE_INPUT` modality | not wired | `ROUTING_R3_WORKFLOW_IMAGE_ANALYSIS_ENABLED` | +| `IMAGE_GENERATION` | Image generation keywords detected (Stage 2 of v1 pipeline) | wired today | `ROUTING_R3_WORKFLOW_IMAGE_GENERATION_ENABLED` (on) | +| `VIDEO_ANALYSIS` | `VIDEO_INPUT` modality | not wired | `ROUTING_R3_WORKFLOW_VIDEO_ANALYSIS_ENABLED` | +| `AUDIO_TRANSCRIBE` | `AUDIO_INPUT` modality | not wired | `ROUTING_R3_WORKFLOW_AUDIO_TRANSCRIBE_ENABLED` | +| `FILE_GENERATION` | File generation verb+format detected (Stage 3 of v1 pipeline) | wired today | `ROUTING_R3_WORKFLOW_FILE_GENERATION_ENABLED` (on) | +| `CODE_REVIEW` | Code-block in message + `review/refactor/debug` verb | not wired | `ROUTING_R3_WORKFLOW_CODE_REVIEW_ENABLED` | +| `COMPARE_ENSEMBLE` | Explicit "compare" mode OR high-uncertainty (confidence < 0.5) | not wired | `ROUTING_R3_WORKFLOW_COMPARE_ENSEMBLE_ENABLED` | +| `JUDGE_PIPELINE` | Medical/Legal/high-risk domain detected | not wired | `ROUTING_R3_WORKFLOW_JUDGE_PIPELINE_ENABLED` | + +## Per-workflow contract + +Every workflow handler 
implements: + +```typescript +export interface IWorkflowHandler { + readonly kind: WorkflowKind; + + /** Validate that this workflow CAN run with the given context (deps healthy, attachments present, etc.) */ + canHandle(context: WorkflowExecutionContext): boolean; + + /** Return the workflow plan: an ordered list of steps. Each step = a model call OR external service call. */ + plan(context: WorkflowExecutionContext): WorkflowPlan; + + /** Confidence score for whether this workflow is the right choice for this context (0..1) */ + confidence(context: WorkflowExecutionContext): number; +} + +export type WorkflowExecutionContext = { + message: string; + threadId?: string; + userId: string; + modalityResult: ModalityDetectionResult; + routingDecision: RoutingDecision; + attachments: AttachmentMeta[]; + urls: DetectedUrl[]; +}; + +export type WorkflowPlan = { + kind: WorkflowKind; + steps: WorkflowStep[]; + estimatedDurationMs: number; + estimatedCostUsd: number; +}; + +export type WorkflowStep = + | { type: 'llm_call'; provider: string; model: string; promptTemplate: string; inputFromStepId?: string } + | { type: 'extract'; extractor: 'pdf' | 'spreadsheet' | 'youtube_transcript' | 'web_scrape' | 'audio_transcribe' | 'video_extract'; sourceFileId?: string; sourceUrl?: string } + | { type: 'search'; query: string } + | { type: 'judge'; primaryStepId: string; criticStepId: string }; +``` + +## Files to add (scaffold included) + +``` +apps/claw-routing-service/src/modules/workflows/ +├── managers/ +│ ├── workflow-orchestrator.manager.ts (NEW — top-level coordinator) +│ └── handlers/ (NEW directory — 13 workflow handlers) +│ ├── direct-llm.handler.ts +│ ├── search-first.handler.ts +│ ├── extract-first.handler.ts +│ ├── pdf-extraction.handler.ts +│ ├── youtube-transcript.handler.ts +│ ├── image-analysis.handler.ts +│ ├── image-generation.handler.ts +│ ├── video-analysis.handler.ts +│ ├── audio-transcribe.handler.ts +│ ├── file-generation.handler.ts +│ ├── 
code-review.handler.ts +│ ├── compare-ensemble.handler.ts +│ └── judge-pipeline.handler.ts +├── types/ +│ ├── workflow-execution.types.ts +│ ├── workflow-plan.types.ts +│ └── workflow-handler.interface.ts +└── constants/ + └── workflow-priority.constants.ts (NEW) +``` + +## Workflow priority (when multiple match) + +``` +1. JUDGE_PIPELINE (highest — safety wins) +2. COMPARE_ENSEMBLE +3. PDF_EXTRACTION +4. YOUTUBE_TRANSCRIPT +5. VIDEO_ANALYSIS +6. AUDIO_TRANSCRIBE +7. IMAGE_ANALYSIS +8. CODE_REVIEW +9. SEARCH_FIRST +10. EXTRACT_FIRST +11. FILE_GENERATION +12. IMAGE_GENERATION +13. DIRECT_LLM (lowest — fallthrough) +``` + +## High-priority workflow specs + +### `JUDGE_PIPELINE` + +For medical/legal/high-risk domains: +``` +Step 1: llm_call to PRIMARY (e.g. claude-opus-4) +Step 2: llm_call to CRITIC (different family, e.g. gpt-4o) — asked "is the primary answer correct? cite issues" +Step 3: judge — if the critic agrees → finalize; if it disagrees → escalate to a third "judge" model OR human review queue +``` + +Records `judgeOutcome` on `RoutingOutcomeRecord`. 
+ +### `COMPARE_ENSEMBLE` + +``` +Step 1..N: parallel llm_call to N candidate models +Step N+1: judge — score the N responses (criteria: completeness, correctness, format adherence) +Step N+2: return winner + comparison summary +``` + +### `SEARCH_FIRST` + +``` +Step 1: search → research-service /search/run with query +Step 2: extract — pull top-K results as context +Step 3: llm_call with context-augmented prompt +``` + +### `PDF_EXTRACTION` + +``` +Step 1: extract — pdf via file-service chunking +Step 2: llm_call with chunks + summarize/extract/qa system prompt +``` + +### `YOUTUBE_TRANSCRIPT` + +``` +Step 1: extract — youtube_transcript via new extractor service (or research-service extension) +Step 2: llm_call with transcript chunks + user verb +``` + +### `CODE_REVIEW` + +``` +Step 1: extract code block(s) from message +Step 2: llm_call to coding-tuned model (claude-sonnet-4 / LOCAL_CODING) with code-review system prompt +Step 3 (optional): lint via local tool integration +Step 4: critic LLM verifies the review's suggestions +``` + +## Acceptance criteria + +| # | Test | Expected | +|---|----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------| +| 1 | `RoutingDecision` includes `selectedWorkflow` field | All decisions have non-null workflow (default: DIRECT_LLM) | +| 2 | Medical question + JUDGE_PIPELINE enabled | Decision has `selectedWorkflow=JUDGE_PIPELINE`, response goes through primary+critic | +| 3 | "Compare claude and gpt-4 on this question" + COMPARE_ENSEMBLE enabled | Decision has `COMPARE_ENSEMBLE`, response includes both answers + winner | +| 4 | "What's the latest news on the EU AI Act" + SEARCH_FIRST enabled | Decision has `SEARCH_FIRST`, response cites recent web sources | +| 5 | PDF attachment + "summarize" + PDF_EXTRACTION disabled | Decision falls back to `DIRECT_LLM`, reasonTag 
`workflow_disabled_fallthrough` | +| 6 | Unsupported workflow advertised in UI | NOT advertised — UI reads per-flag state | +| 7 | Workflow fails (e.g. YouTube transcript extractor down) | Falls back to `DIRECT_LLM` with reasonTag `workflow_extraction_failed` | + +## Frontend surfacing + +Each routing decision in `/routing` recent list shows a workflow badge: + +``` +[ JUDGE_PIPELINE ] [ COMPARE_ENSEMBLE ] [ SEARCH_FIRST ] [ DIRECT_LLM ] [ disabled — not yet wired ] +``` + +Disabled workflows are greyed out with the tooltip "Activate ROUTING_R3_WORKFLOW_X_ENABLED in .env". + +## Tests + +``` +apps/claw-routing-service/src/modules/workflows/managers/__tests__/workflow-orchestrator.manager.spec.ts + - picks JUDGE_PIPELINE over DIRECT_LLM for medical domain + - picks COMPARE_ENSEMBLE for explicit "compare" intent + - picks PDF_EXTRACTION when PDF + summarize verb + - falls back to DIRECT_LLM when chosen workflow disabled + - falls back to DIRECT_LLM when chosen workflow's deps unhealthy + - respects priority order when multiple match + +apps/claw-routing-service/src/modules/workflows/managers/handlers/__tests__/*.handler.spec.ts + - one spec per handler (13 specs) + - each verifies canHandle + plan + confidence + +qa/test-routing-r3-workflows.sh + - fire 13 fixture messages, one per workflow + - assert decision.selectedWorkflow matches expected +``` + +## Wiring + +Once activated, `RoutingManager.handleAuto()` becomes: + +```typescript +async handleAuto(context: RoutingContext): Promise<RoutingDecision> { + // ... existing stages 1-4 ... + + const modalityResult = await this.modalityDetectionService.detect({ /* ... 
*/ }); + + const workflowChoice = await this.workflowOrchestratorManager.pickWorkflow({ + context, + modalityResult, + baseDecision: existingV1Decision, + }); + + return { + ...existingV1Decision, + selectedWorkflow: workflowChoice.kind, + workflowConfidence: workflowChoice.confidence, + workflowPlan: workflowChoice.plan, + }; +} +``` + +## Rollback + +Per-workflow flag — disable one without affecting others. Master flag `ROUTING_R3_WORKFLOWS_ENABLED=false` → all back to DIRECT_LLM-only behavior. diff --git a/docs/15-ai-context/routing-flagship-streams/05-r4-cost-budget-intelligence.md b/docs/15-ai-context/routing-flagship-streams/05-r4-cost-budget-intelligence.md new file mode 100644 index 00000000..9512ceb5 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/05-r4-cost-budget-intelligence.md @@ -0,0 +1,130 @@ +# Stream 05 — R.4 Cost Budget Intelligence + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/05_R4_cost_budget_intelligence.md` + +## Mission + +Stop letting users spend unlimited cloud-model dollars. Add per-user (and optionally per-org) monthly caps; pre-routing gate that blocks cloud calls when over budget; user-visible warnings; free-tier-remaining awareness; admin + user dashboards. 
+ +## Files to add (scaffold included) + +``` +apps/claw-routing-service/src/modules/cost-budget/ (NEW MODULE) +├── cost-budget.module.ts +├── controllers/ +│ └── cost-budget.controller.ts +├── services/ +│ └── cost-budget.service.ts +├── managers/ +│ ├── budget-gate.manager.ts (pre-routing check) +│ ├── spend-tracker.manager.ts (post-execution increment) +│ ├── budget-warning.manager.ts (80% threshold warnings) +│ └── budget-reset.manager.ts (monthly cron) +├── repositories/ +│ └── user-cost-budget.repository.ts +├── dto/ +│ ├── create-budget.dto.ts +│ ├── update-budget.dto.ts +│ └── check-budget.dto.ts +├── types/ +│ └── budget.types.ts +└── constants/ + └── budget.constants.ts +``` + +## Prisma migration (see PRISMA_FUTURE_MODELS.md) + +`UserCostBudget` model + `CostBudgetScope` and `CostBudgetStatus` enums. Backfill: empty. + +## Pre-routing budget gate + +```typescript +// In RoutingManager.handleAuto() — BEFORE picking cloud provider: + +const estimatedCost = estimateRequestCost(context, candidatePrimary); +const budgetCheck = await this.budgetGateManager.check({ + userId: context.userId, + orgId: context.orgId, + estimatedCostUsd: estimatedCost, +}); + +if (budgetCheck.status === 'EXCEEDED' && !budgetCheck.overrideAllowed) { + // Force local route + return this.handleLocalOnly(context); +} + +if (budgetCheck.status === 'WARN') { + reasonTags.push('cost_budget_warn'); +} +``` + +## Acceptance criteria + +| # | Test | Expected | +|---|------|----------| +| 1 | User under budget + cloud request | Routes to cloud as normal; spend incremented post-execution | +| 2 | User at 85% of budget | Routes to cloud + `messageKey: 'BUDGET_NEAR_LIMIT'` in response metadata | +| 3 | User over budget without override permission | Routes to local; `messageKey: 'BUDGET_EXCEEDED_FORCED_LOCAL'` | +| 4 | User over budget WITH `overrideAllowed=true` | Routes to cloud + audit log + `messageKey: 'BUDGET_EXCEEDED_OVERRIDDEN'` | +| 5 | Unknown cost (model not in cost table) | Treat cautiously: 
assume STANDARD class; if user near budget → block | +| 6 | Privacy keyword present | Privacy beats budget — always local regardless | +| 7 | Free tier remaining (when connector exposes it) | Prefer free-tier-remaining provider for cost wins | +| 8 | Budget reset on `resetAt` date | Cron at midnight: `currentSpendUsd=0`, `status=OK`, `resetAt += 1 month` | +| 9 | Org budget exceeded but user has personal headroom | Org budget wins (most restrictive) | +| 10 | Cost dashboard at `/settings/budget` | Shows: monthly cap, spent, trend, projected hit date | +| 11 | Admin dashboard at `/admin/cost-budgets` | List all users + spend + status; filter by org | +| 12 | Cost/quality slider (`0=cheapest, 100=best`) | Affects scoring engine weight | + +## Endpoint contract + +```http +GET /api/v1/routing/cost-budget/me (current user's budget) +GET /api/v1/routing/cost-budget/me/forecast (projected hit date + trend) +PATCH /api/v1/routing/cost-budget/me (user updates personal cap if allowed) +POST /api/v1/routing/cost-budget (admin: create budget for any user/org) +PATCH /api/v1/routing/cost-budget/:id (admin: update) +GET /api/v1/routing/cost-budget (admin: list all with filters) +POST /api/v1/routing/cost-budget/check (internal: pre-routing gate) +``` + +## Free-tier awareness (blocked on connector-service work) + +Stream blocker B2 in master plan. 
When connector-service exposes `freeTierRemaining`: + +```typescript +// Score boost for providers with free tier +if (candidate.freeTierRemaining > 0 && candidate.costClass !== 'FREE') { + candidate.score += 0.1; // small boost when free tier still available +} +``` + +## RabbitMQ events + +``` +cost_budget.created { budgetId, scope, ownerId, monthlyCapUsd } +cost_budget.spend_incremented { budgetId, deltaUsd, totalUsd, percentOfCap } +cost_budget.warning_threshold_crossed { budgetId, percentOfCap } +cost_budget.exceeded { budgetId, forcedLocal: boolean, overrideUsed: boolean } +cost_budget.reset { budgetId, previousSpend, newResetAt } +``` + +## Frontend + +- `/settings/budget` — user page (own budget) +- `/admin/cost-budgets` — admin page (all budgets) +- Warning banner in chat composer when budget at 80%+ +- Locked-state in model selector when over budget without override +- "Budget exceeded" empty state with "Switch to local model" CTA + +## Rollback + +`ROUTING_R4_COST_BUDGET_ENABLED=false` → budget gate skipped; all routes proceed as before. DB rows remain (no destructive change). 
+ +## Risks + +| # | Risk | Mitigation | +|---|------|------------| +| 1 | Cost estimate wrong (token count off) | Use generous upper bound; log actual vs estimated for calibration | +| 2 | Race condition: 2 concurrent requests both pass budget check | Increment is post-execution; minor over-cap acceptable; weekly cleanup script | +| 3 | User can't access their work near reset | "Override" toggle for "this 24h period" | +| 4 | Privacy keyword + over-budget conflict | Privacy always wins — never expose data to cloud even if user opts in | diff --git a/docs/15-ai-context/routing-flagship-streams/06-r5-operator-playground.md b/docs/15-ai-context/routing-flagship-streams/06-r5-operator-playground.md new file mode 100644 index 00000000..3213eff7 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/06-r5-operator-playground.md @@ -0,0 +1,180 @@ +# Stream 06 — R.5 Operator Playground + Transparency + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/06_R5_operator_playground_transparency.md` + +## Mission + +Give operators and end users transparency into routing decisions. Add a "Try a message" playground for operators, surface "Why this model?" explanations in chat, and add the missing admin pages (taxonomy CRUD, circuit-breaker dashboard, category drill-down). + +## Surfaces to ship + +| Surface | Type | Path | Status today | New file paths | +|---------|------|------|--------------|----------------| +| Playground | FE page | `/routing/playground` | missing | `apps/claw-frontend/src/app/(portal)/routing/playground/page.tsx` | +| "Why this model?" 
| FE component | inline in chat message | missing | `apps/claw-frontend/src/components/chat/why-this-model.tsx` | +| Category drill-down | FE filter | `/routing` recent list | missing | extend `use-routing-decisions.ts` + add filter to existing page | +| Circuit-breaker dashboard | FE page | `/routing/circuit-breakers` | missing (BE exists) | `apps/claw-frontend/src/app/(portal)/routing/circuit-breakers/page.tsx` | +| Taxonomy admin UI | FE page | `/routing/taxonomy` | missing (BE exists) | `apps/claw-frontend/src/app/(portal)/routing/taxonomy/page.tsx` | +| Per-user routing history | FE page | `/settings/routing-history` | missing | `apps/claw-frontend/src/app/(portal)/settings/routing-history/page.tsx` | +| "Rerun decision" button | FE button on recent decision row | existing | missing | extend `routing-decision-row.tsx` | +| "Save playground case as fixture" | FE button on playground | new | missing | inside playground page | +| "Compare policies on sample prompt" | FE page | `/routing/policy-compare` | missing | `apps/claw-frontend/src/app/(portal)/routing/policy-compare/page.tsx` | + +## Playground API contract + +```http +POST /api/v1/routing/playground/evaluate +{ + "message": "Write a Python function to sort a list", + "attachments": [], + "userMode": "AUTO", + "compareWithV2": true, + "compareWithOllamaRouter": true +} + +Response 200: +{ + "v1Decision": { /* full RoutingDecision */ }, + "v2Decision": { /* full RoutingDecisionV2 */ }, + "ollamaRouterDecision": { /* OllamaRouter raw output */ }, + "scoreBreakdown": [ + { "provider": "Anthropic", "model": "claude-sonnet-4", + "scores": { "quality": 0.92, "cost": 0.6, "latency": 0.75, "privacy": 0.5, "familiarity": 0.8 }, + "totalScore": 0.74, "wasChosen": true }, + { "provider": "OpenAI", "model": "gpt-4o-mini", + "scores": { "quality": 0.7, "cost": 0.95, "latency": 0.9, "privacy": 0.5, "familiarity": 0.6 }, + "totalScore": 0.71, "wasChosen": false } + ], + "candidateList": [...full ranked list...], + 
"modalityResult": {...}, + "workflowChoice": {...} +} +``` + +## "Why this model?" inline + +For every chat message, add a small `ⓘ` button that expands to: + +``` +Routed to claude-sonnet-4 (Anthropic) because: +• Category: Coding (CONFIDENCE_EXACT_KEYWORD: "function", "sort", "list") +• Workflow: DIRECT_LLM (no special workflow triggered) +• Learned bias: +0.05 (you positively rated claude-sonnet-4 on coding 8/10 times) +• Cost class: STANDARD ($0.003 estimated) +• Privacy: PUBLIC_OK (no PII detected) +[ Rerun decision ] [ See full reasoning ] +``` + +## Files to add (no scaffold this round — frontend skeletons + BE playground module) + +Backend scaffold: + +``` +apps/claw-routing-service/src/modules/playground/ (NEW MODULE) +├── playground.module.ts +├── controllers/ +│ └── playground.controller.ts (POST /routing/playground/evaluate) +├── services/ +│ └── playground.service.ts +├── dto/ +│ └── playground-evaluate.dto.ts +└── types/ + └── playground.types.ts +``` + +Frontend pages (NOT scaffolded — needs i18n + auth wiring): + +``` +apps/claw-frontend/src/app/(portal)/routing/playground/page.tsx +apps/claw-frontend/src/app/(portal)/routing/circuit-breakers/page.tsx +apps/claw-frontend/src/app/(portal)/routing/taxonomy/page.tsx +apps/claw-frontend/src/app/(portal)/routing/policy-compare/page.tsx +apps/claw-frontend/src/app/(portal)/settings/routing-history/page.tsx +apps/claw-frontend/src/components/chat/why-this-model.tsx +apps/claw-frontend/src/hooks/routing/use-playground.ts +apps/claw-frontend/src/hooks/routing/use-circuit-breakers.ts +apps/claw-frontend/src/hooks/routing/use-taxonomy-admin.ts +apps/claw-frontend/src/hooks/routing/use-routing-history-mine.ts +``` + +## Acceptance criteria + +| # | Test | Expected | +|---|------|----------| +| 1 | Playground happy path: type message → submit | v1+v2+ollama all return; score breakdown visible | +| 2 | Playground does NOT actually execute the chat model | only the routing decision; no token spend | +| 3 | "Why 
this model?" expands in chat | shows reasonTags + cost estimate + workflow | +| 4 | Category filter on `/routing` | filtering to "Coding" shows only Coding-routed decisions | +| 5 | Circuit-breaker dashboard shows current state | OPEN/HALF_OPEN/CLOSED + recent failures + manual reset button | +| 6 | Taxonomy CRUD | admin can create/edit/delete TaxonomyRole entries via UI | +| 7 | Per-user routing history at `/settings/routing-history` | user sees their own decisions, with override option | +| 8 | Rerun decision button | hits `POST /routing/evaluate` with same context, shows new decision side-by-side | +| 9 | Save playground case as fixture | converts the playground decision into a replay-fixture row | +| 10 | Dark mode / mobile / RTL | all surfaces pass | + +## i18n keys (need real translations in all 8 locales) + +```typescript +{ + routing: { + playground: { + title: 'Routing Playground', + messageLabel: 'Message', + compareV2Label: 'Compare with v2 evaluator', + compareOllamaLabel: 'Compare with Ollama router', + submitButton: 'Evaluate routing', + v1Heading: 'v1 decision (current production)', + v2Heading: 'v2 decision (shadow / canary)', + ollamaHeading: 'Ollama-router decision', + scoreBreakdownHeading: 'Score breakdown', + candidateListHeading: 'All candidates', + saveAsFixtureButton: 'Save as regression fixture', + reasonTagsLabel: 'Reason tags', + }, + whyThisModel: { + heading: 'Why this model?', + categoryLabel: 'Category', + workflowLabel: 'Workflow', + learnedBiasLabel: 'Learned bias', + costEstimateLabel: 'Cost estimate', + privacyLabel: 'Privacy class', + rerunButton: 'Rerun decision', + fullReasoningButton: 'See full reasoning', + }, + circuitBreakers: { + title: 'Circuit Breakers', + stateColumn: 'State', + providerColumn: 'Provider', + failureCountColumn: 'Failures', + lastFailureAtColumn: 'Last failure', + opensUntilColumn: 'Opens until', + manualResetButton: 'Reset manually', + states: { CLOSED: 'Closed', OPEN: 'Open', HALF_OPEN: 'Half-open' }, 
+ }, + taxonomy: { + title: 'Taxonomy Admin', + roles: { /* ... */ }, + domains: { /* ... */ }, + }, + routingHistoryMine: { + title: 'My Routing History', + empty: 'No routing decisions yet', + overrideButton: 'Always route X to Y for me', + }, + }, +} +``` + +## Tests + +``` +apps/claw-routing-service/src/modules/playground/services/__tests__/playground.service.spec.ts +qa/test-routing-r5-playground.sh +apps/claw-frontend/e2e/routing-playground.spec.ts (Playwright) +apps/claw-frontend/e2e/routing-circuit-breakers.spec.ts +apps/claw-frontend/e2e/why-this-model.spec.ts +``` + +## Rollback + +Per-surface flag: `ROUTING_R5_PLAYGROUND_ENABLED`, `ROUTING_R5_EXPLANATION_IN_CHAT_ENABLED`, etc. Each surface can be hidden via flag without touching code. diff --git a/docs/15-ai-context/routing-flagship-streams/07-r6-multi-tenant-fleet.md b/docs/15-ai-context/routing-flagship-streams/07-r6-multi-tenant-fleet.md new file mode 100644 index 00000000..e081aed8 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/07-r6-multi-tenant-fleet.md @@ -0,0 +1,103 @@ +# Stream 07 — R.6 Multi-Tenant Fleet Routing + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/07_R6_multi_tenant_fleet_routing.md` + +## Mission + +Add org-scoped routing policies, allowed-provider rules, org budgets, org rate limits, and policy propagation. Today policies are global; this stream lets fleet admins manage routing for their org. + +**BLOCKER B4:** Requires `User.orgId` end-to-end in auth-service. `Organization` + `OrganizationMember` already exist in agent-service but are isolated there. Must be lifted into auth-service OR cross-service-shared first. 
+ +## Schema changes (see PRISMA_FUTURE_MODELS.md) + +- `RoutingPolicy`: add nullable `orgId` column + index `(orgId, isActive, priority)` +- New `OrgProviderRule` table (ALLOW/DENY per provider per org) +- New `OrgRateLimit` table (req/min per org) +- (Stream 05) `UserCostBudget.scope='ORG'` already supports org budgets + +## Policy resolution order (most-restrictive wins) + +``` +1. Explicit user override (per-thread settings) → wins immediately (except over privacy hard constraints, step 2) +2. Privacy hard constraints (PRIVACY_KEYWORDS) → wins (never cloud) +3. Org DENY rules (org banned this provider) → block + fallback +4. Org policy (orgId-scoped, highest priority) → may override AUTO +5. Org ALLOW list (if set, restricts choices) → filter candidates +6. Global policy (orgId=null, priority-ordered) → may override AUTO +7. Safe defaults (existing v1 behavior) +``` + +## Files to add / modify + +``` +apps/claw-routing-service/src/modules/routing/ +├── managers/ +│ └── policy-resolution.manager.ts (NEW — replaces inline applyPolicies) +├── repositories/ +│ ├── org-provider-rule.repository.ts (NEW) +│ └── org-rate-limit.repository.ts (NEW) +├── dto/ +│ ├── create-org-policy.dto.ts (NEW — extends create-policy.dto) +│ ├── create-org-provider-rule.dto.ts (NEW) +│ └── update-org-rate-limit.dto.ts (NEW) +└── types/ + └── policy-resolution.types.ts (NEW) +``` + +## Acceptance criteria + +| # | Test | Expected | +|---|------|----------| +| 1 | Org admin creates policy `forceClaude` for orgX | All orgX users see Anthropic on next request; non-orgX users unaffected | +| 2 | Org admin adds DENY rule for OpenAI on orgX | All orgX users get fallback away from OpenAI; reasonTag `org_provider_denied` | +| 3 | Existing global policies still apply | When user has no org, global behavior unchanged | +| 4 | Org rate limit 60/min exceeded | Routing returns 429 with messageKey `ORG_RATE_LIMIT_EXCEEDED` | +| 5 | Privacy keyword + org ALLOW=[OpenAI] | Privacy wins → local route; org rule ignored | +| 6 | Org has both allow and 
deny | DENY wins; ALLOW further restricts remaining | +| 7 | User in 2 orgs (future) | Org with most-restrictive policy applies | +| 8 | Org policy CRUD via UI | Admin UI allows org-admin role to create/edit policies for their org only | +| 9 | Cross-org isolation | Org admin from orgA cannot see/edit orgB policies | +| 10 | Policy propagation latency | Within 60s of admin save, all org devices see new policy | +| 11 | Audit on org policy change | RabbitMQ `routing.org_policy.changed` published | + +## Endpoint contract + +```http +POST /api/v1/routing/policies (existing — now accepts orgId in body) +GET /api/v1/routing/policies?orgId=X (existing — filter) +POST /api/v1/routing/orgs/:orgId/provider-rules +GET /api/v1/routing/orgs/:orgId/provider-rules +DELETE /api/v1/routing/orgs/:orgId/provider-rules/:id +PUT /api/v1/routing/orgs/:orgId/rate-limit +GET /api/v1/routing/orgs/:orgId/rate-limit +``` + +## Tests + +``` +apps/claw-routing-service/src/modules/routing/managers/__tests__/policy-resolution.manager.spec.ts + - org policy overrides global when both match + - org DENY blocks despite ALLOW + - privacy keyword beats all + - org rate limit returns 429 + - user with no org uses global only + - cross-org isolation in queries + +qa/test-routing-r6-multi-tenant.sh + - 2 orgs, each with different policies + - assert orgA user gets orgA policy, orgB user gets orgB policy + - assert rate limit blocks 61st request in 1 minute +``` + +## Rollback + +`ROUTING_R6_MULTI_TENANT_ENABLED=false` → policy-resolution falls back to existing global-only behavior. Org-tagged policies are still in DB but ignored. 
+ +## Risks + +| # | Risk | Mitigation | +|---|------|------------| +| 1 | Blocker B4: User.orgId not in auth-service | Build this stream after auth-service org work | +| 2 | Policy resolution complexity creates O(N²) scan | Index `(orgId, isActive, priority)`; cache org policies in Redis 30s | +| 3 | Org admin in orgA accidentally affects orgB | Strict orgId filter in repository layer; integration test | +| 4 | Rate limit storage | In-memory sliding window per orgId + Redis backup for distributed deployments | diff --git a/docs/15-ai-context/routing-flagship-streams/08-r7-i18n-non-english.md b/docs/15-ai-context/routing-flagship-streams/08-r7-i18n-non-english.md new file mode 100644 index 00000000..bfd53512 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/08-r7-i18n-non-english.md @@ -0,0 +1,133 @@ +# Stream 08 — R.7 i18n Non-English Routing + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/08_R7_i18n_non_english_routing.md` + +## Mission + +Stop treating every message as English. Detect language; route to language-aware models; expand keyword arrays for top non-EN languages; track language confidence on every decision. + +## Files to add (scaffold included) + +``` +apps/claw-routing-service/src/modules/language-detection/ (NEW MODULE) +├── language-detection.module.ts +├── controllers/ +│ └── language-detection.controller.ts (POST /routing/detect-language) +├── services/ +│ └── language-detection.service.ts +├── managers/ +│ ├── language-classifier.manager.ts (uses cld3 or fastText-lite) +│ └── code-mixed-detector.manager.ts (e.g. 
Arabic + English technical terms) +├── dto/ +│ └── detect-language.dto.ts +├── types/ +│ └── language-detection.types.ts +├── constants/ +│ ├── language-codes.constants.ts (ISO-639-1 supported set) +│ └── language-rtl.constants.ts (which languages are RTL) +└── utilities/ + └── language-strength-resolver.utility.ts (reads RouterModelRegistry.languageStrengthJson) +``` + +## Schema changes (see PRISMA_FUTURE_MODELS.md) + +- `RoutingDecision`: add `detectedLanguage`, `languageConfidence`, `isCodeMixed` +- `RouterModelRegistry`: add `languageStrengthJson` (per-language quality score) + +## Detection pipeline + +``` +1. Run cld3 (Compact Language Detector) on message → top-1 language + confidence +2. If confidence < 0.6 → tag as 'uncertain', use 'en' as fallback +3. If multi-language (e.g. "كود في Python" Arabic+English) → flag isCodeMixed +4. Save to RoutingDecision.detectedLanguage + languageConfidence +5. Pass language to scoring engine → boost candidates with high languageStrength +6. If RTL language (ar/he/fa/ur) and ROUTING_R7_ARABIC_RTL_HINT_ENABLED → prepend "respond in RTL-friendly format" to system prompt +``` + +## Translated keyword arrays + +Initial scope: Arabic, Spanish, German (top 3 non-EN per audit user demographics). 
+ +``` +apps/claw-routing-service/src/modules/routing/constants/locale-keywords/ +├── ar/coding-keywords.constants.ts ('كود', 'برمجة', 'وظيفة') +├── ar/legal-keywords.constants.ts ('قانون', 'محام', 'عقد') +├── ar/medical-keywords.constants.ts ('طب', 'تشخيص', 'دواء') +├── es/coding-keywords.constants.ts ('código', 'programación', 'función') +├── es/legal-keywords.constants.ts ('legal', 'abogado', 'contrato') +├── es/medical-keywords.constants.ts ('médico', 'diagnóstico', 'medicamento') +├── de/coding-keywords.constants.ts ('Code', 'Programmierung', 'Funktion') +├── de/legal-keywords.constants.ts ('Recht', 'Anwalt', 'Vertrag') +└── de/medical-keywords.constants.ts ('Medizin', 'Diagnose', 'Medikament') +``` + +Activation: `ROUTING_R7_SUPPORTED_LOCALES=en,ar,es,de` (extend per release). + +Roadmap: fr, it, pt, ru, hi, ja, zh-cn (post-stream). + +## Acceptance criteria + +| # | Test | Expected | +|---|------|----------| +| 1 | Arabic message "كود تصنيف صور" | `detectedLanguage=ar`, classified as Coding via AR keywords | +| 2 | Spanish "Necesito un médico" | `detectedLanguage=es`, classified as Medical via ES keywords | +| 3 | German "Wie funktioniert dieses Gesetz?" | `detectedLanguage=de`, classified as Legal via DE keywords | +| 4 | Code-mixed "Write me a 'دالة' in Python" | `isCodeMixed=true`, primary lang detected, both keyword sets scanned | +| 5 | Arabic + routing prefers `claude-opus-4` (high AR strength) | candidate score boosted; opus wins over gemini-flash | +| 6 | RTL message → response system prompt includes RTL hint | when flag enabled | +| 7 | Language confidence < 0.6 | falls through to EN keywords; tag `language_uncertain` | +| 8 | Unsupported locale (e.g. 
hi for now) | detected + saved + classifier falls back to EN | +| 9 | Performance: detection adds <10ms | cld3 is fast; measure p95 | + +## Endpoint contract + +```http +POST /api/v1/routing/detect-language +{ "message": "مرحبا، كيف يمكنني كتابة دالة Python؟" } + +200: +{ + "detectedLanguage": "ar", + "languageConfidence": 0.97, + "isCodeMixed": true, + "secondaryLanguage": "en", + "secondaryConfidence": 0.12, + "isRtl": true +} +``` + +## RouterModelRegistry language strength seed + +```typescript +{ + 'claude-opus-4': { en: 1.0, ar: 0.92, es: 0.95, de: 0.94, fr: 0.94, hi: 0.85, ja: 0.88, zh: 0.85 }, + 'claude-sonnet-4': { en: 1.0, ar: 0.88, es: 0.93, de: 0.92, fr: 0.92, hi: 0.78, ja: 0.85, zh: 0.82 }, + 'gpt-4o': { en: 1.0, ar: 0.85, es: 0.95, de: 0.94, fr: 0.94, hi: 0.82, ja: 0.92, zh: 0.92 }, + 'gpt-4o-mini': { en: 1.0, ar: 0.70, es: 0.90, de: 0.88, fr: 0.88, hi: 0.65, ja: 0.80, zh: 0.78 }, + 'gemini-2.5-flash': { en: 1.0, ar: 0.78, es: 0.92, de: 0.90, fr: 0.90, hi: 0.80, ja: 0.88, zh: 0.85 }, + 'gemma3:4b': { en: 0.95, ar: 0.65, es: 0.78, de: 0.75, fr: 0.78 }, + 'qwen3:1.7b': { en: 0.92, ar: 0.55, es: 0.70, de: 0.68, fr: 0.70, ja: 0.80, zh: 0.95 }, + 'phi4-mini': { en: 0.95, ar: 0.50, es: 0.72, de: 0.70, fr: 0.72 }, +} +``` + +Seeded via `apps/claw-routing-service/prisma/seed/language-strength.seed.ts`. + +## Tests + +``` +apps/claw-routing-service/src/modules/language-detection/managers/__tests__/language-classifier.manager.spec.ts + - 8 languages detected correctly with confidence > 0.8 + - code-mixed detected + - very short message (1 word) returns 'en' fallback + - empty message returns null + +qa/test-routing-r7-language-detection.sh + - 20 prompts in 8 languages, assert detection correct + - assert routing biases toward stronger-language models +``` + +## Rollback + +`ROUTING_R7_LANGUAGE_DETECTION_ENABLED=false` → endpoint returns null; hot path skips detection; `detectedLanguage` left null on new decisions. 
diff --git a/docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md b/docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md new file mode 100644 index 00000000..88bb5bb4 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/09-r8-advanced-intelligence.md @@ -0,0 +1,148 @@ +# Stream 09 — R.8 Advanced Routing Intelligence + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/09_R8_advanced_routing_intelligence.md` + +## Mission + +Nine independent sub-features that each make the router smarter. Each ships behind its own flag and can be activated independently. + +## Sub-features + +### 9.1 — Prompt-length-aware filtering + +**Trigger:** routing context arrives with token-count estimate. +**Behavior:** filter out candidates with `contextWindowTokens < estimatedTokens × 1.2` (20% headroom). +**Data needed:** existing `RouterModelRegistry.contextWindowTokens` (already there). +**UI:** in playground score breakdown, show "filtered: insufficient context window". +**Tests:** 100k-token prompt cannot route to 4k-context model; near-boundary uses cushion. +**Flag:** `ROUTING_R8_PROMPT_LENGTH_FILTER_ENABLED`. +**Rollback:** flag off. + +### 9.2 — Latency-based circuit breaker + +**Trigger:** rolling p95 latency for provider exceeds `ROUTING_R8_LATENCY_CIRCUIT_THRESHOLD_MS` (default 15000). +**Behavior:** open circuit; same lifecycle as existing failure-based breaker (HALF_OPEN probe → CLOSE on success). +**Data needed:** existing `RouterCircuitBreaker` table extended with `trigger='LATENCY_P95'` + `latencyThresholdMs` column. +**UI:** circuit-breaker dashboard (stream R.5) shows trigger reason. +**Tests:** simulate slow provider → assert circuit opens after threshold breach. +**Flag:** `ROUTING_R8_LATENCY_CIRCUIT_BREAKER_ENABLED`. +**Rollback:** flag off; failure-based circuit unchanged. 
+ +### 9.3 — Switch model mid-stream + +**Trigger:** first SSE chunk takes >`ROUTING_R8_MID_STREAM_FIRST_CHUNK_MAX_MS` (default 5000). +**Behavior:** kill the in-flight call; immediately reroute to next-best candidate; UI shows "switched to faster model" inline notice. +**Data needed:** chat-service must support call cancellation mid-stream. +**Risk:** lose partial output; user sees brief flicker. +**Flag:** `ROUTING_R8_MID_STREAM_SWITCH_ENABLED`. +**Rollback:** flag off — stay with original model regardless. + +### 9.4 — Fine-tuned model preference per user + +**Trigger:** user has a `UserFineTunePreference` row for the detected domain. +**Behavior:** boost their fine-tune in scoring engine by `weight` × `learnedBiasWeightMax`. +**Data needed:** new `UserFineTunePreference` table (see PRISMA_FUTURE_MODELS.md 09.4). +**UI:** `/settings/fine-tunes` — manage own preferences. +**Tests:** user with fine-tune for "legal" → routes to their fine-tune over claude-opus-4 for legal questions. +**Flag:** `ROUTING_R8_FINE_TUNE_PREFERENCE_ENABLED`. +**Rollback:** flag off. + +### 9.5 — Region-aware routing + +**Trigger:** routing context has `userRegion` (EU/US/APAC) OR org has region preference. +**Behavior:** prefer regional endpoint (e.g. Bedrock us-east-1 vs eu-west-1) for GDPR compliance + latency win. +**Data needed:** new `RouterRegionPreference` table + connector-service must expose region endpoints (Blocker B5). +**Tests:** EU user gets eu-west-1; GDPR-tagged data forces EU even for US-based user. +**Flag:** `ROUTING_R8_REGION_AWARE_ROUTING_ENABLED`. +**Rollback:** flag off — default region used. + +### 9.6 — Multi-intent splitter + +**Trigger:** message has 2+ detected intents (e.g. "code + image", "translate + summarize"). +**Behavior:** split into N parallel routed calls; merge results into a single response. +**Data needed:** existing `MULTI_INTENT_PRIORITY` constants + chat-service must support parallel-result merging. 
+**UI:** message shows N pills, one per sub-intent, with per-sub-result. +**Tests:** "write code AND a marketing email" → 2 routes; "translate AND summarize" → 2 routes. +**Flag:** `ROUTING_R8_MULTI_INTENT_SPLITTER_ENABLED`. +**Rollback:** flag off — picks priority winner only (current behavior). + +### 9.7 — Embedding-task routing + +**Trigger:** `EMBEDDING` modality detected (Stream R.2). +**Behavior:** new evaluator branch picks embedding-specific model (text-embedding-3-large / nomic-embed-text / etc.). +**Data needed:** RouterModelRegistry must include embedding models. +**Tests:** "embed this paragraph" → text-embedding-3-large; "use local" → nomic-embed-text. +**Flag:** `ROUTING_R8_EMBEDDING_ROUTING_ENABLED`. +**Rollback:** flag off — embeddings get classified as text routing (suboptimal but not broken). + +### 9.8 — Ensemble consensus mode + +**Trigger:** new `RoutingMode.CONSENSUS` OR high-stakes domain (medical/legal) auto-promoted. +**Behavior:** fire 3 models in parallel; score agreement; return highest-confidence answer + agreement score. +**Data needed:** chat-service parallel infrastructure (exists for `/chat-messages/parallel`). +**Tests:** medical question → 3 models agree → high-confidence; 3 disagree → flag as uncertain. +**Flag:** `ROUTING_R8_CONSENSUS_MODE_ENABLED`. +**Rollback:** flag off — falls back to JUDGE_PIPELINE or DIRECT_LLM. + +### 9.9 — Cost / quality slider + +**Trigger:** user has set a slider value 0-100 in `/settings`. +**Behavior:** scoring engine uses `qualityWeight = slider/100`, `costWeight = 1 - qualityWeight`. +**Data needed:** new `UserCostQualitySlider` table (see PRISMA_FUTURE_MODELS.md 09.9). +**UI:** `/settings/routing-preferences` — slider with live preview. +**Tests:** slider=0 → cheapest; slider=100 → best quality; slider=50 → balanced. +**Flag:** `ROUTING_R8_COST_QUALITY_SLIDER_ENABLED`. +**Rollback:** flag off — uses default 0.5 weight. 
+ +## Module structure (scaffold included) + +``` +apps/claw-routing-service/src/modules/intelligence/ (NEW MODULE — umbrella for 9 sub-features) +├── intelligence.module.ts +├── managers/ +│ ├── prompt-length-guard.manager.ts (9.1) +│ ├── latency-circuit-breaker.manager.ts (9.2) +│ ├── mid-stream-switcher.manager.ts (9.3) +│ ├── fine-tune-preference.manager.ts (9.4) +│ ├── region-router.manager.ts (9.5) +│ ├── multi-intent-splitter.manager.ts (9.6) +│ ├── embedding-router.manager.ts (9.7) +│ ├── consensus-mode.manager.ts (9.8) +│ └── cost-quality-slider.manager.ts (9.9) +├── types/ +│ └── intelligence.types.ts +└── constants/ + └── intelligence.constants.ts +``` + +## Acceptance per sub-feature + +| # | Feature | Acceptance | +|---|---------|------------| +| 9.1 | Prompt length | 100k-token msg → only 128k+ context candidates | +| 9.2 | Latency circuit | p95 > 15s for 5 min → circuit OPEN | +| 9.3 | Mid-stream switch | First chunk > 5s → killed + rerouted | +| 9.4 | Fine-tune | User with fine-tune for legal → biased toward it | +| 9.5 | Region | EU user → EU endpoint | +| 9.6 | Multi-intent | 2 intents → 2 parallel calls | +| 9.7 | Embedding | Embedding intent → embedding model | +| 9.8 | Consensus | 3 models fire; agreement scored | +| 9.9 | Slider | Slider value affects scoring weight | + +## Tests + flag matrix + +``` +qa/test-routing-r8.1-prompt-length.sh +qa/test-routing-r8.2-latency-circuit.sh +qa/test-routing-r8.3-mid-stream-switch.sh +qa/test-routing-r8.4-fine-tune.sh +qa/test-routing-r8.5-region.sh +qa/test-routing-r8.6-multi-intent.sh +qa/test-routing-r8.7-embedding.sh +qa/test-routing-r8.8-consensus.sh +qa/test-routing-r8.9-cost-quality-slider.sh +``` + +## Rollback + +Each sub-feature has its own flag. Disabling one does NOT affect the others. 
diff --git a/docs/15-ai-context/routing-flagship-streams/10-r9-quality-hardening.md b/docs/15-ai-context/routing-flagship-streams/10-r9-quality-hardening.md new file mode 100644 index 00000000..dba44471 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/10-r9-quality-hardening.md @@ -0,0 +1,142 @@ +# Stream 10 — R.9 Quality + Reliability Hardening + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/10_R9_quality_reliability_hardening.md` + +## Mission + +Make the routing service production-grade with real test coverage, regression suites, load tests, drift detection, and release gates. + +## Test coverage targets + +| Component | Current (est) | Target initial | Target final | +|-----------|--------------:|---------------:|-------------:| +| `routing.manager.ts` (1635 LOC) | ~10% | 80% | 92% | +| `route-evaluator/` v2 | ~30% | 80% | 92% | +| `scoring/scoring-engine.manager.ts` | ~20% | 80% | 92% | +| `workflows/workflow-orchestrator.manager.ts` | 0% (scaffold) | 80% (after R.3) | 92% | +| `classifier/classifier.manager.ts` | ~40% | 85% | 92% | +| `learning-loop/learning-loop.manager.ts` | ~30% | 80% | 92% | +| `reliability/circuit-breaker.manager.ts` | ~50% | 85% | 92% | +| `sync/router-sync.manager.ts` | ~40% | 80% | 92% | +| `observability/observability.service.ts` | ~30% | 75% | 90% | + +## Test artifacts to ship + +``` +apps/claw-routing-service/test/ +├── fixtures/ +│ ├── routing-500-prompts.json (500-prompt regression set) +│ ├── multilingual-routing-prompts.json (top 8 languages, 25 each) +│ ├── multimodal-routing-prompts.json (PDF/YouTube/audio/video × 25 each) +│ ├── replay-confirmed-regressions.json (promoted suspicious cases) +│ └── provider-mock-responses.json (mocks for OpenAI/Anthropic/Gemini/etc.) 
+├── load/ +│ ├── routing-decision-load.k6.js (k6 script: 100 req/s × 10 min) +│ ├── routing-decision-spike.k6.js (spike: 500 req/s × 30s) +│ └── README.md (how to run, expected p95) +├── integration/ +│ ├── routing-flagship-end-to-end.spec.ts (50 prompts through full v2 pipeline) +│ ├── routing-fallback-chain.spec.ts (force each provider to fail in turn) +│ └── routing-canary-comparison.spec.ts (assert v1 vs v2 outcomes) +└── regression/ + ├── routing-regression-runner.ts (re-runs 500-prompt set; outputs diff) + └── routing-drift-detector.ts (compares against baseline snapshot) +``` + +## QA scripts to add + +``` +qa/test-routing-r9-coverage.sh — npm run test:cov; fails if <80% +qa/test-routing-r9-regression-500.sh — fires 500 prompts; asserts ≥99% stable outcome +qa/test-routing-r9-load.sh — runs k6 load test; asserts p95 < 50ms +qa/test-routing-r9-drift.sh — runs drift detector; alerts if accuracy drop > 2% +qa/test-routing-r9-provider-mock.sh — runs against mock provider suite (no cloud cost) +qa/test-routing-r9-migration-rollback.sh — applies + rolls back each migration; asserts no data loss +``` + +## CI integration + +Add to `.github/workflows/ci.yml`: + +```yaml +jobs: + routing-regression: + runs-on: ubuntu-latest + needs: [build] + if: github.event_name == 'pull_request' + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + - run: cd apps/claw-routing-service && npm run test:cov + - run: cd apps/claw-routing-service && bash ../../qa/test-routing-r9-regression-500.sh + - name: Block PR on regression + run: | + REGRESSION_COUNT=$(jq '.bad_regression' apps/claw-routing-service/test/regression/results.json) + if [ "$REGRESSION_COUNT" -gt "0" ]; then + echo "FAIL: $REGRESSION_COUNT regressions" + exit 1 + fi +``` + +## Drift detection + +Cron `0 4 * * *` (4 AM daily): + +``` +1. Run routing on the 500-prompt baseline fixture set +2. Compare to last RoutingCalibrationSnapshot +3. If accuracy delta > 2% → emit `routing.drift.detected` event +4.
Alert via existing audit channel +5. Generate `claudePrompt` for diagnosis (similar to Replay Lab export) +``` + +## Provider mock suite + +``` +apps/claw-routing-service/test/mocks/ +├── openai.mock.ts (deterministic responses for routing tests) +├── anthropic.mock.ts +├── gemini.mock.ts +├── deepseek.mock.ts +├── grok.mock.ts +├── ollama.mock.ts +└── llamacpp.mock.ts +``` + +Used by integration + regression tests so they don't cost real API tokens. + +## Release gates + +Add `docs/16-quality-engineering/ROUTING_RELEASE_GATE.md`: + +``` +RELEASE GATE — routing-service + +MUST PASS before any merge to main: + +[ ] npm run typecheck → 0 errors +[ ] npm run lint → 0 errors +[ ] npm run test:cov → ≥80% on all 4 metrics (statements/branches/functions/lines) +[ ] qa/test-routing-r9-regression-500.sh → 0 confirmed regressions +[ ] qa/test-routing-r9-load.sh → p95 < 50ms +[ ] qa/test-routing-r9-drift.sh → no drift alert +[ ] qa/test-routing-r9-migration-rollback.sh → no data loss +[ ] Docker logs check → 0 UnhandledPromiseRejection +[ ] CHANGELOG updated +[ ] Per-stream docs updated for any flag activation +``` + +## Acceptance + +| # | Test | Expected | +|---|------|----------| +| 1 | npm run test:cov in clean repo | ≥80% statements/branches | +| 2 | Add a routing regression by hand-editing routing.constants.ts | 500-prompt regression suite catches it; PR blocked | +| 3 | Run k6 load test | p95 routing decision < 50ms at 100 req/s | +| 4 | Drift detector against frozen baseline | Identifies the change; emits alert | +| 5 | Add a new migration + rollback test | Both forward + backward succeed without data loss | +| 6 | Provider mock test (no live cloud) | Tests pass deterministically; runs in CI | + +## Rollback + +These are quality + CI artifacts, not features. "Rollback" = remove from CI workflow (doesn't affect runtime). 
diff --git a/docs/15-ai-context/routing-flagship-streams/11-quick-wins.md b/docs/15-ai-context/routing-flagship-streams/11-quick-wins.md new file mode 100644 index 00000000..6d87c5a2 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/11-quick-wins.md @@ -0,0 +1,167 @@ +# Stream 11 — Quick Wins Backlog + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/11_quick_wins_backlog.md` + +## Mission + +10 single-day tickets that move the needle without requiring full streams. Designed to ship safely without dependencies. + +--- + +## 11.1 — Wire `WorkflowKind` to `RoutingDecision` + +**Scope:** add `selectedWorkflow` field to RoutingDecision; for now always set `DIRECT_LLM`. Unlocks future R.3 work + UI badge in stream R.5. + +**Files:** +- `apps/claw-routing-service/prisma/schema.prisma` — add `selectedWorkflow WorkflowKind?` +- `apps/claw-routing-service/src/modules/routing/managers/routing.manager.ts` — set `DIRECT_LLM` on every decision +- `apps/claw-frontend/src/types/routing.types.ts` — add field +- `apps/claw-frontend/src/components/routing/routing-decision-row.tsx` — render badge + +**Tests:** existing route tests assert `selectedWorkflow=DIRECT_LLM` on response. +**Acceptance:** every new RoutingDecision row has non-null `selectedWorkflow`. +**Rollback:** field is nullable; remove badge from UI; no schema rollback needed. +**Done checklist:** typecheck + lint + test + visual confirm in `/routing`. + +--- + +## 11.2 — Surface `RoutingDecision.explanation` in chat message header + +**Scope:** small `ⓘ` icon next to provider/model badge in chat-message bubble; click expands to existing `explanation` text. + +**Files:** +- `apps/claw-frontend/src/components/chat/message-bubble.tsx` — add icon + popover +- `apps/claw-frontend/src/hooks/chat/use-message-explanation.ts` — new hook; uses existing decision in props +- i18n keys in 8 locales for "Why this model?" 
+ +**Acceptance:** user clicks ⓘ → sees explanation; closes on outside click. +**Rollback:** hide icon via flag. +**Done checklist:** Playwright test for popover open/close + a11y check. + +--- + +## 11.3 — Add `/routing/playground` page (skeleton only) + +**Scope:** new page that calls existing `/routing/evaluate` (v1) endpoint with a textarea + submit; renders decision JSON. + +**Files:** +- `apps/claw-frontend/src/app/(portal)/routing/playground/page.tsx` +- `apps/claw-frontend/src/hooks/routing/use-playground.ts` +- `apps/claw-frontend/src/components/routing/playground-result-display.tsx` +- i18n keys + +**Acceptance:** operator can paste message → see v1 decision; no chat-model call (no cost). +**Note:** v2 / Ollama side-by-side from Stream R.5 lands later. This is the skeleton. +**Rollback:** hide nav entry via flag. + +--- + +## 11.4 — Add YouTube URL detection (regex only, no provider) + +**Scope:** detect YouTube URLs in message; add `YOUTUBE_INPUT` to `detectedModalities` on the decision; log `youtube_detected_workflow_not_implemented` as a placeholder. No actual transcript fetching. + +**Files:** +- `apps/claw-routing-service/src/modules/routing/managers/routing.manager.ts` — single regex check before other detection +- `apps/claw-routing-service/src/modules/routing/constants/youtube-url.constants.ts` — regex (already scaffolded in R.2) + +**Acceptance:** YouTube URL → decision has `detectedModalities` including `YOUTUBE_INPUT`; log line says `youtube_detected_workflow_not_implemented`. +**Rollback:** remove regex check. +**Note:** real workflow needs R.2 + R.3. + +--- + +## 11.5 — File-attachment MIME → workflow hint + +**Scope:** if request has attachment with `application/pdf` MIME + verb (`summarize|explain`) → set `workflowHint=PDF_EXTRACTION` on decision. No actual PDF extraction wired.
+ +**Files:** +- `apps/claw-routing-service/src/modules/routing/managers/routing.manager.ts` — single check before AUTO +- Wire attachments into `RoutingContext` (chat-service change required — see Blocker B1) + +**Acceptance:** PDF + summarize → decision has `workflowHint=PDF_EXTRACTION`. +**Rollback:** remove check. +**Risk:** Blocker B1 — chat-service must pass attachment metadata in routing context. + +--- + +## 11.6 — Add category filter to `useRoutingDecisions` + +**Scope:** add `categoryFilter?: string` query param to `GET /routing/decisions/:threadId`; pipe through to repository. + +**Files:** +- `apps/claw-routing-service/src/modules/routing/controllers/routing.controller.ts` — accept query param +- `apps/claw-routing-service/src/modules/routing/repositories/routing-decisions.repository.ts` — add WHERE clause +- `apps/claw-frontend/src/hooks/routing/use-routing-decisions.ts` — pass filter +- `apps/claw-frontend/src/app/(portal)/routing/page.tsx` — dropdown filter UI + +**Acceptance:** filter by "Coding" → only Coding decisions shown. +**Rollback:** remove filter UI; query param ignored. + +--- + +## 11.7 — Provider+model chart on adaptive-insights + +**Scope:** new chart on `/routing/adaptive-insights` showing per-(provider, model) success/failure/latency over 7/30/90-day window. + +**Files:** +- `apps/claw-frontend/src/components/routing/provider-model-chart.tsx` — new component using recharts (already in deps) +- `apps/claw-frontend/src/app/(portal)/routing/adaptive-insights/page.tsx` — add the chart +- Backend already exposes this data via existing observability endpoint + +**Acceptance:** chart renders with real data; hover shows tooltip. +**Rollback:** hide chart via flag. + +--- + +## 11.8 — Promote 5 confirmed regressions to test fixtures + +**Scope:** operational ticket — pick 5 high-value confirmed regressions from Replay Lab, run "Promote to fixture" on each, commit the generated test code. 
+ +**Files:** +- `apps/claw-routing-service/test/regression/promoted-fixtures.spec.ts` — appended with 5 new test cases + +**Acceptance:** 5 new tests in repo; all pass; PR-blocking on regression. +**Rollback:** revert the commit. + +--- + +## 11.9 — "Rerun decision" button on `/routing` recent list + +**Scope:** small refresh icon on each row; clicking re-fires `/routing/evaluate` with the original context; shows old vs new decision in modal. + +**Files:** +- `apps/claw-frontend/src/components/routing/routing-decision-row.tsx` — add button +- `apps/claw-frontend/src/hooks/routing/use-rerun-decision.ts` — new mutation hook +- `apps/claw-frontend/src/components/routing/rerun-decision-modal.tsx` — comparison modal + +**Acceptance:** click → modal shows old vs new side by side. +**Rollback:** hide button. + +--- + +## 11.10 — Detect non-English text and tag decision + +**Scope:** run a cheap character-set detector (no library needed: if non-ASCII chars / total chars > 0.3, mark as non-EN); save `detectedLanguage='non-en'` flag on decision. Real language detection comes in R.7. + +**Files:** +- `apps/claw-routing-service/src/modules/routing/utilities/quick-language-detector.utility.ts` — new pure utility +- `apps/claw-routing-service/src/modules/routing/managers/routing.manager.ts` — call before other detection + +**Acceptance:** Arabic/Russian (non-Latin-script) message → decision has `detectedLanguage='non-en'`; mostly-ASCII languages such as Spanish/German fall below the 0.3 threshold and stay untagged until R.7; baseline data for R.7 work. +**Rollback:** remove the call. + +--- + +## Activation order (no dependencies) + +All 10 are independent of one another; ship in any order (11.5 additionally waits on Blocker B1). Suggest: +1. 11.1 (workflow on decision) — unlocks UI badges +2. 11.6 (category filter) — easy admin win +3. 11.7 (provider+model chart) — visible improvement +4. 11.2 ("why this model?") — user-visible +5. 11.9 (rerun button) — operator win +6. 11.10 (non-EN tag) — sets up R.7 data +7. 11.4 (YouTube regex) — sets up R.2 data +8. 11.3 (playground skeleton) — sets up R.5 +9. 11.8 (promote regressions) — anytime +10.
11.5 (PDF MIME hint) — blocked on chat-service work diff --git a/docs/15-ai-context/routing-flagship-streams/12-business-roadmap.md b/docs/15-ai-context/routing-flagship-streams/12-business-roadmap.md new file mode 100644 index 00000000..b9b10d32 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/12-business-roadmap.md @@ -0,0 +1,135 @@ +# Stream 12 — Business Positioning + Release Roadmap + +**Source prompt:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/12_business_positioning_and_release_roadmap.md` + +## Positioning (1-sentence) + +> "ClawAI isn't another AI wrapper — it's the intelligence layer that picks the right model and workflow for the task, with cost, privacy, latency, modality, domain awareness, learned feedback, and full transparency." + +## Why this matters + +Every other "AI platform" today: +- Hardcodes a single LLM per request +- Charges per cloud call regardless of need +- Sends private data to whatever model the user picked, with no privacy enforcement +- Can't tell PDF from YouTube from spreadsheet — treats every input as text +- Doesn't learn from feedback — same user keeps getting the same bad answers +- Has no concept of multi-step workflows (search-first, judge-pipeline, ensemble) +- Hides routing decisions from operators — debugging "why did it pick X?" is impossible + +ClawAI flips all of that. + +## Release plan + +### Release 1 — Foundation (Streams 11.1 + 11.2 + 01) +**Goal:** close the learning loop; surface explanations; tag workflow on every decision. +**Streams:** R.1 + quick wins 11.1, 11.2. +**Why first:** highest leverage. Data is already collected; the hot path just needs to read it. Visibility unlocks user trust. +**Demo:** open chat; click ⓘ on a message → see "we picked X because of your thumbs-up history on Y topic". Open `/routing` → see workflow badge. 
+**Metrics:** +- learned-bias activation rate (% of decisions where bias changed the pick) +- user feedback positive rate (does picking better models actually correlate with positive feedback?) + +### Release 2 — Trust + safety (Streams 02 + 06 + 10) +**Goal:** v2 canary at 5%; playground UI; 500-prompt regression suite blocks bad changes. +**Streams:** R.1/R.3 canary + R.5 playground + R.9 hardening. +**Why second:** before exposing more capability, harden the foundation. +**Demo:** open `/routing/playground` → see v1 + v2 + score breakdown live. Show CI failing on an injected regression. +**Metrics:** +- canary regression rate +- CI block rate (regressions caught before merge) +- p95 routing decision latency + +### Release 3 — Multimodal detection + workflows (Streams 03 + 04 + 11.4 + 11.5 + 11.10) +**Goal:** detect YouTube/PDF/video/audio/spreadsheet/URL/non-English; SEARCH_FIRST + PDF_EXTRACTION + YOUTUBE_TRANSCRIPT live. +**Streams:** R.2 + R.3 (subset) + quick wins. +**Why third:** unlocks new product capabilities (YouTube analysis, PDF Q&A, web search). +**Demo:** paste YouTube URL → get a summary. Upload PDF + "summarize" → structured extraction. +**Metrics:** +- modality detection accuracy +- per-workflow completion rate +- workflow fallback rate + +### Release 4 — Judge + Compare + Code Review (Stream 04 high-priority workflows) +**Goal:** JUDGE_PIPELINE for medical/legal; COMPARE_ENSEMBLE for explicit compare; CODE_REVIEW for code blocks. +**Streams:** R.3 high-priority workflows. +**Why fourth:** safety-critical workflows; raises quality for high-stakes domains. +**Demo:** medical question → primary + critic agree → high-confidence answer with judge stamp. +**Metrics:** +- judge verified rate +- ensemble agreement rate +- code review actionable suggestion rate + +### Release 5 — Cost + multi-tenant (Streams 05 + 07) +**Goal:** per-user budgets; org-scoped policies; org rate limits. +**Streams:** R.4 + R.6. 
+**Why fifth:** enterprise-grade pricing controls. +**Demo:** user over $10/month budget → routed local automatically; org admin bans OpenAI org-wide. +**Metrics:** +- budget enforcement rate +- org policy propagation latency +- cost saved per user per month + +### Release 6 — Advanced intelligence + i18n (Streams 08 + 09) +**Goal:** language-aware routing; prompt-length filter; mid-stream switch; embedding routing; consensus; cost/quality slider. +**Streams:** R.7 + R.8. +**Why last:** polish + non-English markets. +**Demo:** Arabic legal question → opus (high AR strength). 100k-token prompt → only 128k+ candidates. Slider at 100 → claude-opus-4; slider at 0 → gemma3:4b local. +**Metrics:** +- non-EN routing accuracy +- mid-stream rescue rate +- consensus agreement on high-stakes + +## Success metrics (cross-release) + +| Metric | Today | Target after Release 6 | +|--------|------:|---------------:| +| Route accuracy (vs human-judged "right model") | 99.1% (English keywords only) | 96%+ across 8 languages × 13 workflows | +| Cost saved per user per month (vs naive routing to gpt-4 always) | unknown | $20+ | +| Latency saved (vs always-cloud routing) | partial via PRIVACY_FIRST | 30%+ median latency reduction | +| Judge verified rate (medical/legal) | 0% (no judge) | 92%+ | +| User feedback positive rate | unknown baseline | +15pp over baseline after Release 1 (stream R.1) lands | +| Fallback rescue rate (when primary fails) | wired | 99.5%+ | +| Workflow completion rate (non-DIRECT_LLM) | 0% (workflows paper-only) | 95%+ | +| Regression count (per quarter) | unknown | 0 (blocked by CI) | +| User trust score (survey: "do you understand why X was picked?") | unknown | 75%+ after Release 2 | +| Canary regression rate (v2) | n/a | <1% before promotion to 100% | +| Cost transparency: % of users who know their monthly spend | 0% | 90%+ after Release 5 | + +## Sales / marketing collateral implications + +This routing platform alone justifies: + +| Audience | Pitch | +|----------|-------| +| Enterprise CTO |
"Stop paying for the wrong models. ClawAI's router enforces your org's policies, budgets, and privacy rules — every call." | +| Developer | "Stop guessing which model to use. ClawAI's router picks the right one, explains why, and learns from your feedback." | +| Compliance officer | "Privacy keywords never leave your machine. Org-scoped allow/deny lists enforce data residency. Full audit trail." | +| Security team | "Circuit breakers + canary deployments + 500-prompt regression suite. We treat routing like production code, not magic." | +| AI researcher | "Multi-dimensional scoring engine + per-domain learned bias + judge pipeline + ensemble consensus. Open metrics, open replay lab." | + +## Public-facing pages to ship alongside this work + +- `/landing/routing` — "How ClawAI picks the right model" with live demo +- `/docs/routing` — public documentation +- `/blog/routing-deep-dive` — engineering blog (judge pipeline, learning loop) +- `/case-studies/legal-firm-saves-X-with-routing` +- `/comparison/clawai-vs-openrouter` — vs raw model gateways + +## Long-term differentiation + +Even if competitors copy the surface features (workflow kinds, modality detection), the **proprietary learning loop** is the long-term moat: + +- Year-over-year, ClawAI's router accumulates per-user / per-org / per-topic preferences +- New users benefit from cohort priors +- Cost/quality tradeoffs improve quarterly as model capabilities change +- Competitors starting fresh need 12+ months of feedback data to match + +## Risks to positioning + +| Risk | Mitigation | +|------|------------| +| Cloud providers ship their own "router" (OpenAI's GPT-4-Turbo auto-routes internally) | ClawAI is multi-cloud + local + workflow-aware. They route within their lineup; we route across all. | +| Privacy enforcement seen as "feature theater" | Audit logs + open source the privacy keyword list + 3rd-party penetration test public report | +| "Just use OpenRouter" objection | OpenRouter is a price aggregator. 
ClawAI is an intelligence layer (workflows, judge, learned bias). Different category. | +| Bad routing decision goes viral | Surface explanation in every chat message + per-user override in 1 click + CI regression suite prevents recurrence | diff --git a/docs/15-ai-context/routing-flagship-streams/ENV_ADDITIONS.md b/docs/15-ai-context/routing-flagship-streams/ENV_ADDITIONS.md new file mode 100644 index 00000000..465483fe --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/ENV_ADDITIONS.md @@ -0,0 +1,130 @@ +# Environment Variables — Flagship Additions + +Block to copy into `.env.example`, `.env`, `scripts/install.sh`, `scripts/install.ps1`. All stream-activation flags default to `false` / `0` (the few `true` defaults mark already-wired behavior or safety switches) so the production hot path is unchanged until each stream is activated. + +```bash +# ============================================================================= +# Routing Flagship — feature flags (one per stream) +# ============================================================================= +# All disabled by default. Flip to true / non-zero to activate a stream.
+# Streams have hard dependencies — see docs/15-ai-context/routing-flagship-streams/00-master-plan.md + +# R.1 — Close learning loop +ROUTING_R1_LEARNED_BIAS_ENABLED=false +ROUTING_R1_LEARNED_BIAS_WEIGHT_MAX=0.3 +ROUTING_R1_MIN_SAMPLE_SIZE=10 +ROUTING_R1_STICKY_THREAD_ROUTE_ENABLED=false +ROUTING_R1_PREFER_SAME_FAMILY=false +ROUTING_R1_CONFIDENCE_CALIBRATION_ENABLED=false +ROUTING_R1_CONFIDENCE_CALIBRATION_WINDOW_DAYS=30 + +# R.1/R.3 — v2 evaluator canary +ROUTING_V2_PRIMARY_ENABLED=false +ROUTING_V2_CANARY_PERCENT=0 +ROUTING_V2_ROLLBACK_SWITCH=true +ROUTING_V2_REGRESSION_THRESHOLD_PERCENT=1 +ROUTING_V2_COST_INCREASE_THRESHOLD_PERCENT=10 +ROUTING_V2_CONFIDENCE_DROP_THRESHOLD=0.1 +ROUTING_V2_FAILURE_RATE_THRESHOLD_PERCENT=2 + +# R.2 — Multimodal intent detection +ROUTING_R2_MODALITY_DETECTION_ENABLED=false +ROUTING_R2_YOUTUBE_DETECTION_ENABLED=false +ROUTING_R2_PDF_DETECTION_ENABLED=false +ROUTING_R2_VIDEO_DETECTION_ENABLED=false +ROUTING_R2_AUDIO_DETECTION_ENABLED=false +ROUTING_R2_SPREADSHEET_DETECTION_ENABLED=false +ROUTING_R2_URL_DETECTION_ENABLED=false +ROUTING_R2_TOOL_CALLING_FILTER_ENABLED=false +ROUTING_R2_STREAMING_FILTER_ENABLED=false +ROUTING_R2_EMBEDDING_ROUTING_ENABLED=false + +# R.3 — Workflow orchestrator live workflows +ROUTING_R3_WORKFLOWS_ENABLED=false +ROUTING_R3_WORKFLOW_DIRECT_LLM_ENABLED=true # default workflow — always on +ROUTING_R3_WORKFLOW_SEARCH_FIRST_ENABLED=false +ROUTING_R3_WORKFLOW_EXTRACT_FIRST_ENABLED=false +ROUTING_R3_WORKFLOW_PDF_EXTRACTION_ENABLED=false +ROUTING_R3_WORKFLOW_YOUTUBE_TRANSCRIPT_ENABLED=false +ROUTING_R3_WORKFLOW_IMAGE_ANALYSIS_ENABLED=false +ROUTING_R3_WORKFLOW_IMAGE_GENERATION_ENABLED=true # already wired +ROUTING_R3_WORKFLOW_VIDEO_ANALYSIS_ENABLED=false +ROUTING_R3_WORKFLOW_AUDIO_TRANSCRIBE_ENABLED=false +ROUTING_R3_WORKFLOW_FILE_GENERATION_ENABLED=true # already wired +ROUTING_R3_WORKFLOW_CODE_REVIEW_ENABLED=false +ROUTING_R3_WORKFLOW_COMPARE_ENSEMBLE_ENABLED=false 
+ROUTING_R3_WORKFLOW_JUDGE_PIPELINE_ENABLED=false + +# R.4 — Cost budget intelligence +ROUTING_R4_COST_BUDGET_ENABLED=false +ROUTING_R4_DEFAULT_USER_MONTHLY_CAP_USD=0 # 0 = no default cap +ROUTING_R4_DEFAULT_ORG_MONTHLY_CAP_USD=0 +ROUTING_R4_BUDGET_WARN_AT_PERCENT=80 +ROUTING_R4_FORCE_LOCAL_WHEN_OVER_BUDGET=true +ROUTING_R4_FREE_TIER_AWARENESS_ENABLED=false # blocked on connector-service work + +# R.5 — Operator playground + transparency +ROUTING_R5_PLAYGROUND_ENABLED=false +ROUTING_R5_EXPLANATION_IN_CHAT_ENABLED=false +ROUTING_R5_TAXONOMY_ADMIN_UI_ENABLED=false +ROUTING_R5_CIRCUIT_BREAKER_DASHBOARD_ENABLED=false + +# R.6 — Multi-tenant fleet routing +ROUTING_R6_MULTI_TENANT_ENABLED=false +ROUTING_R6_ORG_PROVIDER_RULES_ENABLED=false +ROUTING_R6_ORG_RATE_LIMIT_ENABLED=false + +# R.7 — i18n non-English routing +ROUTING_R7_LANGUAGE_DETECTION_ENABLED=false +ROUTING_R7_LANGUAGE_AWARE_MODEL_SELECTION_ENABLED=false +ROUTING_R7_CODE_MIXED_DETECTION_ENABLED=false +ROUTING_R7_ARABIC_RTL_HINT_ENABLED=false +ROUTING_R7_SUPPORTED_LOCALES=en,ar,de,es,fr,it,pt,ru + +# R.8 — Advanced routing intelligence (each sub-feature gated) +ROUTING_R8_PROMPT_LENGTH_FILTER_ENABLED=false +ROUTING_R8_LATENCY_CIRCUIT_BREAKER_ENABLED=false +ROUTING_R8_LATENCY_CIRCUIT_THRESHOLD_MS=15000 +ROUTING_R8_MID_STREAM_SWITCH_ENABLED=false +ROUTING_R8_MID_STREAM_FIRST_CHUNK_MAX_MS=5000 +ROUTING_R8_FINE_TUNE_PREFERENCE_ENABLED=false +ROUTING_R8_REGION_AWARE_ROUTING_ENABLED=false +ROUTING_R8_MULTI_INTENT_SPLITTER_ENABLED=false +ROUTING_R8_EMBEDDING_ROUTING_ENABLED=false +ROUTING_R8_CONSENSUS_MODE_ENABLED=false +ROUTING_R8_COST_QUALITY_SLIDER_ENABLED=false + +# R.9 — Quality + reliability hardening +ROUTING_R9_REGRESSION_SUITE_ENABLED=false # CI flag +ROUTING_R9_DRIFT_DETECTION_ENABLED=false +ROUTING_R9_DRIFT_ALERT_THRESHOLD_PERCENT=2 +ROUTING_R9_CALIBRATION_SNAPSHOT_CRON="0 4 * * *" +``` + +## Activation order (matches dependency graph) + +``` +1. ROUTING_R1_LEARNED_BIAS_ENABLED=true +2. 
ROUTING_V2_PRIMARY_ENABLED=true + ROUTING_V2_CANARY_PERCENT=5 +3. ROUTING_R3_WORKFLOWS_ENABLED=true (per-workflow flags individually) +4. ROUTING_R2_MODALITY_DETECTION_ENABLED=true + per-modality flags +5. ROUTING_R4_COST_BUDGET_ENABLED=true +6. ROUTING_R5_* (no production risk — safe to enable in any order) +7. ROUTING_R7_LANGUAGE_DETECTION_ENABLED=true +8. ROUTING_R6_MULTI_TENANT_ENABLED=true (after auth-service org work) +9. ROUTING_R8_* (per sub-feature) +10. ROUTING_R9_* (CI/ops flags — enable in CI first) +``` + +## Rollback + +```bash +# Disable a stream instantly: +sed -i 's/ROUTING_R1_LEARNED_BIAS_ENABLED=true/ROUTING_R1_LEARNED_BIAS_ENABLED=false/' .env +./scripts/claw.sh restart routing-service +# < 10 seconds, no DB migration needed. +``` + +## Reading flags + +All flags MUST be read via `AppConfig` (Zod-validated). NEVER `process.env.ROUTING_*` directly. See `apps/claw-routing-service/src/app/config/app.config.ts` for the existing pattern. diff --git a/docs/15-ai-context/routing-flagship-streams/INDEX.md b/docs/15-ai-context/routing-flagship-streams/INDEX.md new file mode 100644 index 00000000..d6dcb113 --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/INDEX.md @@ -0,0 +1,75 @@ +# Routing Flagship — Scaffolding Index + +**Branch:** `feature/routing-flagship-implementation` +**Source plan:** `plan-prompts/ClawAI_routing_implementation_flagship_pack/` +**Audit basis:** `docs/15-ai-context/routing-system-audit.md` + +**Scope of this branch:** *Scaffolding only* — per-stream module folders, type/constant stubs, doc per stream listing what's required, env-var stubs, future-model markers. No production wiring, no tests, no DB migrations, no nginx changes. Each stream doc is the single source of truth for what an implementing agent needs to do. 
+ +## Streams in execution order + +| # | Stream | Doc | New modules scaffolded | +|---|--------|-----|------------------------| +| 00 | Master plan (meta) | [00-master-plan.md](./00-master-plan.md) | — | +| 01 | R.1 Close learning loop | [01-r1-learning-loop.md](./01-r1-learning-loop.md) | extension on `routing/` (learned-bias util + manager) | +| 02 | R.1/R.3 v2 evaluator canary | [02-r1r3-v2-evaluator-canary.md](./02-r1r3-v2-evaluator-canary.md) | extension on `route-evaluator/` (canary-bucket) | +| 03 | R.2 Multimodal intent detection | [03-r2-multimodal-intent-detection.md](./03-r2-multimodal-intent-detection.md) | NEW module `modality-detection/` | +| 04 | R.3 Workflow orchestrator goes live | [04-r3-workflow-orchestrator.md](./04-r3-workflow-orchestrator.md) | extension on `workflows/` (13 workflow handler stubs) | +| 05 | R.4 Cost budget intelligence | [05-r4-cost-budget-intelligence.md](./05-r4-cost-budget-intelligence.md) | NEW module `cost-budget/` | +| 06 | R.5 Operator playground + transparency | [06-r5-operator-playground.md](./06-r5-operator-playground.md) | NEW module `playground/`, FE pages | +| 07 | R.6 Multi-tenant fleet routing | [07-r6-multi-tenant-fleet.md](./07-r6-multi-tenant-fleet.md) | extension on `routing/policies` (orgId, propagation) | +| 08 | R.7 i18n non-English routing | [08-r7-i18n-non-english.md](./08-r7-i18n-non-english.md) | NEW module `language-detection/` | +| 09 | R.8 Advanced routing intelligence | [09-r8-advanced-intelligence.md](./09-r8-advanced-intelligence.md) | NEW module `intelligence/` (9 sub-features) | +| 10 | R.9 Quality + reliability hardening | [10-r9-quality-hardening.md](./10-r9-quality-hardening.md) | test scaffolds + QA scripts | +| 11 | Quick wins backlog | [11-quick-wins.md](./11-quick-wins.md) | 10 single-day tickets | +| 12 | Business positioning + release roadmap | [12-business-roadmap.md](./12-business-roadmap.md) | docs only | + +## Module scaffolds in this branch + +Each new module folder contains: 
`<module-name>.module.ts` (e.g. `cost-budget.module.ts`), `controllers/`, `services/`, `managers/`, `repositories/`, `dto/`, `types/`, `constants/`, `enums/` (where applicable). All stub methods throw `new Error('SCAFFOLD-Rx — not implemented; …')` (exact form under "Conventions used in this scaffold" below) so the module is discoverable but inert. + +``` +apps/claw-routing-service/src/modules/ +├── modality-detection/ (Stream 03 — R.2) +├── cost-budget/ (Stream 05 — R.4) +├── playground/ (Stream 06 — R.5) +├── language-detection/ (Stream 08 — R.7) +└── intelligence/ (Stream 09 — R.8) +``` + +Extensions on existing modules are documented in their per-stream docs; no skeleton files are added to existing modules to avoid conflicts with the other agent on `main`. + +## Future Prisma models (scaffold marker — NOT yet in schema.prisma) + +Listed in [PRISMA_FUTURE_MODELS.md](./PRISMA_FUTURE_MODELS.md). Add via migration when implementing each stream. + +## New environment variables + +Listed in [ENV_ADDITIONS.md](./ENV_ADDITIONS.md). Copy the relevant block into `.env.example` + `.env` + `scripts/install.{sh,ps1}` when activating a stream. + +## Conventions used in this scaffold + +1. Every stub method body: `throw new Error('SCAFFOLD-Rx — not implemented; see docs/15-ai-context/routing-flagship-streams/NN-name.md');` +2. Every stub file has a top-of-file comment: `// SCAFFOLD: stream Rx (NN-name) — replace this stub with real implementation before activation.` +3. Modules are NOT registered in `app.module.ts` — they're discoverable but won't load at runtime. +4. Prisma changes live in `PRISMA_FUTURE_MODELS.md` until activation — schema.prisma is untouched. +5. i18n keys live in per-stream doc tables, not in locale files yet — adding them now without backend support would leak fake-feature strings to users. 
+ +## Activation sequence (recommendation) + +Per the audit's leverage analysis, activate in this order: + +``` +01-r1-learning-loop → the single biggest win +02-r1r3-v2-evaluator-canary → unlocks the rest +04-r3-workflow-orchestrator → needs 01 + 02 +03-r2-multimodal-intent-detection → needs 04 for routing the detected intents +05-r4-cost-budget-intelligence → independent +06-r5-operator-playground → no production risk; can run any time +08-r7-i18n-non-english → user-visible win +07-r6-multi-tenant-fleet → blocked on org schema +09-r8-advanced-intelligence → pick individual sub-features +10-r9-quality-hardening → ongoing alongside everything +``` + +Quick wins (Stream 11) can ship out-of-order; they're explicitly designed to be safe. diff --git a/docs/15-ai-context/routing-flagship-streams/PRISMA_FUTURE_MODELS.md b/docs/15-ai-context/routing-flagship-streams/PRISMA_FUTURE_MODELS.md new file mode 100644 index 00000000..d48abace --- /dev/null +++ b/docs/15-ai-context/routing-flagship-streams/PRISMA_FUTURE_MODELS.md @@ -0,0 +1,243 @@ +# Prisma — Future Models (scaffolded, not yet applied) + +These models are scaffolded by the flagship pack but **NOT yet present in `schema.prisma`**. Add via migration when activating the relevant stream. + +--- + +## R.4 — Cost budget (Stream 05) + +```prisma +enum CostBudgetScope { + USER + ORG +} + +enum CostBudgetStatus { + OK + WARN + EXCEEDED +} + +model UserCostBudget { + id String @id @default(cuid()) + scope CostBudgetScope + ownerId String @map("owner_id") // userId or orgId + monthlyCapUsd Decimal @map("monthly_cap_usd") @db.Decimal(12, 4) + currentSpendUsd Decimal @default(0) @map("current_spend_usd") @db.Decimal(12, 4) + status CostBudgetStatus @default(OK) + warnAtPercent Int @default(80) @map("warn_at_percent") + overrideAllowed Boolean @default(false) @map("override_allowed") + resetAt DateTime @map("reset_at") + lastWarningSentAt DateTime? 
@map("last_warning_sent_at") + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + @@unique([scope, ownerId]) + @@index([status]) + @@index([resetAt]) + @@map("user_cost_budgets") +} +``` + +**Backfill:** none — empty table on activation. First budget check returns OK if no row exists. + +--- + +## R.6 — Multi-tenant fleet (Stream 07) + +```prisma +// Modify existing RoutingPolicy: add orgId column. + +model RoutingPolicy { + id String @id @default(cuid()) + name String + routingMode RoutingMode @map("routing_mode") + priority Int @default(0) + isActive Boolean @default(true) @map("is_active") + config Json + orgId String? @map("org_id") // NEW — null = global policy + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + @@index([routingMode]) + @@index([isActive]) + @@index([priority]) + @@index([orgId, isActive, priority]) // NEW + @@map("routing_policies") +} + +// NEW table for org-scoped allow/deny lists + +enum OrgProviderRuleKind { + ALLOW + DENY +} + +model OrgProviderRule { + id String @id @default(cuid()) + orgId String @map("org_id") + provider String + kind OrgProviderRuleKind + reason String? + createdAt DateTime @default(now()) @map("created_at") + + @@unique([orgId, provider]) + @@index([orgId, kind]) + @@map("org_provider_rules") +} + +// NEW table for org rate limits + +model OrgRateLimit { + id String @id @default(cuid()) + orgId String @unique @map("org_id") + requestsPerMinute Int @map("requests_per_minute") + burst Int @default(0) + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + @@map("org_rate_limits") +} +``` + +**Backfill:** none required — the new nullable `org_id` column is NULL for every existing row, which preserves existing policies as global. (`UPDATE routing_policies SET org_id = NULL;` is therefore a no-op and only needed if rows were populated by mistake.) 
+ +--- + +## R.7 — Language detection (Stream 08) + +```prisma +// Modify existing RoutingDecision: add language columns. 
+ +model RoutingDecision { + // ... existing fields + detectedLanguage String? @map("detected_language") // ISO-639-1 (e.g. 'en', 'ar', 'es') + languageConfidence Decimal? @map("language_confidence") @db.Decimal(5, 4) + isCodeMixed Boolean @default(false) @map("is_code_mixed") + // ... existing fields +} + +// Extend RouterModelRegistry with per-language strength. + +model RouterModelRegistry { + // ... existing fields + languageStrengthJson Json? @map("language_strength_json") + // Shape: { "en": 1.0, "ar": 0.85, "es": 0.9, ... } + // ... existing fields +} +``` + +**Backfill:** none. Existing decisions remain `detectedLanguage=NULL`. + +--- + +## R.2/R.3 — Modality + workflow on RoutingDecision (Streams 03, 04) + +```prisma +// Modify existing RoutingDecision: add modality + workflow columns. + +model RoutingDecision { + // ... existing fields + detectedModalities Json? @map("detected_modalities") + // Shape: ["TEXT", "PDF_INPUT", "YOUTUBE_INPUT"] + selectedWorkflow WorkflowKind? @map("selected_workflow") + workflowConfidence Decimal? @map("workflow_confidence") @db.Decimal(5, 4) + // ... existing fields +} +``` + +**Backfill:** none. + +--- + +## R.8 — Advanced intelligence (Stream 09 — one sub-feature per migration) + +### 09.1 Prompt-length filtering — no new model, uses RouterModelRegistry.contextWindowTokens (already exists) + +### 09.2 Latency circuit breaker — extends existing RouterCircuitBreaker + +```prisma +model RouterCircuitBreaker { + // ... existing fields + trigger String @default("FAILURE_RATE") // 'FAILURE_RATE' | 'LATENCY_P95' + latencyThresholdMs Int? @map("latency_threshold_ms") + // ... 
existing fields +} +``` + +### 09.4 Fine-tuned model preference + +```prisma +model UserFineTunePreference { + id String @id @default(cuid()) + userId String @map("user_id") + domain DomainTag + provider String + model String + weight Decimal @default(0.5) @map("weight") @db.Decimal(3, 2) + createdAt DateTime @default(now()) @map("created_at") + + @@unique([userId, domain]) + @@index([userId]) + @@map("user_fine_tune_preferences") +} +``` + +### 09.5 Region preference + +```prisma +model RouterRegionPreference { + id String @id @default(cuid()) + scope String // 'USER' | 'ORG' | 'GLOBAL' + ownerId String? @map("owner_id") + provider String + preferredRegion String @map("preferred_region") + reason String? // 'GDPR' | 'LATENCY' | 'COST' + createdAt DateTime @default(now()) @map("created_at") + + @@index([scope, ownerId, provider]) + @@map("router_region_preferences") +} +``` + +### 09.9 Cost/quality slider + +```prisma +model UserCostQualitySlider { + userId String @id @map("user_id") + qualityWeight Decimal @default(0.5) @map("quality_weight") @db.Decimal(3, 2) // 0 = cheapest, 1 = best + updatedAt DateTime @updatedAt @map("updated_at") + + @@map("user_cost_quality_sliders") +} +``` + +--- + +## Apply order + +When activating a stream, run migrations in this order to avoid FK conflicts: + +```bash +# R.4 +npx prisma migrate dev --name r4_add_user_cost_budget + +# R.6 +npx prisma migrate dev --name r6_add_org_id_to_routing_policy +npx prisma migrate dev --name r6_add_org_provider_rules_and_rate_limit + +# R.7 +npx prisma migrate dev --name r7_add_language_to_routing_decision +npx prisma migrate dev --name r7_add_language_strength_to_model_registry + +# R.2/R.3 +npx prisma migrate dev --name r2_r3_add_modality_workflow_to_decision + +# R.8 (per sub-feature) +npx prisma migrate dev --name r8_2_extend_circuit_breaker_latency +npx prisma migrate dev --name r8_4_add_user_fine_tune_preference +npx prisma migrate dev --name r8_5_add_router_region_preference +npx prisma 
migrate dev --name r8_9_add_user_cost_quality_slider +``` + +All migrations are **additive** — no destructive changes; every new column is either nullable or has a default.