7 changes: 4 additions & 3 deletions README.md
@@ -97,18 +97,19 @@ First run will guide you through Provider/Model setup and save config to `~/.mem
| Resume Session | `memo --prev` | Load latest session for current directory |
| Web Console | `memo web --host 127.0.0.1 --port 5494 --open` | Browser-based operation |

`memo web` requires a shared password via `MEMO_SERVER_PASSWORD`.

---

## 🏗️ Architecture

```
memo-code/
├── packages/
│ ├── core/ # Core logic: Session state machine, Config handling
│ ├── core/ # Core logic: Session state machine, Config handling, HTTP server API
│ ├── tools/ # Tool routing, MCP Client management, built-in tools (exec_command, read_text_file, apply_patch...)
│ ├── tui/ # Terminal runtime: CLI entry, interactive TUI
│ ├── web-ui/ # Web frontend: React components
│ └── web-server/ # Web backend: session management, API adapter
└── docs/ # Technical documentation
```

@@ -117,7 +118,7 @@ memo-code/
- **Architecture**: Clean Core / Tools / TUI separation, state-machine driven session management
- **Testing**: Core + Tools coverage > 70%, complete unit + integration tests
- **Protocol**: Native MCP (Model Context Protocol) support, can integrate any MCP tool server
- **Token Estimation**: Real-time context monitoring based on tiktoken, configurable auto-compaction strategy
- **Token Estimation**: Real-time context monitoring based on AI SDK usage + local fallback estimator, configurable auto-compaction strategy
- **Distribution**: npm package with pre-built Web assets, seamless hot-reloading

---
7 changes: 4 additions & 3 deletions README.zh.md
@@ -89,16 +89,17 @@ memo
| 继续会话 | `memo --prev` | 加载当前目录的最新会话 |
| Web 控制台 | `memo web --host 127.0.0.1 --port 5494 --open` | 浏览器操作 |

`memo web` 需要通过 `MEMO_SERVER_PASSWORD` 提供共享密码。

## 🏗️ 架构设计

```
memo-code/
├── packages/
│ ├── core/ # 核心逻辑:Session 状态机、Config 处理
│ ├── core/ # 核心逻辑:Session 状态机、Config 处理、HTTP Server API
│ ├── tools/ # Tool 路由、MCP Client管理、内置工具实现(exec_command, read_text_file, apply_patch...)
│ ├── tui/ # 终端运行时:CLI 入口、交互式 TUI
│ ├── web-ui/ # Web 前端:React 组件
│ └── web-server/ # Web 后端:会话管理、API 适配器
└── docs/ # 技术文档
```

@@ -107,7 +108,7 @@ memo-code/
- **架构**:清晰的 Core / Tools / TUI 分层,状态机驱动会话管理
- **测试**:Core + Tools 覆盖率 > 70%,完整的单元 + 集成测试
- **协议**:原生支持 MCP (Model Context Protocol),可接入任意 MCP 工具服务器
- **Token 估算**:基于 tiktoken 的实时上下文监控,支持可配置的自动压缩策略
- **Token 估算**:基于 AI SDK usage + 本地估算器的实时上下文监控,支持可配置的自动压缩策略
- **分发**:npm 包预构建 Web 资源,热加载无感知

## 🔧 内置工具
8 changes: 4 additions & 4 deletions docs/core.md
@@ -20,7 +20,7 @@ Core should stay UI-agnostic: do not add Ink/UI rendering details into `packages
- `session.ts`: Session/Turn state machine; runs ReAct loop, writes events, tracks tokens, fires hooks; **supports concurrent tool calls**.
- `toolRouter/`: tool routing and management
- `index.ts`: manages built-in + MCP tools, generates Tool Use API tool definitions.
- `utils/`: parsing and tokenizer wrappers (assistant output parsing, message wrappers, tiktoken wrapper).
- `utils/`: parsing and tokenizer wrappers (assistant output parsing, message wrappers, lightweight token estimator).
- `types.ts`: shared types (**extended for Tool Use API support**).
- `index.ts`: package entry exporting the modules above.

@@ -74,7 +74,7 @@ if (toolUseBlocks.length > 1) {
## Entry API: Session/Turn (`createAgentSession`)

- `createAgentSession(deps, options)` returns a Session; `runTurn` runs one ReAct turn. UI controls turn count.
- Default deps can be omitted: `tools` (built-in set), `callLLM` (provider-based OpenAI client, **auto-sends tool definitions**), `loadPrompt`, `historySinks` (writes to `~/.memo/sessions/...`), `tokenCounter`.
- Default deps can be omitted: `tools` (built-in set), `callLLM` (AI SDK Gateway-based client, **auto-sends tool definitions**), `loadPrompt`, `historySinks` (writes to `~/.memo/sessions/...`), `tokenCounter`.
- Config source: `~/.memo/config.toml` (overridable via `MEMO_HOME`), keys include `current_provider` and `providers` list. Missing config triggers interactive UI setup.
- Callbacks:
- `onAssistantStep` (stream-like output)
@@ -96,7 +96,7 @@ await session.close()
- Default output path: `~/.memo/sessions/-<project_abs_path_flattened>/<YYYY-MM-DDTHH-MM-SS>-<id>.jsonl`, with provider/model/tokenizer/token-usage metadata.
- For concurrent calls, each tool observation is logged individually, and a merged observation is also recorded.

## LLM Adapter (`runtime/defaults.ts`)
## LLM Adapter (`runtime/session/defaults.ts`)

- `withDefaultDeps` provides OpenAI SDK-based invocation (selected by provider/model/base_url/env_api_key).
- **Automatically generates Tool Use API tool definitions**: `toolRouter.generateToolDefinitions()`.
@@ -200,7 +200,7 @@ if (toolUseBlocks.length > 1) {
}
```

## System Prompt (`runtime/prompt.md`)
## System Prompt (`runtime/prompt/prompt.md`)

Incorporates Claude Code best practices:

8 changes: 4 additions & 4 deletions docs/model-agnostic-design.md
@@ -51,7 +51,7 @@ base_url = "http://localhost:11434/v1"

### 2. 统一 HTTP 客户端层

位置:`packages/core/src/runtime/defaults.ts:147-174`
位置:`packages/core/src/runtime/session/defaults.ts:147-174`

使用 OpenAI SDK 作为统一接口:

@@ -70,7 +70,7 @@ const client = new OpenAI({

### 3. 消息格式转换层

位置:`packages/core/src/runtime/defaults.ts:34-60`
位置:`packages/core/src/runtime/session/defaults.ts:34-60`

将内部 `ChatMessage` 格式转换为 OpenAI API 格式:

@@ -113,7 +113,7 @@ function toOpenAIMessage(message: ChatMessage): OpenAI.ChatCompletionMessagePara

### 4. 响应格式归一化层

位置:`packages/core/src/runtime/defaults.ts:176-236`
位置:`packages/core/src/runtime/session/defaults.ts:176-236`

将模型响应转换为内部统一的 `LLMResponse` 格式:

@@ -206,6 +206,6 @@ base_url = "https://your-api-endpoint.com/v1"
## 相关文件

- `packages/core/src/config/config.ts` - Provider 配置管理
- `packages/core/src/runtime/defaults.ts` - HTTP 客户端和消息转换
- `packages/core/src/runtime/session/defaults.ts` - HTTP 客户端和消息转换
- `packages/core/src/types.ts` - 统一类型定义
- `packages/tui/src/slash/registry.ts` - CLI 命令处理
8 changes: 4 additions & 4 deletions docs/npm-distribution-design.md
@@ -76,8 +76,8 @@ packages/core/src/ │ │
| --------------- | ------------- | -------------------------------- |
| `react`, `ink` | bundle inline | required at runtime |
| `fast-glob` | bundle inline | avoid user-side install concerns |
| `openai` | bundle inline | API client |
| `tiktoken` | bundle inline | token counting |
| `openai` | bundle inline | compatibility types and runtime deps |
| `ai` | bundle inline | AI SDK runtime |
| `zod` | bundle inline | schema validation |
| Node built-ins | `external` | provided by Node.js |

@@ -99,7 +99,7 @@ export default defineConfig({
},
onSuccess() {
// copy runtime resource file
copyFileSync('packages/core/src/runtime/prompt.md', 'dist/prompt.md')
copyFileSync('packages/core/src/runtime/prompt/prompt.md', 'dist/prompt.md')
},
})
```
@@ -276,7 +276,7 @@ memo --doctor

### 9.1 Possible Optimizations

- **Code splitting**: lazy-load large dependencies (for example tiktoken wasm)
- **Code splitting**: lazy-load large optional dependencies
- **Compression**: use Brotli to reduce package size further
- **Incremental updates**: support hot-update style mechanism

15 changes: 8 additions & 7 deletions docs/token-counting.md
@@ -4,28 +4,29 @@ This document describes how Memo Code CLI estimates and records tokens for promp

## Counting Implementation

- **Underlying encoder**: uses `@dqbd/tiktoken`, default encoding `cl100k_base`; override via `tokenizerModel`.
- **Plain text count**: `countText(text)` encodes a string directly and returns token length.
- **Primary source**: prefers model-returned usage (`inputTokens/outputTokens/totalTokens`) from AI SDK calls.
- **Fallback counter**: local lightweight estimator (default model label `cl100k_base`); override via `tokenizerModel`.
- **Plain text count**: `countText(text)` estimates by character mix (ASCII/CJK/symbol/newline weighting).
- **Message array count (ChatML approximation)**: `countMessages(messages)` uses a common OpenAI ChatML estimate:
- fixed overhead of 4 tokens per message (role/name wrappers, etc.)
- `content` counted via tiktoken encoding
  - `content` counted with the local estimator
- if `name` is supported later, adds 1 token
- adds 2 tokens at the end for assistant priming

This is closer to actual ChatML overhead than naive text concatenation, but still an approximation.
This remains an approximation for context budgeting, while runtime accounting prefers provider usage when available.
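
The fallback estimator and ChatML approximation described above can be sketched in TypeScript. The per-character weights and function bodies below are illustrative assumptions for this doc, not Memo Code's actual implementation:

```typescript
// Hedged sketch of a local fallback token estimator. Names (countText,
// countMessages, ChatMessage) mirror the docs; the weights are assumed.

interface ChatMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
}

// Character-mix weighting: ASCII text averages roughly 4 chars per token,
// CJK roughly 1 token per character, everything else in between.
function countText(text: string): number {
  let ascii = 0
  let cjk = 0
  let other = 0
  for (const ch of text) {
    const code = ch.codePointAt(0)!
    if (code <= 0x7f) ascii += 1
    else if (code >= 0x4e00 && code <= 0x9fff) cjk += 1
    else other += 1
  }
  return Math.ceil(ascii / 4) + cjk + Math.ceil(other / 2)
}

// ChatML-style approximation: 4 tokens of per-message overhead
// (role/name wrappers) plus 2 tokens of assistant priming at the end.
function countMessages(messages: ChatMessage[]): number {
  let total = 0
  for (const m of messages) {
    total += 4 + countText(m.content)
  }
  return total + 2
}
```

Provider-reported usage from the AI SDK should still take precedence whenever it is available; a sketch like this only covers the local fallback path.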

## Usage Scenarios

- **Prompt budgeting**: before each step, `runTurn` estimates prompt tokens with `countMessages` and applies:
- `warnPromptTokens`: prints warning
- `maxPromptTokens`: returns early when exceeded, preventing over-limit LLM requests
- **Usage reconciliation**: each step combines local count and model-returned `usage` (if available), records into token usage and JSONL history events.
- **Usage reconciliation**: each step combines local estimate and AI SDK usage (if available), records into token usage and JSONL history events.
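
The budgeting rules above reduce to a pre-flight threshold check before each LLM request. A minimal sketch, assuming hypothetical option names that mirror `warnPromptTokens` / `maxPromptTokens` in the docs:

```typescript
// Illustrative prompt-budget check; the option names follow the docs,
// the surrounding wiring is assumed for this sketch.
interface TokenBudget {
  warnPromptTokens?: number
  maxPromptTokens?: number
}

// 'over' aborts the request early, 'warn' prints a warning, 'ok' proceeds.
function checkPromptBudget(
  promptTokens: number,
  budget: TokenBudget,
): 'ok' | 'warn' | 'over' {
  if (budget.maxPromptTokens !== undefined && promptTokens > budget.maxPromptTokens) {
    return 'over'
  }
  if (budget.warnPromptTokens !== undefined && promptTokens > budget.warnPromptTokens) {
    return 'warn'
  }
  return 'ok'
}
```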

## Precision and Limitations

- Fixed ChatML overhead varies slightly by model. Current "4 per message + 2 ending" estimate may differ by dozens of tokens on specific models.
- Extra structural overhead for tool/function calling is not explicitly modeled yet. For exact reconciliation, model-specific constants can be added later.
- If using custom `callLLM`, pass matching model encoding or custom `tokenCounter` implementation to align with real usage.
- Extra structural overhead for tool/function calling is not explicitly modeled yet. For exact reconciliation, prefer provider usage data.
- If using custom `callLLM`, inject custom `tokenCounter` when you need model-specific estimation rules.

## How to Override

19 changes: 6 additions & 13 deletions package.json
@@ -9,6 +9,7 @@
},
"files": [
"dist/index.js",
"dist/core-server.js",
"dist/prompt.md",
"dist/task-prompts/*.md",
"dist/web/**/*",
@@ -20,14 +21,12 @@
},
"scripts": {
"start": "tsx packages/tui/src/cli.tsx",
"build": "pnpm run web:build && tsup",
"build": "pnpm run web:ui:build && tsup",
"dev": "tsup --watch",
"web:dev": "pnpm -r --parallel --stream --filter @memo-code/web-server --filter @memo-code/web-ui dev",
"web:server:dev": "pnpm --filter @memo-code/web-server dev",
"web:dev": "pnpm -r --parallel --stream --filter @memo-code/web-ui dev",
"web:ui:dev": "pnpm --filter @memo-code/web-ui dev",
"web:ui:build": "pnpm --filter @memo-code/web-ui build",
"web:server:build": "pnpm --filter @memo-code/web-server build",
"web:build": "pnpm run web:ui:build && pnpm run web:server:build",
"web:build": "pnpm run web:ui:build",
"site:dev": "pnpm --filter @memo-code/site dev",
"site:build": "pnpm --filter @memo-code/site build",
"site:start": "pnpm --filter @memo-code/site start",
@@ -57,14 +56,11 @@
"vitest": "^2.1.8"
},
"dependencies": {
"@dqbd/tiktoken": "^1.0.22",
"@ai-sdk/openai": "^3.0.37",
"@inkjs/ui": "^2.0.0",
"@modelcontextprotocol/sdk": "^1.24.3",
"@mozilla/readability": "^0.6.0",
"@nestjs/common": "^11.0.1",
"@nestjs/core": "^11.0.1",
"@nestjs/jwt": "^11.0.1",
"@nestjs/platform-express": "^11.0.1",
"ai": "^6.0.105",
"fast-glob": "^3.3.3",
"ink": "^6.7.0",
"ipaddr.js": "^2.3.0",
@@ -73,14 +69,11 @@
"openai": "^6.10.0",
"react": "^19.2.4",
"react-reconciler": "^0.33.0",
"reflect-metadata": "^0.2.2",
"robots-parser": "^3.0.1",
"rxjs": "^7.8.1",
"string-width": "^7.2.0",
"toml": "^3.0.0",
"turndown": "^7.2.2",
"undici": "^6.23.0",
"ws": "^8.18.3",
"yaml": "^2.8.1",
"zod": "^4.3.6",
"zod-to-json-schema": "^3.25.1"
4 changes: 2 additions & 2 deletions packages/core/README.md
@@ -14,13 +14,13 @@ Core provides the central capabilities of **Memo Code CLI**: the ReAct loop, ses
- `types.ts`: Shared types (`AgentDeps`, `Session/Turn`, `TokenUsage`, `HistoryEvent`, etc.).
- `utils/`
- Utility functions (assistant output parsing, message wrappers).
- `tokenizer.ts`: tiktoken-based tokenizer helpers.
  - `tokenizer.ts`: lightweight token estimation helpers (local fallback when AI SDK usage is unavailable).
- `index.ts`: Package entry, exports core modules and types.

## Key Flows

- `createAgentSession(deps, options)`: Creates a Session, fills default dependencies, loads prompt, and returns an object with `runTurn`.
- `withDefaultDeps`: Injects default toolset, LLM client, prompt, history sink (writes to `~/.memo/sessions/YY/MM/DD/<uuid>.jsonl`), and tokenizer based on config and overrides.
- `withDefaultDeps`: Injects default toolset, AI SDK Gateway client, prompt, history sink (writes to `~/.memo/sessions/YY/MM/DD/<uuid>.jsonl`), and tokenizer based on config and overrides.
- Session history: JSONL events (`session_start/turn_start/assistant/action/observation/final/turn_end/session_end`) with metadata like provider, model, tokenizer, and token usage.
- Config: `~/.memo/config.toml` (overridable via `MEMO_HOME`). If missing, UI setup flow is triggered.

3 changes: 2 additions & 1 deletion packages/core/package.json
@@ -15,10 +15,11 @@
"version": "0.1.0",
"private": true,
"scripts": {
"build": "tsup --config tsup.config.ts && node -e \"const { copyFileSync } = require('node:fs'); copyFileSync('src/runtime/prompt.md', 'dist/prompt.md');\"",
"build": "tsup --config tsup.config.ts && node -e \"const { copyFileSync } = require('node:fs'); copyFileSync('src/runtime/prompt/prompt.md', 'dist/prompt.md');\"",
"test": "vitest run"
},
"dependencies": {
"@memo-code/types": "workspace:*",
"ignore": "^7.0.5",
"zod": "^4.3.6"
},
14 changes: 7 additions & 7 deletions packages/core/src/index.ts
@@ -2,17 +2,17 @@
export * from './types'
export * from './runtime/prompt'
export * from './runtime/skills'
export * from './runtime/skills/admin'
export * from './runtime/history'
export * from './runtime/history_parser'
export * from './runtime/history_index'
export * from './runtime/history/parser'
export * from './runtime/history/indexer'
export * from './runtime/workspace'
export * from './runtime/file_suggestions'
export * from './runtime/slash'
export * from './runtime/mcp_admin'
export * from './runtime/skills_admin'
export * from './runtime/defaults'
export * from './runtime/workspace/file_suggestions'
export * from './runtime/mcp/admin'
export * from './runtime/session/defaults'
export * from './config/config'
export * from './utils/utils'
export * from './utils/tokenizer'
export * from './runtime/session'
export * from './web/types'
export * from './server/http_server'
@@ -5,7 +5,7 @@ import {
buildCompactionUserPrompt,
CONTEXT_SUMMARY_PREFIX,
isContextSummaryMessage,
} from '@memo/core/runtime/compact_prompt'
} from '@memo/core/runtime/agent/compact_prompt'

describe('compact_prompt', () => {
test('buildCompactionUserPrompt formats assistant tool calls and tool messages', () => {
@@ -8,7 +8,7 @@ import type {
ChatMessage,
AssistantToolCall,
} from '@memo/core/types'
import { buildHookRunners, runHook, snapshotHistory } from '@memo/core/runtime/hooks'
import { buildHookRunners, runHook, snapshotHistory } from '@memo/core/runtime/agent/hooks'

describe('buildHookRunners', () => {
test('creates empty hook map when no hooks provided', () => {
@@ -7,7 +7,7 @@ import type { Tool } from '@memo/tools/router'
import {
CONTEXT_COMPACTION_SYSTEM_PROMPT,
CONTEXT_SUMMARY_PREFIX,
} from '@memo/core/runtime/compact_prompt'
} from '@memo/core/runtime/agent/compact_prompt'

const echoTool: Tool = {
name: 'echo',
@@ -7,7 +7,7 @@ import {
CONTEXT_COMPACTION_SYSTEM_PROMPT,
CONTEXT_SUMMARY_PREFIX,
isContextSummaryMessage,
} from '@memo/core/runtime/compact_prompt'
} from '@memo/core/runtime/agent/compact_prompt'
import type {
ChatMessage,
AgentSession,
@@ -32,7 +32,7 @@ import {
runHook,
snapshotHistory,
type HookRunnerMap,
} from '@memo/core/runtime/hooks'
} from '@memo/core/runtime/agent/hooks'
import {
createToolOrchestrator,
type ToolApprovalHooks,
@@ -58,7 +58,7 @@ import {
resolveToolPermission,
stableStringify,
toToolHistoryMessage,
} from '@memo/core/runtime/session_runtime_helpers'
} from '@memo/core/runtime/agent/session_runtime_helpers'
import type { ApprovalRequest, ApprovalDecision } from '@memo/tools/approval'

const DEFAULT_AUTO_COMPACT_THRESHOLD_PERCENT = 80
@@ -12,7 +12,7 @@ import {
stableStringify,
toToolHistoryMessage,
truncateSessionTitle,
} from '@memo/core/runtime/session_runtime_helpers'
} from '@memo/core/runtime/agent/session_runtime_helpers'

describe('accumulateUsage', () => {
test('uses explicit total when provided', () => {