plastic-labs · andyylin · May 21, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/config.toml b/config.toml
@@ -0,0 +1,156 @@
+# Self-hosted Honcho for Hermes Agent on Andy's Pi.
+# Secrets live in /home/pi/honcho/.env; do not put API keys here.
+
+[app]  # application-wide limits and feature switches
+LOG_LEVEL = "INFO"
+SESSION_OBSERVERS_LIMIT = 10
+GET_CONTEXT_MAX_TOKENS = 100_000
+MAX_FILE_SIZE = 5_242_880  # 5 * 1024 * 1024 (presumably bytes, i.e. 5 MiB — confirm)
+MAX_MESSAGE_SIZE = 25_000
+EMBED_MESSAGES = true
+MAX_EMBEDDING_TOKENS = 8192
+NAMESPACE = "honcho"
+
+[embedding]  # sub-tables follow directly so the whole embedding setup reads top-down
+VECTOR_DIMENSIONS = 1024  # presumably must match the output width of the model below — verify
+MAX_INPUT_TOKENS = 8192
+
+[embedding.MODEL_CONFIG]
+transport = "openai"
+model = "bge-m3"
+dimensions_mode = "never"
+
+[embedding.MODEL_CONFIG.overrides]
+base_url = "http://host.docker.internal:11434/v1"  # resolved through the host-gateway extra_hosts entry in docker-compose.yml
+api_key_env = "LOCAL_EMBEDDING_API_KEY"  # name of the environment variable, not the key itself
+
+[db]
+CONNECTION_URI = "postgresql+psycopg://honcho:honcho@database:5432/honcho"
+SCHEMA = "public"
+POOL_SIZE = 10
+MAX_OVERFLOW = 20
+POOL_TIMEOUT = 30
+POOL_RECYCLE = 300
+
+[auth]
+USE_AUTH = false  # NOTE(review): docker-compose.yml publishes the API port; confirm it is reachable from trusted hosts only
+
+[cache]
+ENABLED = true
+URL = "redis://redis:6379/0?suppress=true"
+DEFAULT_TTL_SECONDS = 300
+
+[llm]
+DEFAULT_MAX_TOKENS = 2500
+# OPENAI_API_KEY and OPENAI_BASE_URL come from .env.
+# We use OpenRouter through Honcho's OpenAI-compatible transport.
+
+[vector_store]
+TYPE = "pgvector"
+
+[deriver]
+ENABLED = true
+WORKERS = 1
+POLLING_SLEEP_INTERVAL_SECONDS = 1.0
+STALE_SESSION_TIMEOUT_MINUTES = 5
+DEDUPLICATE = true
+MAX_INPUT_TOKENS = 23000
+WORKING_REPRESENTATION_MAX_OBSERVATIONS = 100
+REPRESENTATION_BATCH_MAX_TOKENS = 1024
+LOG_OBSERVATIONS = false
+
+[deriver.MODEL_CONFIG]
+transport = "openai"
+model = "openai/gpt-4.1-mini"
+max_output_tokens = 4096
+
+[deriver.MODEL_CONFIG.overrides]
+base_url = "https://openrouter.ai/api/v1"
+api_key_env = "LLM_OPENAI_API_KEY"
+
+[summary]
+ENABLED = true
+MESSAGES_PER_SHORT_SUMMARY = 20
+MESSAGES_PER_LONG_SUMMARY = 60
+
+[summary.MODEL_CONFIG]
+transport = "openai"
+model = "openai/gpt-4.1-mini"
+max_output_tokens = 4096
+
+[summary.MODEL_CONFIG.overrides]
+base_url = "https://openrouter.ai/api/v1"
+api_key_env = "LLM_OPENAI_API_KEY"
+
+[dialectic]  # section-wide limits; the LEVELS tables below set tool iterations and the model per level
+MAX_OUTPUT_TOKENS = 12000
+MAX_TOOL_OUTPUT_CHARS = 10000
+HISTORY_TOKEN_LIMIT = 8192
+SESSION_HISTORY_MAX_TOKENS = 4096
+
+[dialectic.LEVELS.minimal]  # NOTE(review): only minimal/low/medium are defined in this file; confirm any other levels fall back to defaults
+MAX_TOOL_ITERATIONS = 1
+MAX_OUTPUT_TOKENS = 1500  # presumably overrides the section-wide 12000 for this level — verify
+TOOL_CHOICE = "auto"
+[dialectic.LEVELS.minimal.MODEL_CONFIG]
+transport = "openai"
+model = "openai/gpt-4.1-mini"
+[dialectic.LEVELS.minimal.MODEL_CONFIG.overrides]
+base_url = "https://openrouter.ai/api/v1"
+api_key_env = "LLM_OPENAI_API_KEY"
+
+[dialectic.LEVELS.low]
+MAX_TOOL_ITERATIONS = 5
+TOOL_CHOICE = "auto"
+[dialectic.LEVELS.low.MODEL_CONFIG]
+transport = "openai"
+model = "x-ai/grok-4.3"
+[dialectic.LEVELS.low.MODEL_CONFIG.overrides]
+base_url = "https://openrouter.ai/api/v1"
+api_key_env = "LLM_OPENAI_API_KEY"
+
+[dialectic.LEVELS.medium]  # no TOOL_CHOICE here, unlike minimal and low
+MAX_TOOL_ITERATIONS = 2  # NOTE(review): fewer than the 5 allowed at "low" — confirm this is intended
+[dialectic.LEVELS.medium.MODEL_CONFIG]
+transport = "openai"
+model = "x-ai/grok-4.3"
+[dialectic.LEVELS.medium.MODEL_CONFIG.overrides]
+base_url = "https://openrouter.ai/api/v1"
+api_key_env = "LLM_OPENAI_API_KEY"
+
+[dream]
+ENABLED = true
+DOCUMENT_THRESHOLD = 50
+IDLE_TIMEOUT_MINUTES = 60
+MIN_HOURS_BETWEEN_DREAMS = 8
+ENABLED_TYPES = ["omni"]
+MAX_TOOL_ITERATIONS = 20
+HISTORY_TOKEN_LIMIT = 16384
+
+[dream.DEDUCTION_MODEL_CONFIG]
+transport = "openai"
+model = "z-ai/glm-5"
+max_output_tokens = 16384
+[dream.DEDUCTION_MODEL_CONFIG.overrides]
+base_url = "https://openrouter.ai/api/v1"
+api_key_env = "LLM_OPENAI_API_KEY"
+
+[dream.INDUCTION_MODEL_CONFIG]
+transport = "openai"
+model = "z-ai/glm-5"
+max_output_tokens = 16384
+[dream.INDUCTION_MODEL_CONFIG.overrides]
+base_url = "https://openrouter.ai/api/v1"
+api_key_env = "LLM_OPENAI_API_KEY"
+
+[peer_card]
+ENABLED = true
+
+[metrics]
+ENABLED = false
+
+[telemetry]
+ENABLED = false
+
+[sentry]
+ENABLED = false
diff --git a/config.toml.example b/config.toml.example
@@ -61,6 +61,11 @@ OPENAI_API_KEY = "your-api-key-here"
 # ANTHROPIC_API_KEY = "your-api-key"
 # GEMINI_API_KEY = "your-api-key"
 
+# Optional OpenRouter app attribution. Used only when an OpenAI-compatible
+# base_url is https://openrouter.ai/api/v1.
+# OPENROUTER_APP_URL = "https://your-app.example"
+# OPENROUTER_APP_TITLE = "Your Honcho App"
+
 # Embedding settings
 [embedding]
 VECTOR_DIMENSIONS = 1536

diff --git a/docker-compose.local-embeddings.yml b/docker-compose.local-embeddings.yml
@@ -0,0 +1,36 @@
+# Side-by-side Honcho stack for testing local bge-m3 embeddings (the !override tags need a recent Docker Compose; 2.24.4+ per the Compose merge docs — verify).
+# Use with:
+#   docker compose -p honcho-local-embed -f docker-compose.yml -f docker-compose.local-embeddings.yml up -d --build
+
+x-local-embedding-env: &local_embedding_env
+  EMBEDDING_VECTOR_DIMENSIONS: "1024"
+  EMBEDDING_MAX_INPUT_TOKENS: "8192"
+  EMBEDDING_MODEL_CONFIG__TRANSPORT: openai
+  EMBEDDING_MODEL_CONFIG__MODEL: bge-m3
+  EMBEDDING_MODEL_CONFIG__DIMENSIONS_MODE: never
+  EMBEDDING_MODEL_CONFIG__OVERRIDES__BASE_URL: http://host.docker.internal:11434/v1
+  EMBEDDING_MODEL_CONFIG__OVERRIDES__API_KEY_ENV: LOCAL_EMBEDDING_API_KEY
+  LOCAL_EMBEDDING_API_KEY: ollama
+
+services:
+  api:
+    ports: !override
+      - "8001:8000"
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      <<: *local_embedding_env
+
+  deriver:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      <<: *local_embedding_env
+
+  database:
+    ports: !override
+      - "127.0.0.1:5433:5432"
+
+  redis:
+    ports: !override
+      - "127.0.0.1:6380:6379"
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,94 @@
+# Honcho self-hosted deployment
+# Model endpoints and transports are defined in ./config.toml (mounted read-only below); secrets come from .env.
+#
+# Usage:
+#   docker compose up -d
+#   docker compose logs -f api deriver
+#
+# First run will build the image and run migrations automatically.
+
+services:
+  api:  # Honcho HTTP API; starts only after database and redis report healthy
+    build:
+      context: .
+      dockerfile: Dockerfile
+    entrypoint: ["sh", "docker/entrypoint.sh"]  # the script is bind-mounted read-only under volumes
+    depends_on:
+      database:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    ports:
+      - "8000:8000"  # NOTE(review): published on all host interfaces (database/redis bind to 127.0.0.1) while config.toml sets USE_AUTH = false — confirm intended
+    extra_hosts:
+      - "host.docker.internal:host-gateway"  # config.toml points the embedding endpoint at host.docker.internal:11434
+    env_file:
+      - .env
+    environment:
+      - DB_CONNECTION_URI=postgresql+psycopg://honcho:honcho@database:5432/honcho
+      - CACHE_URL=redis://redis:6379/0?suppress=true
+      - CACHE_ENABLED=true
+    volumes:
+      - ./docker/entrypoint.sh:/app/docker/entrypoint.sh:ro
+      - ./config.toml:/app/config.toml:ro
+    restart: unless-stopped
+
+  deriver:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    entrypoint: ["/app/.venv/bin/python", "-m", "src.deriver"]
+    depends_on:
+      database:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    env_file:
+      - .env
+    environment:
+      - DB_CONNECTION_URI=postgresql+psycopg://honcho:honcho@database:5432/honcho
+      - CACHE_URL=redis://redis:6379/0?suppress=true
+      - CACHE_ENABLED=true
+      - METRICS_ENABLED=false
+    volumes:
+      - ./config.toml:/app/config.toml:ro
+    restart: unless-stopped
+
+  database:
+    image: pgvector/pgvector:pg15
+    restart: unless-stopped
+    ports:
+      - "127.0.0.1:5432:5432"
+    command: ["postgres", "-c", "max_connections=200"]
+    environment:
+      - POSTGRES_DB=honcho
+      - POSTGRES_USER=honcho
+      - POSTGRES_PASSWORD=honcho
+      - PGDATA=/var/lib/postgresql/data/pgdata
+    volumes:
+      - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql
+      - pgdata:/var/lib/postgresql/data/
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U honcho -d honcho"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
+  redis:
+    image: redis:8.2
+    restart: unless-stopped
+    ports:
+      - "127.0.0.1:6379:6379"
+    volumes:
+      - redis-data:/data
+    healthcheck:
+      test: ["CMD-SHELL", "redis-cli ping"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  pgdata:
+  redis-data: