Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
403 changes: 403 additions & 0 deletions mingle/ai_server/ai_server.py

Large diffs are not rendered by default.

File renamed without changes
5 changes: 4 additions & 1 deletion benchmark.py → mingle/ai_server/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

import sys, os
sys.path.insert(0, "cactus/python/src")
import pathlib

_REPO_ROOT = str(pathlib.Path(__file__).resolve().parents[2])
sys.path.insert(0, os.path.join(_REPO_ROOT, "cactus/python/src"))
os.environ["CACTUS_NO_CLOUD_TELE"] = "1"

import json
Expand Down
113 changes: 103 additions & 10 deletions main.py → mingle/ai_server/main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,64 @@

import sys
import os as _os

# Resolve repo root (two levels up from mingle/ai_server/) so the vendored
# Cactus SDK and model weights are found regardless of the current directory.
_REPO_ROOT = _os.path.normpath(_os.path.join(_os.path.dirname(_os.path.abspath(__file__)), "../.."))
sys.path.insert(0, _os.path.join(_REPO_ROOT, "cactus/python/src"))
functiongemma_path = _os.path.join(_REPO_ROOT, "cactus/weights/functiongemma-270m-it")

import json, os, time

# Load .env from repo root if present (dev convenience — does not override existing env vars)
try:
    from dotenv import load_dotenv
    load_dotenv(_os.path.join(_REPO_ROOT, ".env"), override=False)
except ImportError:
    # python-dotenv is optional; environment variables may be set externally.
    pass

from cactus import cactus_init, cactus_complete, cactus_destroy
from google import genai
from google.genai import types


# --- Model persistence singleton ---
# Process-wide Cactus model handle; populated lazily on first use.
_cactus_model = None

def _get_cactus_model():
    """Return the shared Cactus model, initialising it on first call.

    Re-initialising the model on every request is expensive, so a single
    module-level handle is created once and reused thereafter.
    """
    global _cactus_model
    if _cactus_model is not None:
        return _cactus_model
    _cactus_model = cactus_init(functiongemma_path)
    return _cactus_model


# --- Complexity classifier ---
_MULTI_ACTION_KW = ["and", "also", "then", "plus", "as well", "both", "additionally"]
_ACTION_VERBS = ["set", "send", "check", "play", "find", "remind", "text", "get", "search"]

def _classify_complexity(messages, tools) -> str:
user_text = " ".join(
m["content"] for m in messages if m["role"] == "user"
).lower()
tool_count = len(tools)
conjunction_count = sum(1 for kw in _MULTI_ACTION_KW if f" {kw} " in f" {user_text} ")
verb_count = sum(1 for v in _ACTION_VERBS if v in user_text.split())
if conjunction_count >= 1 and verb_count >= 2:
return "hard"
if tool_count >= 4 and verb_count >= 2:
return "hard"
if tool_count >= 3:
return "medium"
return "easy"


# Per-complexity routing table — tunes how aggressively each tier stays on-device:
# - tool_rag_top_k: tool count kept by Cactus's native RAG filter
#   (presumably 0 disables filtering and passes all tools — confirm against Cactus docs)
# - confidence_threshold: minimum on-device confidence before falling back to cloud
# - max_tokens: generation budget for the on-device model
_COMPLEXITY_CONFIG = {
    "easy": {"tool_rag_top_k": 1, "confidence_threshold": 0.75, "max_tokens": 128},
    "medium": {"tool_rag_top_k": 2, "confidence_threshold": 0.82, "max_tokens": 192},
    "hard": {"tool_rag_top_k": 0, "confidence_threshold": 0.97, "max_tokens": 320},
}


def generate_cactus(messages, tools):
"""Run function calling on-device via FunctionGemma + Cactus."""
model = cactus_init(functiongemma_path)
Expand Down Expand Up @@ -94,18 +144,61 @@ def generate_cloud(messages, tools):
}


def generate_hybrid(messages, tools, confidence_threshold=None):
    """Hybrid inference: classify complexity, route to on-device or cloud.

    Uses a model persistence singleton to avoid re-initialising Cactus on
    every call (major latency improvement). Complexity-aware routing lowers
    confidence thresholds for simple requests so more work stays on-device.

    Args:
        messages: Chat messages as dicts with "role"/"content" keys.
        tools: Tool/function schemas offered to the model.
        confidence_threshold: Optional override; when None, the per-complexity
            default from _COMPLEXITY_CONFIG is used.

    Returns:
        dict with "function_calls", "total_time_ms", "source", "complexity"
        and either "confidence" (on-device) or "local_confidence" (cloud fallback).
    """
    complexity = _classify_complexity(messages, tools)
    cfg = _COMPLEXITY_CONFIG[complexity]

    # Use caller-supplied threshold if provided, otherwise the per-complexity default.
    threshold = confidence_threshold if confidence_threshold is not None else cfg["confidence_threshold"]

    model = _get_cactus_model()

    # Pass all tools — tool_rag_top_k in cactus_complete handles native RAG filtering.
    cactus_tools = [{"type": "function", "function": t} for t in tools]

    raw_str = cactus_complete(
        model,
        [{"role": "system", "content": "You are a helpful assistant that can use tools."}] + messages,
        tools=cactus_tools,
        force_tools=True,
        max_tokens=cfg["max_tokens"],
        tool_rag_top_k=cfg["tool_rag_top_k"],  # native Cactus RAG tool filtering
        confidence_threshold=threshold,  # native Cactus confidence gate
        stop_sequences=["<|im_end|>", "<end_of_turn>"],
    )

    # cactus_complete returns a JSON string; treat unparseable output as a miss
    # so the request still gets served by the cloud fallback below.
    try:
        raw = json.loads(raw_str)
    except json.JSONDecodeError:
        raw = {}

    local_confidence = raw.get("confidence", 0)
    local_function_calls = raw.get("function_calls", [])
    local_time_ms = raw.get("total_time_ms", 0)
    cloud_handoff = raw.get("cloud_handoff", False)

    # Accept on-device result: not a cloud_handoff, confidence met, and non-empty calls.
    if not cloud_handoff and local_confidence >= threshold and local_function_calls:
        return {
            "function_calls": local_function_calls,
            "total_time_ms": local_time_ms,
            "confidence": local_confidence,
            "source": "on-device",
            "complexity": complexity,
        }

    # Fall back to cloud; surface local stats so callers can see why we fell back
    # and account for the on-device attempt in the total latency.
    cloud = generate_cloud(messages, tools)
    cloud["source"] = "cloud (fallback)"
    cloud["local_confidence"] = local_confidence
    cloud["total_time_ms"] += local_time_ms
    cloud["complexity"] = complexity
    return cloud


Expand Down
Empty file.
5 changes: 5 additions & 0 deletions mingle/ai_server/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
fastapi
uvicorn[standard]
google-genai
pydantic
python-dotenv
5 changes: 4 additions & 1 deletion submit.py → mingle/ai_server/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
"""

import argparse
import os
import time
import requests

# Directory containing this file — lets main.py be located regardless of the
# working directory the script is launched from.
_HERE = os.path.dirname(os.path.abspath(__file__))

# Evaluation server endpoint; the header skips ngrok's browser interstitial page.
SERVER_URL = "https://cactusevals.ngrok.app"
HEADERS = {"ngrok-skip-browser-warning": "true"}

Expand All @@ -19,7 +22,7 @@ def submit(team, location):
print("=" * 60)

try:
with open("main.py", "rb") as f:
with open(os.path.join(_HERE, "main.py"), "rb") as f:
resp = requests.post(
f"{SERVER_URL}/eval/submit",
data={"team": team, "location": location},
Expand Down
35 changes: 35 additions & 0 deletions mingle/backend/db.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Database bootstrap for the Mingle backend.
//
// Opens (or creates) a SQLite database file next to this module, ensures the
// schema exists, and exports a single shared better-sqlite3 handle for the
// rest of the backend to use.
const Database = require("better-sqlite3");
const path = require("path");

// The database file lives alongside the backend source.
const DB_PATH = path.join(__dirname, "mingle.db");

const db = new Database(DB_PATH);

// Enable WAL mode for better concurrent read performance
db.pragma("journal_mode = WAL");

// Schema notes:
// - profiles: one row per attendee profile. skills / looking_for /
//   can_help_with / domains are TEXT — presumably serialized lists (JSON or
//   CSV); verify against the code that writes these columns.
// - network: profiles saved by a user; UNIQUE(owner_user_id, profile_id)
//   prevents duplicate saves of the same profile.
db.exec(`
  CREATE TABLE IF NOT EXISTS profiles (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    role TEXT NOT NULL,
    company TEXT NOT NULL,
    bio TEXT NOT NULL,
    skills TEXT NOT NULL,
    looking_for TEXT NOT NULL,
    can_help_with TEXT NOT NULL,
    domains TEXT NOT NULL,
    linkedin_url TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
  );

  CREATE TABLE IF NOT EXISTS network (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    owner_user_id TEXT NOT NULL,
    profile_id TEXT NOT NULL,
    saved_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(owner_user_id, profile_id)
  );
`);

module.exports = db;
Binary file added mingle/backend/mingle.db
Binary file not shown.
Binary file added mingle/backend/mingle.db-shm
Binary file not shown.
Binary file added mingle/backend/mingle.db-wal
Binary file not shown.
1 change: 1 addition & 0 deletions mingle/backend/node_modules/.bin/mime

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mingle/backend/node_modules/.bin/prebuild-install

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mingle/backend/node_modules/.bin/qrcode

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mingle/backend/node_modules/.bin/rc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mingle/backend/node_modules/.bin/semver

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions mingle/backend/node_modules/.bin/uuid

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading