From da76e9e7590112c7d7daaa9a9a26806236736640 Mon Sep 17 00:00:00 2001 From: Luke Craig Date: Wed, 10 Jun 2026 22:45:08 -0400 Subject: [PATCH] mcp: add Phase-1 MCP server for AI-led rehosting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds src/penguin/mcp/, an in-container Model Context Protocol server that exposes Penguin's rehosting loop to an LLM agent as discrete tools, replacing the "hand-edit YAML -> reboot VM -> grep multi-KB result files" loop. Tools: - lifecycle: run(project_dir) -> runs one emulation, returns results dir + health - mutate: set_env / set_nvram / set_uboot_env / add_netdev / block_signal / add_pseudofile / add_static_file / show_patch / reset_patch. Each deep-merges into a single reviewable patch_90_mcp.yaml (auto-merged via auto_patching); config.yaml is never edited, and reset_patch reverts everything. - diagnose: health / missing_env / pseudofile_failures / netbinds / console(grep) / db_query(SELECT over plugins.db) / missing_files. Returns parsed JSON, not dumps. Design: - diagnostics.py + mutations.py are dependency-free (pyyaml + stdlib) and unit-tested (tests/unit_tests/test_mcp.py, 7 tests) — no container or mcp package needed. - server.py is the only module importing mcp (FastMCP); loaded by `penguin mcp`. - `penguin mcp` CLI subcommand starts the server (stdio transport). - mcp added to install_requires (env-markered for py>=3.10). Phase 2 (follow-up): live control via the remotectrl socket (uprobes/syscall hooks/ plugin toggles on a running guest) and guest interaction. 
@cli.command()
@click.option("--transport", type=str, default="stdio", help="MCP transport (default: stdio).")
@click.pass_context
def mcp(ctx, transport):
    """
    Start the MCP server for AI-led rehosting (runs in-container, speaks MCP over stdio).

    Exposes Penguin's loop to an LLM agent as tools: run, config mutations (accumulated in
    patch_90_mcp.yaml), and structured diagnostics over results/N/. See penguin.mcp.
    """
    # The server module is imported lazily because it pulls in the third-party
    # ``mcp`` package, which setup.cfg only installs on Python >= 3.10.
    try:
        from .mcp.server import serve
    except ModuleNotFoundError as e:
        # Only translate a missing top-level ``mcp`` distribution; any other
        # missing module is a genuine bug and should keep its traceback.
        if (e.name or "").split(".")[0] != "mcp":
            raise
        raise click.ClickException(
            "the 'mcp' Python package is required for `penguin mcp` "
            "(it is only installed on Python >= 3.10); try `pip install mcp`"
        ) from e
    serve(transport=transport)
Defaults to /config.yaml.") diff --git a/src/penguin/mcp/README.md b/src/penguin/mcp/README.md new file mode 100644 index 000000000..2bd9c61a4 --- /dev/null +++ b/src/penguin/mcp/README.md @@ -0,0 +1,43 @@ +# Penguin MCP server (Phase 1) + +A [Model Context Protocol](https://modelcontextprotocol.io) adapter that lets an LLM agent +drive Penguin's rehosting loop through discrete tools instead of hand-editing YAML and +grepping raw result files. It runs **inside the Penguin container** and speaks MCP over +stdio. + +## Run it + +```sh +penguin mcp # stdio transport; launch under your MCP client / agent +``` + +The wrapper launches this in-container, so the server has direct access to the project +tree, the run entry point, and `plugins.db`. + +## Tools + +**Lifecycle** +- `run(project_dir, timeout?)` — execute one emulation (config + auto-merged patches) and + return the new `results/N` dir plus a health summary. + +**Config mutations** (each deep-merges into a single reviewable `patch_90_mcp.yaml`, +auto-merged by Penguin's `auto_patching`; revert with `reset_patch`) +- `set_env`, `set_nvram`, `set_uboot_env`, `add_netdev`, `block_signal`, + `add_pseudofile`, `add_static_file`, `show_patch`, `reset_patch` + +**Structured diagnostics** (parsed JSON, not file dumps) +- `health`, `missing_env`, `pseudofile_failures`, `netbinds`, `console(pattern)`, + `db_query(sql)`, `missing_files(procname)` + +## Design notes + +- `diagnostics.py` and `mutations.py` are dependency-free (pyyaml + stdlib) and unit-tested + (`tests/unit_tests/test_mcp.py`) — they need neither a container nor the `mcp` package. +- `server.py` is the only module that imports `mcp` (FastMCP); it's loaded by `penguin mcp`. +- Mutations never touch `config.yaml`; they live in one patch file so changes are auditable + and reversible — matching the "prefer patches" discipline. 
+ +## Not yet (Phase 2) + +Live control via the `remotectrl` Unix socket (add uprobes/syscall hooks, toggle plugins on +a *running* guest, no reboot) and guest interaction (`guest_cmd`, VPN-bridge reachability). diff --git a/src/penguin/mcp/__init__.py b/src/penguin/mcp/__init__.py new file mode 100644 index 000000000..667f1c99e --- /dev/null +++ b/src/penguin/mcp/__init__.py @@ -0,0 +1,22 @@ +""" +Penguin MCP server (Phase 1) — a Model Context Protocol adapter for AI-led rehosting. + +This package exposes Penguin's rehosting loop to an LLM agent as discrete tools instead +of "hand-edit YAML, reboot the VM, grep multi-KB result files". It runs **inside the +Penguin container** (where it has direct access to the project tree, the run entry point, +and the SQLite event DB) and speaks MCP over stdio (`penguin mcp`). + +Layout: +- ``diagnostics`` — dependency-free readers that parse ``results/N/`` artifacts into JSON. +- ``mutations`` — dependency-free writers that express config changes as a reviewable + ``patch_90_mcp.yaml`` (auto-merged by Penguin's ``auto_patching``). +- ``server`` — the FastMCP server wiring those + ``run`` into MCP tools (imports the + ``mcp`` package; only loaded by the ``penguin mcp`` subcommand). + +Phase 1 = lifecycle (run) + config-mutation patch-writers + structured diagnostics. +Phase 2 (not yet) = live control via the ``remotectrl`` socket + guest interaction. +""" + +from . import diagnostics, mutations + +__all__ = ["diagnostics", "mutations"] diff --git a/src/penguin/mcp/diagnostics.py b/src/penguin/mcp/diagnostics.py new file mode 100644 index 000000000..e48c8617b --- /dev/null +++ b/src/penguin/mcp/diagnostics.py @@ -0,0 +1,167 @@ +""" +Structured diagnostic readers over a Penguin ``results/N/`` directory. + +These functions parse the artifacts Penguin actually writes (verified against the +``loggers``/``analysis`` pyplugins) into plain Python/JSON structures, so an agent gets +the *answer* instead of a multi-KB file dump. 
def latest_results(proj_dir: str) -> Optional[str]:
    """Return the newest ``results/N`` directory for a project, or ``None``.

    ``results/latest`` is preferred when it resolves to an existing directory
    (the real path behind the symlink is returned). Otherwise the directory
    under ``results/`` with the highest all-digit name is used.
    """
    results_base = os.path.join(proj_dir, "results")
    latest = os.path.join(results_base, "latest")
    # isdir() follows symlinks, so a dangling ``latest`` link falls through to
    # the numeric scan instead of being returned as a non-existent path.
    if os.path.isdir(latest):
        return os.path.realpath(latest)
    if not os.path.isdir(results_base):
        return None
    nums = [
        int(d)
        for d in os.listdir(results_base)
        if d.isdigit() and os.path.isdir(os.path.join(results_base, d))
    ]
    if not nums:
        return None
    return os.path.join(results_base, str(max(nums)))


def _resolve(results_dir: Optional[str], proj_dir: Optional[str]) -> Optional[str]:
    """Pick the results dir: an explicit ``results_dir`` wins over ``proj_dir``'s latest."""
    if results_dir:
        return results_dir
    if proj_dir:
        return latest_results(proj_dir)
    return None


def _load_yaml(path: str) -> Any:
    """Parse one YAML file with the safe loader and return the resulting object."""
    with open(path) as f:
        return yaml.safe_load(f)


def _need(results_dir: Optional[str], proj_dir: Optional[str], name: str) -> tuple:
    """Locate artifact ``name`` inside the resolved results dir.

    Returns ``(path, None)`` when the file exists, otherwise ``(None, error_dict)``
    so callers can hand the error straight back to the agent.
    """
    rd = _resolve(results_dir, proj_dir)
    if not rd:
        return None, {"error": "no results dir found; pass results_dir or run first"}
    path = os.path.join(rd, name)
    if not os.path.exists(path):
        return None, {"error": f"{name} not present in {rd} (plugin may not have run)"}
    return path, None


def _read_yaml_artifact(
    results_dir: Optional[str], proj_dir: Optional[str], name: str, key: str
) -> dict:
    """Load YAML artifact ``name`` and return it as ``{key: data}``.

    A missing, unreadable or malformed file yields ``{"error": ...}`` rather than
    raising, so one bad artifact cannot take down the caller.
    """
    path, err = _need(results_dir, proj_dir, name)
    if err:
        return err
    try:
        return {key: _load_yaml(path)}
    except (OSError, yaml.YAMLError) as e:
        return {"error": f"could not parse {name}: {e}"}


def read_health(results_dir: Optional[str] = None, proj_dir: Optional[str] = None) -> dict:
    """The end-of-run health summary (score components, panic flag, counts)."""
    return _read_yaml_artifact(results_dir, proj_dir, "health_final.yaml", "health")


def read_missing_env(results_dir: Optional[str] = None, proj_dir: Optional[str] = None) -> dict:
    """Env vars / ``/proc/cmdline`` keys the firmware read but the config didn't provide."""
    return _read_yaml_artifact(results_dir, proj_dir, "env_missing.yaml", "missing_env")


def read_pseudofile_failures(
    results_dir: Optional[str] = None, proj_dir: Optional[str] = None
) -> dict:
    """Missing/unmodeled /dev /proc /sys files the firmware touched, with op counts."""
    return _read_yaml_artifact(
        results_dir, proj_dir, "pseudofiles_failures.yaml", "pseudofile_failures"
    )


def read_netbinds(results_dir: Optional[str] = None, proj_dir: Optional[str] = None) -> dict:
    """Listening sockets the guest opened (the success signal). Rows from netbinds.csv.

    Returns ``{"netbinds": rows, "count": n}`` where each row is the list of CSV
    fields; blank lines are skipped.
    """
    path, err = _need(results_dir, proj_dir, "netbinds.csv")
    if err:
        return err
    try:
        with open(path, newline="") as f:
            rows = [row for row in csv.reader(f) if row]
    except (OSError, csv.Error) as e:
        return {"error": f"could not read netbinds.csv: {e}"}
    return {"netbinds": rows, "count": len(rows)}


def grep_console(
    results_dir: Optional[str] = None,
    proj_dir: Optional[str] = None,
    pattern: Optional[str] = None,
    max_lines: int = 100,
) -> dict:
    """Return console.log lines matching a regex (or the tail if no pattern).

    At most ``max_lines`` lines are returned, taken from the end of the matches.
    ``truncated`` tells whether more matched than were returned and
    ``total_matched`` is the full match count. An invalid regex yields
    ``{"error": ...}``.
    """
    path, err = _need(results_dir, proj_dir, "console.log")
    if err:
        return err
    with open(path, errors="replace") as f:
        lines = f.read().splitlines()
    if pattern:
        try:
            rx = re.compile(pattern)
        except re.error as e:
            return {"error": f"bad regex: {e}"}
        hits = [ln for ln in lines if rx.search(ln)]
    else:
        hits = lines
    # ``hits[-0:]`` is the whole list, so a zero (or negative) cap must be
    # handled explicitly instead of via the negative slice.
    max_lines = max(int(max_lines), 0)
    tail = hits[-max_lines:] if max_lines else []
    return {"lines": tail, "truncated": len(hits) > max_lines, "total_matched": len(hits)}


def query_db(
    sql: str,
    results_dir: Optional[str] = None,
    proj_dir: Optional[str] = None,
    limit: int = 100,
    params: tuple = (),
) -> dict:
    """Run a read-only SELECT against ``plugins.db`` (syscalls_logger/exec_logger events).

    The DB has a parent ``event`` table joined to ``syscall``/``read``/``write``/``exec`` on
    ``id`` (procname lives on ``event`` — you must JOIN). Only SELECT is allowed.

    ``params`` are bound to ``?`` placeholders in ``sql``; use them for any value
    that is not a trusted literal. Returns ``{"rows", "count", "truncated"}`` or
    ``{"error": ...}``.
    """
    path, err = _need(results_dir, proj_dir, "plugins.db")
    if err:
        return err
    if not sql.lstrip().lower().startswith("select"):
        return {"error": "only SELECT queries are allowed"}
    if ";" in sql.rstrip().rstrip(";"):
        return {"error": "multiple statements are not allowed"}
    limit = max(int(limit), 0)
    try:
        # mode=ro makes the connection itself read-only, backing up the text checks.
        con = sqlite3.connect(f"file:{path}?mode=ro", uri=True)
    except sqlite3.Error as e:
        return {"error": f"sqlite: {e}"}
    try:
        con.row_factory = sqlite3.Row
        cur = con.execute(sql, params)
        # Fetch one extra row so "exactly `limit` rows" is not reported as
        # truncated; limit + 1 is always >= 1, which also avoids fetchmany(0).
        fetched = cur.fetchmany(limit + 1)
        out = [dict(r) for r in fetched[:limit]]
        return {"rows": out, "count": len(out), "truncated": len(fetched) > limit}
    except sqlite3.Error as e:
        return {"error": f"sqlite: {e}"}
    finally:
        con.close()


def missing_files(
    results_dir: Optional[str] = None,
    proj_dir: Optional[str] = None,
    procname: Optional[str] = None,
    limit: int = 30,
) -> dict:
    """Canned query: files a process tried to open/stat that returned ENOENT (what to add).

    Groups matching syscalls by ``(procname, arg0_repr)`` and orders them by hit
    count, optionally restricted to one ``procname``.
    """
    # NOTE(review): ``arg0_repr`` is reported as the path for every syscall in
    # this list, including the *at() variants — confirm against the logger's
    # column layout that arg0 is really the path for those.
    where = (
        "s.name IN ('sys_open','sys_openat','sys_stat64','sys_access','sys_faccessat') "
        "AND s.retno_repr LIKE '%ENOENT%'"
    )
    params: tuple = ()
    if procname:
        # Bind the name instead of interpolating it: a quote in ``procname``
        # must neither break the statement nor alter its meaning.
        where += " AND e.procname = ?"
        params = (procname,)
    sql = (
        "SELECT e.procname, s.arg0_repr AS path, COUNT(*) AS n "
        "FROM syscall s JOIN event e ON e.id = s.id "
        f"WHERE {where} GROUP BY e.procname, s.arg0_repr ORDER BY n DESC"
    )
    return query_db(sql, results_dir=results_dir, proj_dir=proj_dir, limit=limit, params=params)
PATCH_NAME = "patch_90_mcp.yaml"

_HEADER = (
    "# Managed by the Penguin MCP server (penguin.mcp). Each tool call deep-merges here.\n"
    "# Safe to edit or delete by hand; `reset_patch` removes it.\n"
)


def _patch_path(proj_dir: str) -> str:
    """Path of the MCP-managed patch file inside ``proj_dir``."""
    return os.path.join(proj_dir, PATCH_NAME)


def _load(proj_dir: str) -> dict:
    """Read the current patch as a dict (``{}`` when the file is absent or empty).

    Raises:
        ValueError: the file exists but its top level is not a YAML mapping
            (possible after a hand edit, which the file header invites).
    """
    path = _patch_path(proj_dir)
    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        return {}
    if not isinstance(data, dict):
        raise ValueError(
            f"{path} must contain a YAML mapping at the top level, got {type(data).__name__}"
        )
    return data


def _deep_merge(dst: dict, src: dict) -> dict:
    """Recursively merge src into dst (dicts merge; lists union-append; scalars overwrite).

    ``dst`` is modified in place and returned.
    """
    for k, v in src.items():
        if isinstance(v, dict) and isinstance(dst.get(k), dict):
            _deep_merge(dst[k], v)
        elif isinstance(v, list) and isinstance(dst.get(k), list):
            for item in v:
                if item not in dst[k]:
                    dst[k].append(item)
        else:
            dst[k] = v
    return dst


def _apply(proj_dir: str, fragment: dict) -> dict:
    """Deep-merge ``fragment`` into the project's patch file and return the new patch.

    Raises:
        ValueError: ``proj_dir`` is not a directory, or the existing patch is
            not a mapping (see ``_load``).
    """
    if not os.path.isdir(proj_dir):
        raise ValueError(f"project dir does not exist: {proj_dir}")
    import copy  # local: only needed here

    # Merge a private copy so caller-owned dicts/lists are never aliased into
    # the patch (shared objects would also be dumped as YAML anchors/aliases).
    patch = _deep_merge(_load(proj_dir), copy.deepcopy(fragment))
    # Serialise before touching the file: if dumping fails, the patch
    # accumulated so far must survive untouched.
    body = yaml.safe_dump(patch, sort_keys=False, default_flow_style=False)
    path = _patch_path(proj_dir)
    tmp = f"{path}.tmp"  # does not match patch_*.yaml, so it is never picked up as a patch
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            f.write(_HEADER)
            f.write(body)
        os.replace(tmp, path)  # swap the finished file into place in one step
    except OSError:
        if os.path.exists(tmp):
            os.remove(tmp)
        raise
    return patch


# --- individual mutations -------------------------------------------------------------

def set_env(proj_dir: str, key: str, value: Any) -> dict:
    """Set an environment variable / boot arg (e.g. ``igloo_init``, a model string)."""
    return _apply(proj_dir, {"env": {key: value}})


def set_nvram(proj_dir: str, key: str, value: Any) -> dict:
    """Seed an initial NVRAM key/value."""
    return _apply(proj_dir, {"nvram": {key: value}})


def set_uboot_env(proj_dir: str, key: str, value: str) -> dict:
    """Seed a U-Boot env var (served via fw_getenv by the ``uboot`` plugin)."""
    return _apply(proj_dir, {"uboot_env": {key: value}})


def add_netdev(proj_dir: str, name: str) -> dict:
    """Declare a network interface the firmware expects (e.g. ``egiga0``, ``vlan1``).

    Adding the same name twice is a no-op: list entries are union-merged.
    """
    return _apply(proj_dir, {"netdevs": [name]})


def block_signal(proj_dir: str, signum: int) -> dict:
    """Block a signal guest-wide (supported: 6/9/15/17).

    ``signum`` is coerced with ``int()``; the supported set is not enforced here.
    """
    return _apply(proj_dir, {"blocked_signals": [int(signum)]})


def add_pseudofile(
    proj_dir: str,
    path: str,
    read: Optional[dict] = None,
    write: Optional[dict] = None,
    ioctl: Optional[dict] = None,
) -> dict:
    """Create/model a pseudofile. With no models, just makes the path exist (``{}``).

    Example models: read={"model": "const_buf", "val": "hello"},
    write={"model": "discard"}, ioctl={"*": {"model": "return_const", "val": 0}}.
    """
    models = {"read": read, "write": write, "ioctl": ioctl}
    # Only operations that were actually given appear in the written spec.
    spec = {op: model for op, model in models.items() if model is not None}
    return _apply(proj_dir, {"pseudofiles": {path: spec}})


def add_static_file(proj_dir: str, path: str, spec: dict) -> dict:
    """Add a pre-boot filesystem edit. ``spec`` is the action dict, e.g.
    {"type": "symlink", "target": "/igloo/utils/exit0.sh"} or
    {"type": "inline_file", "contents": "...", "mode": 0o755}.
    """
    return _apply(proj_dir, {"static_files": {path: spec}})


# --- inspection / lifecycle -----------------------------------------------------------

def show_patch(proj_dir: str) -> dict:
    """Return the current MCP-managed patch (the accumulated agent changes)."""
    path = _patch_path(proj_dir)
    if not os.path.exists(path):
        return {"patch": {}, "exists": False}
    return {"patch": _load(proj_dir), "exists": True, "path": path}


def reset_patch(proj_dir: str) -> dict:
    """Delete the MCP-managed patch (revert all agent changes).

    Returns ``{"removed": True}`` if a file was deleted, ``{"removed": False}``
    if there was none.
    """
    try:
        os.remove(_patch_path(proj_dir))
    except FileNotFoundError:
        return {"removed": False}
    return {"removed": True}
app = FastMCP("penguin")


def _alloc_results_dir(proj_dir: str) -> str:
    """Allocate the next ``results/N`` dir and repoint ``results/latest`` (mirrors the CLI).

    Only the ``results/`` parent is created here; the returned ``results/N``
    path itself is not. ``latest`` is repointed best-effort: an existing symlink
    is replaced, and a failure to create the new link is ignored.
    """
    base = os.path.join(proj_dir, "results")
    os.makedirs(base, exist_ok=True)
    nums = [int(d) for d in os.listdir(base) if d.isdigit() and os.path.isdir(os.path.join(base, d))]
    idx = max(nums) + 1 if nums else 0
    latest = os.path.join(base, "latest")
    if os.path.islink(latest):
        os.unlink(latest)
    try:
        os.symlink(f"./{idx}", latest)
    except OSError:
        # Best-effort: e.g. ``latest`` exists as a real directory.
        pass
    return os.path.join(base, str(idx))


# --- lifecycle ------------------------------------------------------------------------

@app.tool()
def run(project_dir: str, timeout: Optional[int] = None) -> dict:
    """Run one emulation of the project (applies config + auto-merged patches) and return
    the results dir plus a health summary. This is the core loop step after mutations."""
    import contextlib
    import sys

    from penguin.__main__ import run_from_config  # lazy: heavy import path

    if not os.path.isdir(os.path.join(project_dir, "base")):
        return {"error": f"{project_dir} has no base/ (run `penguin init` first)"}
    config_path = os.path.join(project_dir, "config.yaml")
    if not os.path.exists(config_path):
        return {"error": f"no config.yaml in {project_dir}"}
    out = _alloc_results_dir(project_dir)
    try:
        # With the stdio transport, stdout carries the MCP protocol stream, so
        # anything the run prints there would corrupt it. Send Python-level
        # stdout to stderr for the duration of the run. NOTE(review): output of
        # child processes that inherit fd 1 is not covered by this — confirm
        # whether run_from_config spawns any.
        with contextlib.redirect_stdout(sys.stderr):
            run_from_config(project_dir, config_path, out, timeout=timeout)
    except Exception as e:  # surface, don't crash the server
        return {"error": f"run failed: {e}", "results_dir": out}
    summary = diag.read_health(results_dir=out)
    return {"results_dir": out, **summary}


# --- mutate ---------------------------------------------------------------------------
# Thin wrappers: each tool forwards to the function of the same name in
# ``mutations``. The docstrings double as the tool descriptions shown to the agent.

@app.tool()
def set_env(project_dir: str, key: str, value: Any) -> dict:
    """Set an env var / boot arg (e.g. igloo_init). Tip: value 'DYNVALDYNVALDYNVAL'
    discovers the expected value via env_cmp.txt on the next run."""
    return mut.set_env(project_dir, key, value)


@app.tool()
def set_nvram(project_dir: str, key: str, value: Any) -> dict:
    """Seed an initial NVRAM key/value."""
    return mut.set_nvram(project_dir, key, value)


@app.tool()
def set_uboot_env(project_dir: str, key: str, value: str) -> dict:
    """Seed a U-Boot env var (served via fw_getenv by the uboot plugin)."""
    return mut.set_uboot_env(project_dir, key, value)


@app.tool()
def add_netdev(project_dir: str, name: str) -> dict:
    """Declare a network interface the firmware expects (e.g. egiga0, vlan1)."""
    return mut.add_netdev(project_dir, name)


@app.tool()
def block_signal(project_dir: str, signum: int) -> dict:
    """Block a signal guest-wide to stop a service being killed (supported: 6/9/15/17)."""
    return mut.block_signal(project_dir, signum)


@app.tool()
def add_pseudofile(
    project_dir: str,
    path: str,
    read: Optional[dict] = None,
    write: Optional[dict] = None,
    ioctl: Optional[dict] = None,
) -> dict:
    """Create/model a /dev /proc /sys pseudofile. No models = just make it exist.
    e.g. ioctl={"*": {"model": "return_const", "val": 0}}."""
    return mut.add_pseudofile(project_dir, path, read=read, write=write, ioctl=ioctl)


@app.tool()
def add_static_file(project_dir: str, path: str, spec: dict) -> dict:
    """Add a pre-boot FS edit, e.g. spec={"type":"symlink","target":"/igloo/utils/exit0.sh"}."""
    return mut.add_static_file(project_dir, path, spec)


@app.tool()
def show_patch(project_dir: str) -> dict:
    """Show the accumulated MCP-managed config patch (all changes made this session)."""
    return mut.show_patch(project_dir)


@app.tool()
def reset_patch(project_dir: str) -> dict:
    """Revert all MCP-managed config changes (delete patch_90_mcp.yaml)."""
    return mut.reset_patch(project_dir)


# --- diagnose -------------------------------------------------------------------------
# Optional parameters are annotated ``Optional[...]`` (not ``str = None``) so the
# annotation itself admits an explicit null from the client, not just an omitted argument.

@app.tool()
def health(project_dir: Optional[str] = None, results_dir: Optional[str] = None) -> dict:
    """End-of-run health summary (score components, panic flag, counts)."""
    return diag.read_health(results_dir=results_dir, proj_dir=project_dir)


@app.tool()
def missing_env(project_dir: Optional[str] = None, results_dir: Optional[str] = None) -> dict:
    """Env vars / cmdline keys the firmware read but the config didn't provide."""
    return diag.read_missing_env(results_dir=results_dir, proj_dir=project_dir)


@app.tool()
def pseudofile_failures(
    project_dir: Optional[str] = None, results_dir: Optional[str] = None
) -> dict:
    """Missing/unmodeled /dev /proc /sys files the firmware touched, with op counts."""
    return diag.read_pseudofile_failures(results_dir=results_dir, proj_dir=project_dir)


@app.tool()
def netbinds(project_dir: Optional[str] = None, results_dir: Optional[str] = None) -> dict:
    """Listening sockets the guest opened (the success signal)."""
    return diag.read_netbinds(results_dir=results_dir, proj_dir=project_dir)


@app.tool()
def console(
    project_dir: Optional[str] = None,
    results_dir: Optional[str] = None,
    pattern: Optional[str] = None,
    max_lines: int = 100,
) -> dict:
    """console.log lines matching a regex (or the tail). Grep for panics, segfaults, errors."""
    return diag.grep_console(
        results_dir=results_dir, proj_dir=project_dir, pattern=pattern, max_lines=max_lines
    )


@app.tool()
def db_query(
    sql: str,
    project_dir: Optional[str] = None,
    results_dir: Optional[str] = None,
    limit: int = 100,
) -> dict:
    """Read-only SELECT over plugins.db (syscall/exec events). JOIN syscall→event for procname."""
    return diag.query_db(sql, results_dir=results_dir, proj_dir=project_dir, limit=limit)


@app.tool()
def missing_files(
    project_dir: Optional[str] = None,
    results_dir: Optional[str] = None,
    procname: Optional[str] = None,
    limit: int = 30,
) -> dict:
    """Files a process tried to open/stat that returned ENOENT (what pseudofiles to add)."""
    return diag.missing_files(
        results_dir=results_dir, proj_dir=project_dir, procname=procname, limit=limit
    )


def serve(transport: str = "stdio") -> None:
    """Entry point for `penguin mcp`."""
    app.run(transport=transport)
# --- mutations ------------------------------------------------------------------------

def test_mutations_accumulate_into_one_patch(tmp_path):
    """Every mutation lands in the single patch file; repeated list items are not duplicated."""
    proj = str(tmp_path)
    mut.set_env(proj, "igloo_init", "/sbin/init")
    mut.set_nvram(proj, "lan_ipaddr", "192.168.1.1")
    for dev in ("eth0", "eth0", "vlan1"):  # second eth0 exercises dedup
        mut.add_netdev(proj, dev)
    mut.block_signal(proj, 6)
    mut.add_pseudofile(proj, "/dev/foo", ioctl={"*": {"model": "return_const", "val": 0}})

    patch_file = tmp_path / "patch_90_mcp.yaml"
    assert patch_file.exists()
    written = yaml.safe_load(patch_file.read_text())

    assert written["env"]["igloo_init"] == "/sbin/init"
    assert written["nvram"]["lan_ipaddr"] == "192.168.1.1"
    assert written["netdevs"] == ["eth0", "vlan1"]  # deduped, ordered
    assert written["blocked_signals"] == [6]
    assert written["pseudofiles"]["/dev/foo"]["ioctl"]["*"]["model"] == "return_const"


def test_show_and_reset_patch(tmp_path):
    """show_patch reflects the file's presence; reset_patch removes it exactly once."""
    proj = str(tmp_path)
    assert mut.show_patch(proj)["exists"] is False

    mut.set_env(proj, "FOO", "bar")
    shown = mut.show_patch(proj)
    assert shown["exists"] is True
    assert shown["patch"]["env"]["FOO"] == "bar"

    assert mut.reset_patch(proj)["removed"] is True
    assert not (tmp_path / "patch_90_mcp.yaml").exists()
    assert mut.reset_patch(proj)["removed"] is False


def test_deep_merge_overwrites_scalar_keeps_siblings(tmp_path):
    """Re-setting one key replaces its value and leaves sibling keys alone."""
    proj = str(tmp_path)
    for key, value in (("A", "1"), ("B", "2")):
        mut.set_env(proj, key, value)
    final = mut.set_env(proj, "A", "3")  # overwrite A, keep B
    assert final["env"] == {"A": "3", "B": "2"}


# --- diagnostics ----------------------------------------------------------------------

def _mk_results(tmp_path):
    """Create ``results/0`` plus a ``results/latest`` symlink; return (project, results) paths."""
    results = tmp_path / "results"
    run0 = results / "0"
    run0.mkdir(parents=True)
    (results / "latest").symlink_to("./0")
    return str(tmp_path), str(run0)


def test_latest_results_resolves_symlink(tmp_path):
    """latest_results follows results/latest to the real run directory."""
    proj, rd = _mk_results(tmp_path)
    resolved = diag.latest_results(proj)
    assert os.path.realpath(resolved) == os.path.realpath(rd)


def test_readers_parse_artifacts(tmp_path):
    """Each reader returns parsed content for an artifact written into results/0."""
    proj, rd = _mk_results(tmp_path)
    yaml_artifacts = {
        "health_final.yaml": {"nopanic": 1, "bound_sockets": 3},
        "env_missing.yaml": ["sxid", "boardmodel"],
        "pseudofiles_failures.yaml": {"/dev/dsa": {"ioctl": 5}},
    }
    for name, content in yaml_artifacts.items():
        with open(os.path.join(rd, name), "w") as f:
            yaml.safe_dump(content, f)
    text_artifacts = {
        "netbinds.csv": "httpd,4,tcp,0.0.0.0,80\ntelnetd,4,tcp,0.0.0.0,23\n",
        "console.log": "boot ok\nKernel panic - not syncing: Attempted to kill init!\ndone\n",
    }
    for name, content in text_artifacts.items():
        with open(os.path.join(rd, name), "w") as f:
            f.write(content)

    assert diag.read_health(proj_dir=proj)["health"]["bound_sockets"] == 3
    assert "sxid" in diag.read_missing_env(proj_dir=proj)["missing_env"]
    assert diag.read_pseudofile_failures(proj_dir=proj)["pseudofile_failures"]["/dev/dsa"]

    nb = diag.read_netbinds(proj_dir=proj)
    assert nb["count"] == 2
    assert nb["netbinds"][0][0] == "httpd"

    panic = diag.grep_console(proj_dir=proj, pattern="panic")
    assert panic["total_matched"] == 1
    assert "Attempted to kill init" in panic["lines"][0]


def test_missing_file_returns_error_not_raise(tmp_path):
    """A reader whose artifact was never written reports an error dict."""
    proj, _ = _mk_results(tmp_path)
    result = diag.read_health(proj_dir=proj)  # no health_final.yaml written
    assert "error" in result


def test_db_query_readonly_guard_and_select(tmp_path):
    """Non-SELECT and multi-statement SQL are rejected; a plain SELECT returns rows."""
    proj, rd = _mk_results(tmp_path)
    con = sqlite3.connect(os.path.join(rd, "plugins.db"))
    con.execute("CREATE TABLE event (id INTEGER PRIMARY KEY, procname TEXT)")
    con.execute("INSERT INTO event VALUES (1, 'httpd')")
    con.commit()
    con.close()

    for rejected in ("DROP TABLE event", "SELECT 1; DELETE FROM event"):
        assert diag.query_db(rejected, proj_dir=proj)["error"]
    selected = diag.query_db("SELECT procname FROM event", proj_dir=proj)
    assert selected["rows"][0]["procname"] == "httpd"