diff --git a/README.md b/README.md index d2058fb..d10e13d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,9 @@ # openclaw-task-watchdog -[![npm version](https://img.shields.io/npm/v/openclaw-task-watchdog.svg)](https://www.npmjs.com/package/openclaw-task-watchdog) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -[![OpenClaw Plugin](https://img.shields.io/badge/OpenClaw-Plugin-blueviolet)](https://github.com/nicepkg/openclaw) +[![OpenClaw Plugin](https://img.shields.io/badge/OpenClaw-Plugin-blueviolet)](https://github.com/openclaw/openclaw) -**OpenClaw Task Watchdog Plugin** — Auto-notify on subagent failures, exec errors, and stale tasks. +**OpenClaw Task Watchdog Plugin** - Auto-notify on subagent failures, exec errors, and stale tasks. [中文说明](#中文说明) @@ -17,11 +16,11 @@ OpenClaw excels at dispatching subagents and running long tasks via `exec`. But | Pain Point | What Happens | |-----------|-------------| | **Silent failures** | A subagent crashes or times out, but the parent session never finds out | -| **Forgotten tasks** | An `exec` command exits with error code 137 (OOM) — nobody notices | +| **Forgotten tasks** | An `exec` command exits with error code 137 (OOM) - nobody notices | | **Stale jobs** | A background task has been "running" for 45 minutes with no progress | | **Manual checking** | Users repeatedly ask "is it done yet?" instead of getting proactive alerts | -**Task Watchdog** bridges this gap by monitoring task lifecycle events and injecting timely notifications into the parent session — so you always know when something needs attention. +**Task Watchdog** bridges this gap by monitoring task lifecycle events and injecting timely notifications into the parent session - so you always know when something needs attention. ## Architecture @@ -52,7 +51,7 @@ OpenClaw excels at dispatching subagents and running long tasks via `exec`. 
But | Hook | What it does | |------|-------------| | **`subagent_ended`** | Detects abnormal subagent outcomes (error, timeout, killed, reset, deleted) and notifies the parent session. Sends continuation reminders on normal completions. | -| **`after_tool_call` (exec)** | Watches for abnormal `exec` exits — non-zero exit codes, OOM kills, signals, permission denied, command not found. | +| **`after_tool_call` (exec)** | Watches for abnormal `exec` exits - non-zero exit codes, OOM kills, signals, permission denied, command not found. | | **`heartbeat_prompt_contribution`** | When timer patrol is off, injects patrol instructions into heartbeat cycles to check for stale running tasks. | | **`gateway_start`** | Starts a timer-based patrol that periodically requests heartbeats to trigger stale-task checks. | | **`message_received`** | Records user message timestamps for silence detection. Resets consecutive tool call counter. | @@ -75,8 +74,14 @@ The plugin detects two types of agent silence: ## Installation ```bash -# Via OpenClaw plugin install -openclaw plugin install openclaw-task-watchdog +# From this checkout before npm publish +openclaw plugins install . +``` + +After the npm package is published: + +```bash +openclaw plugins install openclaw-task-watchdog # Via npm npm install openclaw-task-watchdog @@ -90,14 +95,16 @@ All settings are optional. Configure via `openclaw.plugin.json` → `config`: |-------|------|---------|-------------| | `subagentNotifyOn` | `string[]` | `["error", "timeout", "killed"]` | Subagent outcomes that trigger notifications. 
Options: `error`, `timeout`, `killed`, `reset`, `deleted` | | `execNotifyOnAbnormal` | `boolean` | `true` | Enable notifications on abnormal exec exits | -| `injectionTtlMs` | `integer` | `300000` (5 min) | TTL for next-turn injection messages (5000–600000 ms) | +| `injectionTtlMs` | `integer` | `300000` (5 min) | TTL for next-turn injection messages (5000 to 600000 ms) | | `timerPatrol` | `boolean` | `true` | Enable timer-based patrol on gateway start | | `heartbeatPatrol` | `boolean` | `false` | Enable heartbeat-based patrol (only when timerPatrol is disabled) | -| `timerPatrolIntervalMs` | `integer` | `120000` (2 min) | Timer patrol interval (30000–600000 ms) | -| `staleThresholdMs` | `integer` | `1800000` (30 min) | How long before a task is considered stale (60000–7200000 ms) | -| `consecutiveToolCallThreshold` | `integer` | `5` | Number of consecutive tool calls without a reply before triggering a nudge (2–20) | +| `timerPatrolIntervalMs` | `integer` | `120000` (2 min) | Timer patrol interval (30000 to 600000 ms) | +| `staleThresholdMs` | `integer` | `1800000` (30 min) | How long before a task is considered stale (60000 to 7200000 ms) | +| `consecutiveToolCallThreshold` | `integer` | `5` | Number of consecutive tool calls without a reply before triggering a nudge (2 to 20) | | `subagentConsecutiveThreshold` | `integer` | `15` | Consecutive tool call threshold for subagent sessions. Defaults to `consecutiveToolCallThreshold * 3` if not set | -| `silenceThresholdMs` | `integer` | `180000` (3 min) | How long after a user message without reply before triggering a silence nudge (60000–1800000 ms) | +| `silenceThresholdMs` | `integer` | `180000` (3 min) | How long after a user message without reply before triggering a silence nudge (60000 to 1800000 ms) | +| `feishuWebhookUrl` | `string` | `""` | Optional Feishu bot webhook URL for direct delivery | +| `forceFeishu` | `boolean` | `false` | Also send notifications to Feishu when `feishuWebhookUrl` is configured. 
System event delivery stays enabled | Example: @@ -106,11 +113,15 @@ Example: "task-watchdog": { "subagentNotifyOn": ["error", "timeout", "killed", "reset"], "timerPatrolIntervalMs": 180000, - "staleThresholdMs": 900000 + "staleThresholdMs": 900000, + "feishuWebhookUrl": "", + "forceFeishu": false } } ``` +Leave `forceFeishu` as `false` to use only OpenClaw system-event delivery. Set it to `true` when you want direct Feishu bot delivery as a second path. Feishu webhook failures are logged with `warn` and do not stop the system-event channel. + ## Development ```bash @@ -147,11 +158,12 @@ OpenClaw 通过子 agent 或 `exec` 执行长任务时,失败可能被忽略 ### 安装 ```bash -openclaw plugin install openclaw-task-watchdog +openclaw plugins install . ``` ### 开发 ```bash -npm install && npx tsc +npm install +npm run build ``` diff --git a/SKILL.md b/SKILL.md index 773076f..6f9447e 100644 --- a/SKILL.md +++ b/SKILL.md @@ -12,12 +12,12 @@ OpenClaw plugin that auto-notifies on subagent failures, exec errors, and stale ## Install ```bash -openclaw plugin install openclaw-task-watchdog +openclaw plugins install . ``` ## Config -All optional — works with defaults: +All optional - works with defaults: ```json { @@ -26,7 +26,11 @@ All optional — works with defaults: "execNotifyOnAbnormal": true, "timerPatrol": true, "timerPatrolIntervalMs": 120000, - "staleThresholdMs": 1800000 + "staleThresholdMs": 1800000, + "feishuWebhookUrl": "", + "forceFeishu": false } } ``` + +Set `forceFeishu` to `true` only after adding a Feishu bot webhook URL. Webhook failures are logged with `warn`; normal OpenClaw system-event notifications continue. 
diff --git a/openclaw.plugin.json b/openclaw.plugin.json
index 407682e..47fe692 100644
--- a/openclaw.plugin.json
+++ b/openclaw.plugin.json
@@ -74,6 +74,16 @@
         "maximum": 1800000,
         "default": 180000,
         "description": "How long after a user message without reply before triggering a silence nudge (default 3 minutes)"
+      },
+      "feishuWebhookUrl": {
+        "type": "string",
+        "default": "",
+        "description": "Optional Feishu bot webhook URL for direct delivery. Leave empty to use only OpenClaw system events."
+      },
+      "forceFeishu": {
+        "type": "boolean",
+        "default": false,
+        "description": "Also send notifications directly to Feishu when feishuWebhookUrl is configured. System event delivery remains enabled."
       }
     }
   }
diff --git a/package.json b/package.json
index 4823bbd..8a42000 100644
--- a/package.json
+++ b/package.json
@@ -3,6 +3,11 @@
   "version": "1.4.0",
   "description": "Watchdog plugin: notify parent agent on subagent failure / exec abnormal exit via next-turn injection",
   "type": "module",
+  "scripts": {
+    "build": "tsc",
+    "typecheck": "tsc --noEmit",
+    "pack:dry-run": "npm pack --dry-run --json"
+  },
   "exports": {
     ".": {
       "import": "./dist/index.mjs"
diff --git a/src/index.mts b/src/index.mts
index 08faeff..f305baf 100644
--- a/src/index.mts
+++ b/src/index.mts
@@ -16,6 +16,8 @@ type WatchdogConfig = {
   consecutiveToolCallThreshold?: number;
   subagentConsecutiveThreshold?: number;
   silenceThresholdMs?: number;
+  feishuWebhookUrl?: string;
+  forceFeishu?: boolean;
 };
 
 // ── Constants ──────────────────────────────────────────────────────────────
@@ -109,6 +111,36 @@ export default definePluginEntry({
       return `已等待 ${formatDuration(elapsed)}`;
     }
 
+    // Upper bound for a single Feishu webhook request. Without it, a stalled
+    // endpoint would keep the awaiting notification path pending indefinitely.
+    const FEISHU_WEBHOOK_TIMEOUT_MS = 10000;
+
+    /**
+     * Best-effort delivery of `text` to a Feishu bot webhook.
+     * Never rejects: a non-2xx response, a timeout, or a network error is logged
+     * with `warn` so the caller's other delivery paths keep running.
+     */
+    async function notifyViaFeishu(webhookUrl: string, text: string): Promise<void> {
+      try {
+        const response = await fetch(webhookUrl, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({
+            msg_type: "text",
+            content: { text },
+          }),
+          // Abort the request once the timeout elapses; the rejection is caught below.
+          signal: AbortSignal.timeout(FEISHU_WEBHOOK_TIMEOUT_MS),
+        });
+
+        if (!response.ok) {
+          log.warn(`[watchdog] Feishu webhook returned HTTP ${response.status}`);
+        }
+      } catch (err) {
+        log.warn(`[watchdog] Feishu webhook failed: ${err instanceof Error ? err.message : String(err)}`);
+      }
+    }
+
     // ── Idempotency guard ──────────────────────────────────────────────────
 
     const idempotencyCleanupTimer = setInterval(() => {
@@ -144,6 +176,8 @@ export default definePluginEntry({
       sessionKey: string,
       critical: boolean = false,
     ): Promise<boolean> {
+      const config = (api.pluginConfig as WatchdogConfig) ?? {};
+
       if (isNotified(idempotencyKey)) {
         log.debug(`[watchdog] already notified → ${idempotencyKey}`);
         return false;
@@ -182,6 +216,12 @@ export default definePluginEntry({
         pendingAlerts.splice(0, pendingAlerts.length - PENDING_ALERTS_MAX);
       }
 
+      // Optional Feishu direct delivery, disabled by default.
+      const feishuWebhookUrl = config.feishuWebhookUrl?.trim();
+      if (config.forceFeishu === true && feishuWebhookUrl) {
+        await notifyViaFeishu(feishuWebhookUrl, safeText);
+      }
+
       // ── Path B: forced delivery for critical alerts ──
       if (critical) {
         api.runtime?.system?.runHeartbeatOnce?.({