From f8113c5dfd5946e51b4c34c5a4f365bab0f85152 Mon Sep 17 00:00:00 2001 From: eanzhao Date: Mon, 27 Apr 2026 18:14:13 +0800 Subject: [PATCH 1/4] enhance(daily): structured-section prompt with omit-empty rule and repo-aware queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The single-paragraph "3-6 bullets" daily prompt produced thin reports and padded when sources were silent. Treat the prompt as a fetch-and-summarize spec: explicit section order (Title / Shipped / In flight / Reviews / Issues / CI / Trend / Blockers), hard per-section line budgets, and an "omit the section entirely if empty" rule. Switch the suggested GitHub queries to per-repo endpoints when `repositories=` is provided so the LLM doesn't fold the allowlist back into a global /search/* (which doesn't filter by repo cleanly). Pin the new schema and the empty-section rule with two AgentBuilderToolTests checks so future copy edits don't silently regress the spec. Refs #423 §A. Streaming-edit delivery (§B Option 2), batched delivery (§B Option 1), and the failure-notification cross-tenant fallback (§C) remain follow-ups. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../AgentBuilderTemplates.cs | 92 ++++++++++++++----- .../AgentBuilderToolTests.cs | 72 +++++++++++++++ 2 files changed, 141 insertions(+), 23 deletions(-) diff --git a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs index 9a9d63289..f998a6104 100644 --- a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs +++ b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs @@ -41,31 +41,11 @@ public static bool TryBuildDailyReportSpec( } var repoList = NormalizeRepositories(repositories); - var repoConstraint = repoList.Count == 0 - ? "Search across the user's recent GitHub activity." - : $"Focus on these repositories first: {string.Join(", ", repoList)}."; - - var skillPrompt = new StringBuilder() - .AppendLine("You are Aevatar Daily Report Runner.") - .AppendLine("Each run produces one concise Feishu-ready update for the user's recent work.") - .AppendLine("Use NyxID-backed tools only. Prefer nyxid_proxy with service slug `api-github` for GitHub data access.") - .AppendLine("Required output format:") - .AppendLine("1. A short title line") - .AppendLine("2. 3-6 concise bullet points") - .AppendLine("3. One closing line with blockers or `No blockers.`") - .AppendLine() - .AppendLine($"Primary GitHub username: {normalizedUser}") - .AppendLine(repoConstraint) - .AppendLine("Suggested GitHub proxy calls:") - .AppendLine("- GET /search/commits?q=author:{username}+author-date:>={iso_date}") - .AppendLine("- GET /search/issues?q=author:{username}+updated:>={iso_date}") - .AppendLine("- GET /search/issues?q=commenter:{username}+updated:>={iso_date}") - .AppendLine("If there is no meaningful activity, say so plainly instead of inventing progress.") - .ToString(); + var skillPrompt = BuildDailyReportSkillPrompt(normalizedUser, repoList); var executionPrompt = repoList.Count == 0 - ? $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Return plain text only." - : $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Prioritize repositories: {string.Join(", ", repoList)}. Return plain text only."; + ? $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Follow the section schema in the system prompt. Return plain text only." + : $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Restrict source queries to these repositories (one pass per repo, do not collapse to a global search): {string.Join(", ", repoList)}. Follow the section schema in the system prompt. Return plain text only."; spec = new DailyReportTemplateSpec( "daily_report", @@ -122,6 +102,72 @@ public static bool TryBuildSocialMediaSpec( return true; } + // Daily report system prompt is treated as a fetch-and-summarize SPECIFICATION rather than a + // freeform creative brief: explicit section order, hard per-section line budgets, and an + // "omit if empty" rule. See issue #423 for the rationale (current single-paragraph output is + // too thin and pads when sources are silent). + private static string BuildDailyReportSkillPrompt(string normalizedUser, IReadOnlyList repoList) + { + var repoScope = repoList.Count == 0 + ? "Repository scope: not pinned. Use the global GitHub search endpoints listed below." + : $"Repository scope: {string.Join(", ", repoList)}. Run the per-repo endpoints once per repo; do NOT fold the list into a global search query (the /search/* endpoints don't filter to a repo allowlist cleanly)."; + + var prompt = new StringBuilder() + .AppendLine("You are Aevatar Daily Report Runner.") + .AppendLine("Each run produces one Feishu-ready summary of the user's recent GitHub work over the last 24 hours.") + .AppendLine("Use NyxID-backed tools only. Prefer nyxid_proxy with service slug `api-github` for GitHub data access.") + .AppendLine() + .AppendLine($"Primary GitHub username: {normalizedUser}") + .AppendLine(repoScope) + .AppendLine() + .AppendLine("# Output sections (emit in this exact order)") + .AppendLine() + .AppendLine("Each section has a hard line budget. If a section has zero data OR the source is unavailable, OMIT THE SECTION ENTIRELY (header and body) — do not pad with `no activity` or filler.") + .AppendLine() + .AppendLine("1. Title (1 line) — `Daily report — {username} — last 24h`.") + .AppendLine("2. Shipped (≤6 lines) — PRs merged in the last 24h and commits to the default branch. Format `- [owner/repo#NNN] title`.") + .AppendLine("3. In flight (≤6 lines) — open PRs authored by the user. Append `(stale)` when the PR has had no activity for >24h.") + .AppendLine("4. Reviews (≤4 lines) — PRs the user reviewed in the window. Include kind counts, e.g. `approved 2 / requested-changes 1 / commented 3`.") + .AppendLine("5. Issues (≤4 lines) — issues opened, closed, or commented on by the user.") + .AppendLine("6. CI (≤3 lines) — failing builds on the default branch of the tracked repos (omit entirely when none are red).") + .AppendLine("7. Trend (1 line, optional) — running totals vs the prior 24h, e.g. `Trend: shipped 3 (+1), reviews 5 (-2)`. Omit when the prior-window data could not be fetched.") + .AppendLine("8. Blockers (1 line, always last) — `Blockers: ` or `No blockers.` Auto-detect from: PRs >24h waiting on a review, CI red >2h, issues with labels `blocked` or `needs-info`.") + .AppendLine() + .AppendLine("If the entire 24h window has no measurable activity across all sources, return ONLY the title line followed by `No measurable activity in the last 24h.` and nothing else.") + .AppendLine("Do not invent activity. Do not paraphrase issue or PR titles into different wording. Keep each line short — Feishu text messages have a body cap, prefer trimming trailing detail over exceeding it.") + .AppendLine() + .AppendLine("# Suggested GitHub proxy calls") + .AppendLine(); + + if (repoList.Count == 0) + { + prompt + .AppendLine("Repository allowlist not provided — use the global search endpoints (substitute `{iso_date}` with the start of the 24h window in ISO 8601 UTC):") + .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:merged+merged:>={iso_date} // shipped PRs") + .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:open // in flight") + .AppendLine("- GET /search/issues?q=reviewed-by:{username}+updated:>={iso_date} // reviews") + .AppendLine("- GET /search/issues?q=author:{username}+is:issue+created:>={iso_date} // issues opened") + .AppendLine("- GET /search/issues?q=author:{username}+is:issue+is:closed+closed:>={iso_date} // issues closed") + .AppendLine("- GET /search/issues?q=commenter:{username}+updated:>={iso_date} // issues commented"); + } + else + { + prompt + .AppendLine("Repository allowlist provided — run these per-repo (replace `{owner}/{repo}` with each entry, substitute `{iso_date}` with the start of the 24h window in ISO 8601 UTC). Do NOT collapse into one global query.") + .AppendLine("- GET /repos/{owner}/{repo}/pulls?state=closed&per_page=20 // filter merged_at >= {iso_date} client-side for shipped") + .AppendLine("- GET /repos/{owner}/{repo}/pulls?state=open&per_page=20 // filter author = {username} client-side for in flight") + .AppendLine("- GET /search/issues?q=reviewed-by:{username}+repo:{owner}/{repo}+updated:>={iso_date}") + .AppendLine("- GET /search/issues?q=author:{username}+repo:{owner}/{repo}+is:issue+updated:>={iso_date}") + .AppendLine("- GET /repos/{owner}/{repo}/actions/runs?branch={default_branch}&per_page=10 // filter conclusion=failure for CI section"); + } + + prompt + .AppendLine() + .AppendLine("If a query returns 4xx, 5xx, or empty, treat that source as zero for the affected section and continue — do not retry, do not fall back to invented data."); + + return prompt.ToString(); + } + private static string BuildSocialMediaWorkflowId(string agentId) => $"social-media-{SanitizeSegment(agentId)}"; diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs index 34f3ae433..27e149658 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs @@ -54,6 +54,78 @@ public async Task ExecuteAsync_ListTemplates_ReturnsDailyReportTemplate() } } + [Fact] + public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_AndOmitWhenEmptyRule() + { + // Pinning test for issue #423: the daily prompt is treated as a fetch-and-summarize + // SPEC, not a freeform brief. This test fails fast on copy edits that would silently + // regress the multi-section schema, the per-section line budgets, the "omit empty + // section" rule, or the "no measurable activity" empty-day fallback. + var ok = AgentBuilderTemplates.TryBuildDailyReportSpec( + githubUsername: "alice", + repositories: null, + out var spec, + out var error); + + ok.Should().BeTrue(); + error.Should().BeNull(); + spec.Should().NotBeNull(); + + var skillContent = spec!.SkillContent; + + // Structural sections must all be named in order; downstream LLM instructions hinge on + // the section labels matching exactly so the "Format `- [owner/repo#NNN] title`" + // micro-instructions land. + skillContent.Should().Contain("# Output sections"); + skillContent.Should().Contain("1. Title"); + skillContent.Should().Contain("2. Shipped"); + skillContent.Should().Contain("3. In flight"); + skillContent.Should().Contain("4. Reviews"); + skillContent.Should().Contain("5. Issues"); + skillContent.Should().Contain("6. CI"); + skillContent.Should().Contain("8. Blockers"); + + // Empty-handling rules — the bug we're guarding against is the LLM padding sections + // with "no activity in this area" boilerplate when sources are silent. + skillContent.Should().Contain("OMIT THE SECTION ENTIRELY"); + skillContent.Should().Contain("No measurable activity in the last 24h."); + skillContent.Should().Contain("Do not invent activity."); + + // Username substitution must remain intact (other tests check it under the saved-user + // / derived-user paths; this assertion guards the no-args path). + skillContent.Should().Contain("Primary GitHub username: alice"); + } + + [Fact] + public void TryBuildDailyReportSpec_RepoAllowlist_SwitchesToPerRepoQueryGuidance() + { + // Per issue #423: when `repositories=` is provided, the prompt must steer the LLM toward + // per-repo `/repos/{owner}/{repo}/...` calls and explicitly refuse the global-search + // path (those endpoints don't filter to a repo allowlist cleanly). A regression here + // would silently degrade multi-repo dailies back to noisy global-search behavior. + var ok = AgentBuilderTemplates.TryBuildDailyReportSpec( + githubUsername: "alice", + repositories: "acme/api, acme/web", + out var spec, + out var error); + + ok.Should().BeTrue(); + error.Should().BeNull(); + spec.Should().NotBeNull(); + + var skillContent = spec!.SkillContent; + skillContent.Should().Contain("Repository scope: acme/api, acme/web"); + skillContent.Should().Contain("Repository allowlist provided"); + skillContent.Should().Contain("/repos/{owner}/{repo}/pulls"); + skillContent.Should().Contain("/repos/{owner}/{repo}/actions/runs"); + skillContent.Should().Contain("Do NOT collapse into one global query."); + + // The execution prompt is what the runner sends per-trigger; it must echo the + // per-repo constraint so the LLM sees it on every run, not only at agent-create time. + spec.ExecutionPrompt.Should().Contain("acme/api, acme/web"); + spec.ExecutionPrompt.Should().Contain("one pass per repo"); + } + [Fact] public async Task ExecuteAsync_CreateAgent_RejectsGroupChats() { From e408253307bc5266a587183725373f3d8524569c Mon Sep 17 00:00:00 2001 From: eanzhao Date: Mon, 27 Apr 2026 21:27:27 +0800 Subject: [PATCH 2/4] address PR #458 review: tighten daily prompt against six review issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - §6 CI section: explicit OMIT in no-repo mode + the no-repo branch now states the CI gap inline; per-repo `/actions/runs` drops the `{default_branch}` placeholder and filters `conclusion=failure` + `created_at` client-side. - §8 Blockers: qualified as `always last unless the report is empty-day` and the empty-day fallback now explicitly says `do not append a Blockers line`. Resolves the contradiction a weaker model could synthesise both sides of. - Per-repo Shipped PRs: switch from `/repos/{owner}/{repo}/pulls?state=closed` to `/search/issues?q=repo:...+author:...+is:pr+is:merged+merged:>=...`. The list endpoint returned all contributors' closed-but-unmerged PRs (codex P1) and was unreliable on active repos with one-page pagination (eanzhao inline). Search form keys on merge time + author server-side. - §2 Shipped: add `/search/commits` queries in both modes so the "PRs merged AND commits authored" claim has a real source (codex P2). - Repo-mode Issues: add a `commenter:{username}` query so the "opened, closed, or commented on" schema isn't dropped by author-only filtering (codex P2). - Substitution variables: document `{username}`, `{iso_date}`, and `{owner}/{repo}` with concrete examples + a "do not leave literal placeholders" footer. The LLM was previously expected to infer the username substitution from "Primary GitHub username:" alone. - Tests: pin section 7 (Trend) slot in the order-pinning assertion (the schema position is load-bearing even when the section is optional). Pin the new commit query, the per-repo commenter query, the empty-day Blockers carve-out, the substitution-variable docs, and assert the dropped `/repos/{owner}/{repo}/pulls` + `{default_branch}` shapes are gone. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../AgentBuilderTemplates.cs | 34 ++++++----- .../AgentBuilderToolTests.cs | 58 ++++++++++++++++--- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs index f998a6104..ea77fed36 100644 --- a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs +++ b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs @@ -125,45 +125,53 @@ private static string BuildDailyReportSkillPrompt(string normalizedUser, IReadOn .AppendLine("Each section has a hard line budget. If a section has zero data OR the source is unavailable, OMIT THE SECTION ENTIRELY (header and body) — do not pad with `no activity` or filler.") .AppendLine() .AppendLine("1. Title (1 line) — `Daily report — {username} — last 24h`.") - .AppendLine("2. Shipped (≤6 lines) — PRs merged in the last 24h and commits to the default branch. Format `- [owner/repo#NNN] title`.") + .AppendLine("2. Shipped (≤6 lines) — PRs merged AND commits authored by the user in the window. Format `- [owner/repo#NNN] title` for PRs, `- [owner/repo@sha7] subject` for commits.") .AppendLine("3. In flight (≤6 lines) — open PRs authored by the user. Append `(stale)` when the PR has had no activity for >24h.") .AppendLine("4. Reviews (≤4 lines) — PRs the user reviewed in the window. Include kind counts, e.g. `approved 2 / requested-changes 1 / commented 3`.") .AppendLine("5. Issues (≤4 lines) — issues opened, closed, or commented on by the user.") - .AppendLine("6. CI (≤3 lines) — failing builds on the default branch of the tracked repos (omit entirely when none are red).") + .AppendLine("6. CI (≤3 lines) — failing GitHub Actions runs on the tracked repos. Best-effort and only feasible in repo-allowlist mode; OMIT this section in no-repo mode (the global search endpoints do not expose Actions run conclusions).") .AppendLine("7. Trend (1 line, optional) — running totals vs the prior 24h, e.g. `Trend: shipped 3 (+1), reviews 5 (-2)`. Omit when the prior-window data could not be fetched.") - .AppendLine("8. Blockers (1 line, always last) — `Blockers: ` or `No blockers.` Auto-detect from: PRs >24h waiting on a review, CI red >2h, issues with labels `blocked` or `needs-info`.") + .AppendLine("8. Blockers (1 line, always last unless the report is empty-day) — `Blockers: ` or `No blockers.` Auto-detect from: PRs >24h waiting on a review, CI red >2h, issues with labels `blocked` or `needs-info`.") .AppendLine() - .AppendLine("If the entire 24h window has no measurable activity across all sources, return ONLY the title line followed by `No measurable activity in the last 24h.` and nothing else.") + .AppendLine("If the entire 24h window has no measurable activity across all sources, return ONLY the title line followed by `No measurable activity in the last 24h.` and nothing else (do not append a Blockers line in this case).") .AppendLine("Do not invent activity. Do not paraphrase issue or PR titles into different wording. Keep each line short — Feishu text messages have a body cap, prefer trimming trailing detail over exceeding it.") .AppendLine() .AppendLine("# Suggested GitHub proxy calls") .AppendLine(); + prompt + .AppendLine($"Substitution variables in the URLs below: `{{username}}` → `{normalizedUser}`; `{{iso_date}}` → start of the 24h window in ISO 8601 UTC (e.g. `2026-04-26T09:00:00Z`); `{{owner}}/{{repo}}` → each entry from the repository allowlist. Always substitute these literally before sending.") + .AppendLine(); + if (repoList.Count == 0) { prompt - .AppendLine("Repository allowlist not provided — use the global search endpoints (substitute `{iso_date}` with the start of the 24h window in ISO 8601 UTC):") + .AppendLine("Repository allowlist not provided — use the global search endpoints:") .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:merged+merged:>={iso_date} // shipped PRs") + .AppendLine("- GET /search/commits?q=author:{username}+author-date:>={iso_date} // shipped commits") .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:open // in flight") .AppendLine("- GET /search/issues?q=reviewed-by:{username}+updated:>={iso_date} // reviews") .AppendLine("- GET /search/issues?q=author:{username}+is:issue+created:>={iso_date} // issues opened") .AppendLine("- GET /search/issues?q=author:{username}+is:issue+is:closed+closed:>={iso_date} // issues closed") - .AppendLine("- GET /search/issues?q=commenter:{username}+updated:>={iso_date} // issues commented"); + .AppendLine("- GET /search/issues?q=commenter:{username}+updated:>={iso_date} // issues commented") + .AppendLine("// CI section is omitted in no-repo mode: the global /search/* endpoints do not expose Actions run conclusions, and per-repo /actions/runs requires a known repo. Skip section 6 entirely."); } else { prompt - .AppendLine("Repository allowlist provided — run these per-repo (replace `{owner}/{repo}` with each entry, substitute `{iso_date}` with the start of the 24h window in ISO 8601 UTC). Do NOT collapse into one global query.") - .AppendLine("- GET /repos/{owner}/{repo}/pulls?state=closed&per_page=20 // filter merged_at >= {iso_date} client-side for shipped") - .AppendLine("- GET /repos/{owner}/{repo}/pulls?state=open&per_page=20 // filter author = {username} client-side for in flight") - .AppendLine("- GET /search/issues?q=reviewed-by:{username}+repo:{owner}/{repo}+updated:>={iso_date}") - .AppendLine("- GET /search/issues?q=author:{username}+repo:{owner}/{repo}+is:issue+updated:>={iso_date}") - .AppendLine("- GET /repos/{owner}/{repo}/actions/runs?branch={default_branch}&per_page=10 // filter conclusion=failure for CI section"); + .AppendLine("Repository allowlist provided — run these per-repo (one search per allowlist entry; do NOT collapse into one global query):") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:merged+merged:>={iso_date} // shipped PRs (search keys on merge time + author, reliable across pagination)") + .AppendLine("- GET /search/commits?q=repo:{owner}/{repo}+author:{username}+author-date:>={iso_date} // shipped commits") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:open // in flight") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+reviewed-by:{username}+updated:>={iso_date} // reviews") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:issue+updated:>={iso_date} // issues authored (created/closed)") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+commenter:{username}+is:issue+updated:>={iso_date} // issues commented") + .AppendLine("- GET /repos/{owner}/{repo}/actions/runs?per_page=10 // CI: filter `conclusion=failure` and `created_at >= {iso_date}` client-side; do NOT add a `branch=` filter (default branch varies; trim noise client-side instead)"); } prompt .AppendLine() - .AppendLine("If a query returns 4xx, 5xx, or empty, treat that source as zero for the affected section and continue — do not retry, do not fall back to invented data."); + .AppendLine("If a query returns 4xx, 5xx, or empty, treat that source as zero for the affected section and continue — do not retry, do not fall back to invented data. Do not leave any literal `{username}` / `{iso_date}` / `{owner}/{repo}` placeholders in outbound URLs."); return prompt.ToString(); } diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs index 27e149658..e856a9f70 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs @@ -73,9 +73,11 @@ public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_And var skillContent = spec!.SkillContent; - // Structural sections must all be named in order; downstream LLM instructions hinge on - // the section labels matching exactly so the "Format `- [owner/repo#NNN] title`" - // micro-instructions land. + // All eight section slots must be pinned in order — the section position itself is + // load-bearing for the LLM's emission order, even when section 7 (Trend) is optional + // (the schema slot still needs to exist so a model that fetches the prior window + // emits it in the right place). Skipping any number here would let copy edits + // silently drop or reorder a section. skillContent.Should().Contain("# Output sections"); skillContent.Should().Contain("1. Title"); skillContent.Should().Contain("2. Shipped"); @@ -83,6 +85,7 @@ public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_And skillContent.Should().Contain("4. Reviews"); skillContent.Should().Contain("5. Issues"); skillContent.Should().Contain("6. CI"); + skillContent.Should().Contain("7. Trend"); skillContent.Should().Contain("8. Blockers"); // Empty-handling rules — the bug we're guarding against is the LLM padding sections @@ -91,18 +94,37 @@ public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_And skillContent.Should().Contain("No measurable activity in the last 24h."); skillContent.Should().Contain("Do not invent activity."); + // Empty-day Blockers carve-out (codex review of PR #458): the §8 schema says + // "always last" but the empty-day fallback says "return ONLY the title line". This + // pin keeps both qualifiers present so a copy-edit can't reintroduce the + // contradiction by dropping either one. + skillContent.Should().Contain("always last unless the report is empty-day"); + skillContent.Should().Contain("do not append a Blockers line in this case"); + + // Substitution-variable documentation must be present and tied to the actual + // username; otherwise the LLM may emit literal `{username}` placeholders in URLs. + skillContent.Should().Contain("`{username}` → `alice`"); + skillContent.Should().Contain("`{iso_date}` → start of the 24h window"); + // Username substitution must remain intact (other tests check it under the saved-user // / derived-user paths; this assertion guards the no-args path). skillContent.Should().Contain("Primary GitHub username: alice"); + + // No-repo mode must include a commit query (Shipped section claims to cover commits) + // and must explicitly skip the CI section (no global Actions run endpoint exists). + skillContent.Should().Contain("/search/commits?q=author:{username}+author-date:>={iso_date}"); + skillContent.Should().Contain("CI section is omitted in no-repo mode"); } [Fact] public void TryBuildDailyReportSpec_RepoAllowlist_SwitchesToPerRepoQueryGuidance() { // Per issue #423: when `repositories=` is provided, the prompt must steer the LLM toward - // per-repo `/repos/{owner}/{repo}/...` calls and explicitly refuse the global-search - // path (those endpoints don't filter to a repo allowlist cleanly). A regression here - // would silently degrade multi-repo dailies back to noisy global-search behavior. + // per-repo searches and explicitly refuse the collapsed-allowlist global query. PR #458 + // review further required: shipped PRs must filter by author + merge time (search-API + // form, not /pulls?state=closed which is keyed on update time and ignores author), + // commit shipping must have its own source, repo-scoped issues must include the + // commenter case, and the CI query must not embed a {default_branch} placeholder. var ok = AgentBuilderTemplates.TryBuildDailyReportSpec( githubUsername: "alice", repositories: "acme/api, acme/web", @@ -116,9 +138,27 @@ public void TryBuildDailyReportSpec_RepoAllowlist_SwitchesToPerRepoQueryGuidance var skillContent = spec!.SkillContent; skillContent.Should().Contain("Repository scope: acme/api, acme/web"); skillContent.Should().Contain("Repository allowlist provided"); - skillContent.Should().Contain("/repos/{owner}/{repo}/pulls"); - skillContent.Should().Contain("/repos/{owner}/{repo}/actions/runs"); - skillContent.Should().Contain("Do NOT collapse into one global query."); + skillContent.Should().Contain("do NOT collapse into one global query"); + + // Shipped PRs in repo mode: search-API form keyed on author + merge time. The previous + // /repos/{owner}/{repo}/pulls?state=closed shape (a) returned closed-but-unmerged PRs + // and (b) had no reliable pagination across active repos — codex P1 + eanzhao inline + // both flagged this. Guard against regression. + skillContent.Should().Contain("/search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:merged+merged:>={iso_date}"); + skillContent.Should().NotContain("/repos/{owner}/{repo}/pulls?state=closed"); + + // Shipped commits in repo mode (Shipped section schema includes commits). + skillContent.Should().Contain("/search/commits?q=repo:{owner}/{repo}+author:{username}+author-date:>={iso_date}"); + + // Issues commented on in repo mode (codex P2: schema says "opened, closed, or + // commented on" but author-only query drops the commenter case). + skillContent.Should().Contain("/search/issues?q=repo:{owner}/{repo}+commenter:{username}+is:issue+updated:>={iso_date}"); + + // CI query must NOT embed a {default_branch} placeholder (the LLM has no way to fill + // it without an extra round-trip and a literal `{default_branch}` would land in the + // outbound URL). Filter conclusion + created_at client-side instead. + skillContent.Should().NotContain("{default_branch}"); + skillContent.Should().Contain("/repos/{owner}/{repo}/actions/runs?per_page=10"); // The execution prompt is what the runner sends per-trigger; it must echo the // per-repo constraint so the LLM sees it on every run, not only at agent-create time. From 36ea37beb1524c81642285eec942d2398842ef85 Mon Sep 17 00:00:00 2001 From: eanzhao Date: Mon, 27 Apr 2026 21:47:50 +0800 Subject: [PATCH 3/4] address PR #458 follow-up: don't collapse 4xx/5xx into empty-day fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eanzhao P1: the previous "4xx/5xx/empty → treat as zero" instruction silently masked transport/auth failures as healthy empty-day reports. A revoked GitHub OAuth grant or proxy outage would produce `No measurable activity in the last 24h.` with the agent left in a Running state — which is exactly the bug referenced in #439. Replace the catch-all with a tiered classification: - 2xx with an empty list → genuine zero data; section is omitted as before. - 4xx / 5xx / tool error envelope (`{"error": true, ...}`, revoked grant, proxy timeout) → source UNAVAILABLE, append `Source health: ...` footer below Blockers naming the failed sources. - Empty-day fallback (`No measurable activity in the last 24h.`) is now ONLY valid when EVERY source returned 2xx. If ANY source failed, emit at least the title plus the `Source health:` footer. Pin this distinction with three new test assertions plus a `NotContain` on the old wording, so future copy edits can't quietly regress the failure-handling behavior. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../AgentBuilderTemplates.cs | 8 +++++++- .../AgentBuilderToolTests.cs | 11 +++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs index ea77fed36..6f93972dd 100644 --- a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs +++ b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs @@ -171,7 +171,13 @@ private static string BuildDailyReportSkillPrompt(string normalizedUser, IReadOn prompt .AppendLine() - .AppendLine("If a query returns 4xx, 5xx, or empty, treat that source as zero for the affected section and continue — do not retry, do not fall back to invented data. Do not leave any literal `{username}` / `{iso_date}` / `{owner}/{repo}` placeholders in outbound URLs."); + .AppendLine("# Source health — distinguish empty results from source failures") + .AppendLine() + .AppendLine("Do NOT collapse transport, auth, or proxy failures into the empty-day fallback. Classify every tool result before mapping it to a section:") + .AppendLine("- 2xx with an empty list / no matching items → genuine zero data; the section is omitted per the schema.") + .AppendLine("- 4xx / 5xx / tool error envelope (e.g. `{\"error\": true, ...}`, revoked OAuth grant, proxy timeout) → the SOURCE is UNAVAILABLE, not zero. Append a final `Source health: ` line at the very bottom of the report (after Blockers).") + .AppendLine("- The empty-day fallback (`No measurable activity in the last 24h.`) is ONLY valid when EVERY source returned 2xx. If ANY source failed, emit at least the title plus a `Source health:` footer — silently masking credential expiration as `No measurable activity` is the bug we are guarding against.") + .AppendLine("- Do not retry. Do not fall back to invented data. Do not leave any literal `{username}` / `{iso_date}` / `{owner}/{repo}` placeholders in outbound URLs."); return prompt.ToString(); } diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs index e856a9f70..4e0d64228 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs @@ -101,6 +101,17 @@ public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_And skillContent.Should().Contain("always last unless the report is empty-day"); skillContent.Should().Contain("do not append a Blockers line in this case"); + // Source-health distinction (eanzhao P1 review of PR #458, refs issue #439): + // collapsing 4xx/5xx/error-shaped tool results into "zero data" silently masks + // revoked OAuth grants and proxy outages as healthy empty-day reports. Pin the + // 2xx-empty vs source-failure distinction AND the rule that the empty-day fallback + // only applies when every source returned 2xx, so a copy edit cannot regress this + // back to the original "4xx/5xx/empty → treat as zero" wording. + skillContent.Should().Contain("2xx with an empty list"); + skillContent.Should().Contain("Source health:"); + skillContent.Should().Contain("ONLY valid when EVERY source returned 2xx"); + skillContent.Should().NotContain("4xx, 5xx, or empty, treat that source as zero"); + // Substitution-variable documentation must be present and tied to the actual // username; otherwise the LLM may emit literal `{username}` placeholders in URLs. skillContent.Should().Contain("`{username}` → `alice`"); From 7c9c8e6bed389c32318adc8ab086c4de9d963e41 Mon Sep 17 00:00:00 2001 From: eanzhao Date: Mon, 27 Apr 2026 22:02:56 +0800 Subject: [PATCH 4/4] =?UTF-8?q?address=20PR=20#458=20follow-up:=20promote?= =?UTF-8?q?=20Source=20health=20to=20explicit=20=C2=A79=20footer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eanzhao P2: §8 Blockers said "always last unless empty-day" but the source-health rule required `Source health:` "at the very bottom after Blockers" — two incompatible ordering rules in exactly the failure case #439 cares about. A weaker model could drop the Source health footer or place it before Blockers. Make Source health an explicit §9 footer in the schema list: - §8 Blockers: drop the "always last" qualifier; pin as `Position-locked at slot 8; the only section that may sit below it is the §9 Source health footer.` - §9 Source health: 1-line conditional footer, emitted only when at least one source returned non-2xx. When emitted, always the final line. - Empty-day fallback: explicitly forbid emitting §8 Blockers AND §9 Source health on a genuine empty day (every source 2xx with no items). If any source failed, the empty-day path is invalid — emit at least title + §9. - The "Source health — distinguish empty results from source failures" detail block is reframed as the rule for emitting §9, not a parallel ordering claim. Test pins: assert `9. Source health` in the order-pinning list, pin the new "Position-locked at slot 8" + "only section that may sit below it" wording on §8, and pin the strict empty-day forbidding both §8 and §9. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../AgentBuilderTemplates.cs | 13 +++++----- .../AgentBuilderToolTests.cs | 24 +++++++++++-------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs index 6f93972dd..c7ef95427 100644 --- a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs +++ b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs @@ -131,9 +131,10 @@ private static string BuildDailyReportSkillPrompt(string normalizedUser, IReadOn .AppendLine("5. Issues (≤4 lines) — issues opened, closed, or commented on by the user.") .AppendLine("6. CI (≤3 lines) — failing GitHub Actions runs on the tracked repos. Best-effort and only feasible in repo-allowlist mode; OMIT this section in no-repo mode (the global search endpoints do not expose Actions run conclusions).") .AppendLine("7. Trend (1 line, optional) — running totals vs the prior 24h, e.g. `Trend: shipped 3 (+1), reviews 5 (-2)`. Omit when the prior-window data could not be fetched.") - .AppendLine("8. Blockers (1 line, always last unless the report is empty-day) — `Blockers: ` or `No blockers.` Auto-detect from: PRs >24h waiting on a review, CI red >2h, issues with labels `blocked` or `needs-info`.") + .AppendLine("8. Blockers (1 line) — `Blockers: ` or `No blockers.` Auto-detect from: PRs >24h waiting on a review, CI red >2h, issues with labels `blocked` or `needs-info`. Position-locked at slot 8; the only section that may sit below it is the §9 Source health footer.") + .AppendLine("9. Source health (1 line, footer) — `Source health: `. Emit ONLY when at least one source returned a non-2xx / error-shaped tool result. When emitted, this is always the final line — below Blockers, below everything.") .AppendLine() - .AppendLine("If the entire 24h window has no measurable activity across all sources, return ONLY the title line followed by `No measurable activity in the last 24h.` and nothing else (do not append a Blockers line in this case).") + .AppendLine("If EVERY source returned 2xx with no matching items (genuine empty day), return ONLY the title line followed by `No measurable activity in the last 24h.` and nothing else — do NOT emit Blockers or Source health. If ANY source failed, you are NOT on the empty-day path: emit at least the title line plus the §9 Source health footer (any other sections that have 2xx data render normally; §8 Blockers is also emitted).") .AppendLine("Do not invent activity. Do not paraphrase issue or PR titles into different wording. Keep each line short — Feishu text messages have a body cap, prefer trimming trailing detail over exceeding it.") .AppendLine() .AppendLine("# Suggested GitHub proxy calls") @@ -171,12 +172,12 @@ private static string BuildDailyReportSkillPrompt(string normalizedUser, IReadOn prompt .AppendLine() - .AppendLine("# Source health — distinguish empty results from source failures") + .AppendLine("# Source health — when to emit the §9 footer") .AppendLine() .AppendLine("Do NOT collapse transport, auth, or proxy failures into the empty-day fallback. Classify every tool result before mapping it to a section:") - .AppendLine("- 2xx with an empty list / no matching items → genuine zero data; the section is omitted per the schema.") - .AppendLine("- 4xx / 5xx / tool error envelope (e.g. `{\"error\": true, ...}`, revoked OAuth grant, proxy timeout) → the SOURCE is UNAVAILABLE, not zero. Append a final `Source health: ` line at the very bottom of the report (after Blockers).") - .AppendLine("- The empty-day fallback (`No measurable activity in the last 24h.`) is ONLY valid when EVERY source returned 2xx. If ANY source failed, emit at least the title plus a `Source health:` footer — silently masking credential expiration as `No measurable activity` is the bug we are guarding against.") + .AppendLine("- 2xx with an empty list / no matching items → genuine zero data; the section is omitted per the schema. Does NOT trigger §9.") + .AppendLine("- 4xx / 5xx / tool error envelope (e.g. `{\"error\": true, ...}`, revoked OAuth grant, proxy timeout) → the SOURCE is UNAVAILABLE, not zero. Add the source name + short reason to the §9 Source health footer.") + .AppendLine("- The empty-day fallback (`No measurable activity in the last 24h.`) is ONLY valid when EVERY source returned 2xx. If ANY source failed, you are NOT on the empty-day path — emit the title plus the §9 Source health footer at minimum. Silently masking credential expiration as `No measurable activity` is the bug we are guarding against.") .AppendLine("- Do not retry. Do not fall back to invented data. Do not leave any literal `{username}` / `{iso_date}` / `{owner}/{repo}` placeholders in outbound URLs."); return prompt.ToString(); diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs index 4e0d64228..1061fdbf1 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs @@ -73,11 +73,10 @@ public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_And var skillContent = spec!.SkillContent; - // All eight section slots must be pinned in order — the section position itself is + // All nine section slots must be pinned in order — the section position itself is // load-bearing for the LLM's emission order, even when section 7 (Trend) is optional - // (the schema slot still needs to exist so a model that fetches the prior window - // emits it in the right place). Skipping any number here would let copy edits - // silently drop or reorder a section. + // and section 9 (Source health) is conditional. Skipping any number here would let + // copy edits silently drop or reorder a section. skillContent.Should().Contain("# Output sections"); skillContent.Should().Contain("1. Title"); skillContent.Should().Contain("2. Shipped"); @@ -87,6 +86,7 @@ public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_And skillContent.Should().Contain("6. CI"); skillContent.Should().Contain("7. Trend"); skillContent.Should().Contain("8. Blockers"); + skillContent.Should().Contain("9. Source health"); // Empty-handling rules — the bug we're guarding against is the LLM padding sections // with "no activity in this area" boilerplate when sources are silent. @@ -94,12 +94,16 @@ public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_And skillContent.Should().Contain("No measurable activity in the last 24h."); skillContent.Should().Contain("Do not invent activity."); - // Empty-day Blockers carve-out (codex review of PR #458): the §8 schema says - // "always last" but the empty-day fallback says "return ONLY the title line". This - // pin keeps both qualifiers present so a copy-edit can't reintroduce the - // contradiction by dropping either one. - skillContent.Should().Contain("always last unless the report is empty-day"); - skillContent.Should().Contain("do not append a Blockers line in this case"); + // Section ordering must be unambiguous when both §8 Blockers and §9 Source health are + // present (eanzhao P2 review of PR #458, second pass): the previous "always last" + // qualifier on §8 conflicted with "Source health at the very bottom after Blockers". + // Promote §9 to a real schema slot and pin §8 as position-locked at slot 8 with §9 + // as the only section permitted below. + skillContent.Should().Contain("Position-locked at slot 8"); + skillContent.Should().Contain("the only section that may sit below it is the §9 Source health footer"); + // The empty-day fallback must explicitly forbid both §8 Blockers AND §9 Source health + // emission, so a weaker model cannot synthesize a footer onto a genuine empty day. + skillContent.Should().Contain("do NOT emit Blockers or Source health"); // Source-health distinction (eanzhao P1 review of PR #458, refs issue #439): // collapsing 4xx/5xx/error-shaped tool results into "zero data" silently masks