diff --git a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs index 9a9d63289..c7ef95427 100644 --- a/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs +++ b/agents/Aevatar.GAgents.ChannelRuntime/AgentBuilderTemplates.cs @@ -41,31 +41,11 @@ public static bool TryBuildDailyReportSpec( } var repoList = NormalizeRepositories(repositories); - var repoConstraint = repoList.Count == 0 - ? "Search across the user's recent GitHub activity." - : $"Focus on these repositories first: {string.Join(", ", repoList)}."; - - var skillPrompt = new StringBuilder() - .AppendLine("You are Aevatar Daily Report Runner.") - .AppendLine("Each run produces one concise Feishu-ready update for the user's recent work.") - .AppendLine("Use NyxID-backed tools only. Prefer nyxid_proxy with service slug `api-github` for GitHub data access.") - .AppendLine("Required output format:") - .AppendLine("1. A short title line") - .AppendLine("2. 3-6 concise bullet points") - .AppendLine("3. One closing line with blockers or `No blockers.`") - .AppendLine() - .AppendLine($"Primary GitHub username: {normalizedUser}") - .AppendLine(repoConstraint) - .AppendLine("Suggested GitHub proxy calls:") - .AppendLine("- GET /search/commits?q=author:{username}+author-date:>={iso_date}") - .AppendLine("- GET /search/issues?q=author:{username}+updated:>={iso_date}") - .AppendLine("- GET /search/issues?q=commenter:{username}+updated:>={iso_date}") - .AppendLine("If there is no meaningful activity, say so plainly instead of inventing progress.") - .ToString(); + var skillPrompt = BuildDailyReportSkillPrompt(normalizedUser, repoList); var executionPrompt = repoList.Count == 0 - ? $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Return plain text only." - : $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Prioritize repositories: {string.Join(", ", repoList)}. Return plain text only."; + ? $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Follow the section schema in the system prompt. Return plain text only." + : $"Run the daily report for GitHub user `{normalizedUser}` covering the last 24 hours. Restrict source queries to these repositories (one pass per repo, do not collapse to a global search): {string.Join(", ", repoList)}. Follow the section schema in the system prompt. Return plain text only."; spec = new DailyReportTemplateSpec( "daily_report", @@ -122,6 +102,87 @@ public static bool TryBuildSocialMediaSpec( return true; } + // Daily report system prompt is treated as a fetch-and-summarize SPECIFICATION rather than a + // freeform creative brief: explicit section order, hard per-section line budgets, and an + // "omit if empty" rule. See issue #423 for the rationale (current single-paragraph output is + // too thin and pads when sources are silent). + private static string BuildDailyReportSkillPrompt(string normalizedUser, IReadOnlyList repoList) + { + var repoScope = repoList.Count == 0 + ? "Repository scope: not pinned. Use the global GitHub search endpoints listed below." + : $"Repository scope: {string.Join(", ", repoList)}. Run the per-repo endpoints once per repo; do NOT fold the list into a global search query (the /search/* endpoints don't filter to a repo allowlist cleanly)."; + + var prompt = new StringBuilder() + .AppendLine("You are Aevatar Daily Report Runner.") + .AppendLine("Each run produces one Feishu-ready summary of the user's recent GitHub work over the last 24 hours.") + .AppendLine("Use NyxID-backed tools only. Prefer nyxid_proxy with service slug `api-github` for GitHub data access.") + .AppendLine() + .AppendLine($"Primary GitHub username: {normalizedUser}") + .AppendLine(repoScope) + .AppendLine() + .AppendLine("# Output sections (emit in this exact order)") + .AppendLine() + .AppendLine("Each section has a hard line budget. If a section has zero data OR the source is unavailable, OMIT THE SECTION ENTIRELY (header and body) — do not pad with `no activity` or filler.") + .AppendLine() + .AppendLine("1. Title (1 line) — `Daily report — {username} — last 24h`.") + .AppendLine("2. Shipped (≤6 lines) — PRs merged AND commits authored by the user in the window. Format `- [owner/repo#NNN] title` for PRs, `- [owner/repo@sha7] subject` for commits.") + .AppendLine("3. In flight (≤6 lines) — open PRs authored by the user. Append `(stale)` when the PR has had no activity for >24h.") + .AppendLine("4. Reviews (≤4 lines) — PRs the user reviewed in the window. Include kind counts, e.g. `approved 2 / requested-changes 1 / commented 3`.") + .AppendLine("5. Issues (≤4 lines) — issues opened, closed, or commented on by the user.") + .AppendLine("6. CI (≤3 lines) — failing GitHub Actions runs on the tracked repos. Best-effort and only feasible in repo-allowlist mode; OMIT this section in no-repo mode (the global search endpoints do not expose Actions run conclusions).") + .AppendLine("7. Trend (1 line, optional) — running totals vs the prior 24h, e.g. `Trend: shipped 3 (+1), reviews 5 (-2)`. Omit when the prior-window data could not be fetched.") + .AppendLine("8. Blockers (1 line) — `Blockers: ` or `No blockers.` Auto-detect from: PRs >24h waiting on a review, CI red >2h, issues with labels `blocked` or `needs-info`. Position-locked at slot 8; the only section that may sit below it is the §9 Source health footer.") + .AppendLine("9. Source health (1 line, footer) — `Source health: `. Emit ONLY when at least one source returned a non-2xx / error-shaped tool result. When emitted, this is always the final line — below Blockers, below everything.") + .AppendLine() + .AppendLine("If EVERY source returned 2xx with no matching items (genuine empty day), return ONLY the title line followed by `No measurable activity in the last 24h.` and nothing else — do NOT emit Blockers or Source health. If ANY source failed, you are NOT on the empty-day path: emit at least the title line plus the §9 Source health footer (any other sections that have 2xx data render normally; §8 Blockers is also emitted).") + .AppendLine("Do not invent activity. Do not paraphrase issue or PR titles into different wording. Keep each line short — Feishu text messages have a body cap, prefer trimming trailing detail over exceeding it.") + .AppendLine() + .AppendLine("# Suggested GitHub proxy calls") + .AppendLine(); + + prompt + .AppendLine($"Substitution variables in the URLs below: `{{username}}` → `{normalizedUser}`; `{{iso_date}}` → start of the 24h window in ISO 8601 UTC (e.g. `2026-04-26T09:00:00Z`); `{{owner}}/{{repo}}` → each entry from the repository allowlist. Always substitute these literally before sending.") + .AppendLine(); + + if (repoList.Count == 0) + { + prompt + .AppendLine("Repository allowlist not provided — use the global search endpoints:") + .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:merged+merged:>={iso_date} // shipped PRs") + .AppendLine("- GET /search/commits?q=author:{username}+author-date:>={iso_date} // shipped commits") + .AppendLine("- GET /search/issues?q=author:{username}+is:pr+is:open // in flight") + .AppendLine("- GET /search/issues?q=reviewed-by:{username}+updated:>={iso_date} // reviews") + .AppendLine("- GET /search/issues?q=author:{username}+is:issue+created:>={iso_date} // issues opened") + .AppendLine("- GET /search/issues?q=author:{username}+is:issue+is:closed+closed:>={iso_date} // issues closed") + .AppendLine("- GET /search/issues?q=commenter:{username}+updated:>={iso_date} // issues commented") + .AppendLine("// CI section is omitted in no-repo mode: the global /search/* endpoints do not expose Actions run conclusions, and per-repo /actions/runs requires a known repo. Skip section 6 entirely."); + } + else + { + prompt + .AppendLine("Repository allowlist provided — run these per-repo (one search per allowlist entry; do NOT collapse into one global query):") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:merged+merged:>={iso_date} // shipped PRs (search keys on merge time + author, reliable across pagination)") + .AppendLine("- GET /search/commits?q=repo:{owner}/{repo}+author:{username}+author-date:>={iso_date} // shipped commits") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:open // in flight") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+reviewed-by:{username}+updated:>={iso_date} // reviews") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+author:{username}+is:issue+updated:>={iso_date} // issues authored (created/closed)") + .AppendLine("- GET /search/issues?q=repo:{owner}/{repo}+commenter:{username}+is:issue+updated:>={iso_date} // issues commented") + .AppendLine("- GET /repos/{owner}/{repo}/actions/runs?per_page=10 // CI: filter `conclusion=failure` and `created_at >= {iso_date}` client-side; do NOT add a `branch=` filter (default branch varies; trim noise client-side instead)"); + } + + prompt + .AppendLine() + .AppendLine("# Source health — when to emit the §9 footer") + .AppendLine() + .AppendLine("Do NOT collapse transport, auth, or proxy failures into the empty-day fallback. Classify every tool result before mapping it to a section:") + .AppendLine("- 2xx with an empty list / no matching items → genuine zero data; the section is omitted per the schema. Does NOT trigger §9.") + .AppendLine("- 4xx / 5xx / tool error envelope (e.g. `{\"error\": true, ...}`, revoked OAuth grant, proxy timeout) → the SOURCE is UNAVAILABLE, not zero. Add the source name + short reason to the §9 Source health footer.") + .AppendLine("- The empty-day fallback (`No measurable activity in the last 24h.`) is ONLY valid when EVERY source returned 2xx. If ANY source failed, you are NOT on the empty-day path — emit the title plus the §9 Source health footer at minimum. Silently masking credential expiration as `No measurable activity` is the bug we are guarding against.") + .AppendLine("- Do not retry. Do not fall back to invented data. Do not leave any literal `{username}` / `{iso_date}` / `{owner}/{repo}` placeholders in outbound URLs."); + + return prompt.ToString(); + } + private static string BuildSocialMediaWorkflowId(string agentId) => $"social-media-{SanitizeSegment(agentId)}"; diff --git a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs index 34f3ae433..1061fdbf1 100644 --- a/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs +++ b/test/Aevatar.GAgents.ChannelRuntime.Tests/AgentBuilderToolTests.cs @@ -54,6 +54,133 @@ public async Task ExecuteAsync_ListTemplates_ReturnsDailyReportTemplate() } } + [Fact] + public void TryBuildDailyReportSpec_SkillContent_PinsStructuredSectionSchema_AndOmitWhenEmptyRule() + { + // Pinning test for issue #423: the daily prompt is treated as a fetch-and-summarize + // SPEC, not a freeform brief. This test fails fast on copy edits that would silently + // regress the multi-section schema, the per-section line budgets, the "omit empty + // section" rule, or the "no measurable activity" empty-day fallback. + var ok = AgentBuilderTemplates.TryBuildDailyReportSpec( + githubUsername: "alice", + repositories: null, + out var spec, + out var error); + + ok.Should().BeTrue(); + error.Should().BeNull(); + spec.Should().NotBeNull(); + + var skillContent = spec!.SkillContent; + + // All nine section slots must be pinned in order — the section position itself is + // load-bearing for the LLM's emission order, even when section 7 (Trend) is optional + // and section 9 (Source health) is conditional. Skipping any number here would let + // copy edits silently drop or reorder a section. + skillContent.Should().Contain("# Output sections"); + skillContent.Should().Contain("1. Title"); + skillContent.Should().Contain("2. Shipped"); + skillContent.Should().Contain("3. In flight"); + skillContent.Should().Contain("4. Reviews"); + skillContent.Should().Contain("5. Issues"); + skillContent.Should().Contain("6. CI"); + skillContent.Should().Contain("7. Trend"); + skillContent.Should().Contain("8. Blockers"); + skillContent.Should().Contain("9. Source health"); + + // Empty-handling rules — the bug we're guarding against is the LLM padding sections + // with "no activity in this area" boilerplate when sources are silent. + skillContent.Should().Contain("OMIT THE SECTION ENTIRELY"); + skillContent.Should().Contain("No measurable activity in the last 24h."); + skillContent.Should().Contain("Do not invent activity."); + + // Section ordering must be unambiguous when both §8 Blockers and §9 Source health are + // present (eanzhao P2 review of PR #458, second pass): the previous "always last" + // qualifier on §8 conflicted with "Source health at the very bottom after Blockers". + // Promote §9 to a real schema slot and pin §8 as position-locked at slot 8 with §9 + // as the only section permitted below. + skillContent.Should().Contain("Position-locked at slot 8"); + skillContent.Should().Contain("the only section that may sit below it is the §9 Source health footer"); + // The empty-day fallback must explicitly forbid both §8 Blockers AND §9 Source health + // emission, so a weaker model cannot synthesize a footer onto a genuine empty day. + skillContent.Should().Contain("do NOT emit Blockers or Source health"); + + // Source-health distinction (eanzhao P1 review of PR #458, refs issue #439): + // collapsing 4xx/5xx/error-shaped tool results into "zero data" silently masks + // revoked OAuth grants and proxy outages as healthy empty-day reports. Pin the + // 2xx-empty vs source-failure distinction AND the rule that the empty-day fallback + // only applies when every source returned 2xx, so a copy edit cannot regress this + // back to the original "4xx/5xx/empty → treat as zero" wording. + skillContent.Should().Contain("2xx with an empty list"); + skillContent.Should().Contain("Source health:"); + skillContent.Should().Contain("ONLY valid when EVERY source returned 2xx"); + skillContent.Should().NotContain("4xx, 5xx, or empty, treat that source as zero"); + + // Substitution-variable documentation must be present and tied to the actual + // username; otherwise the LLM may emit literal `{username}` placeholders in URLs. + skillContent.Should().Contain("`{username}` → `alice`"); + skillContent.Should().Contain("`{iso_date}` → start of the 24h window"); + + // Username substitution must remain intact (other tests check it under the saved-user + // / derived-user paths; this assertion guards the no-args path). + skillContent.Should().Contain("Primary GitHub username: alice"); + + // No-repo mode must include a commit query (Shipped section claims to cover commits) + // and must explicitly skip the CI section (no global Actions run endpoint exists). + skillContent.Should().Contain("/search/commits?q=author:{username}+author-date:>={iso_date}"); + skillContent.Should().Contain("CI section is omitted in no-repo mode"); + } + + [Fact] + public void TryBuildDailyReportSpec_RepoAllowlist_SwitchesToPerRepoQueryGuidance() + { + // Per issue #423: when `repositories=` is provided, the prompt must steer the LLM toward + // per-repo searches and explicitly refuse the collapsed-allowlist global query. PR #458 + // review further required: shipped PRs must filter by author + merge time (search-API + // form, not /pulls?state=closed which is keyed on update time and ignores author), + // commit shipping must have its own source, repo-scoped issues must include the + // commenter case, and the CI query must not embed a {default_branch} placeholder. + var ok = AgentBuilderTemplates.TryBuildDailyReportSpec( + githubUsername: "alice", + repositories: "acme/api, acme/web", + out var spec, + out var error); + + ok.Should().BeTrue(); + error.Should().BeNull(); + spec.Should().NotBeNull(); + + var skillContent = spec!.SkillContent; + skillContent.Should().Contain("Repository scope: acme/api, acme/web"); + skillContent.Should().Contain("Repository allowlist provided"); + skillContent.Should().Contain("do NOT collapse into one global query"); + + // Shipped PRs in repo mode: search-API form keyed on author + merge time. The previous + // /repos/{owner}/{repo}/pulls?state=closed shape (a) returned closed-but-unmerged PRs + // and (b) had no reliable pagination across active repos — codex P1 + eanzhao inline + // both flagged this. Guard against regression. + skillContent.Should().Contain("/search/issues?q=repo:{owner}/{repo}+author:{username}+is:pr+is:merged+merged:>={iso_date}"); + skillContent.Should().NotContain("/repos/{owner}/{repo}/pulls?state=closed"); + + // Shipped commits in repo mode (Shipped section schema includes commits). + skillContent.Should().Contain("/search/commits?q=repo:{owner}/{repo}+author:{username}+author-date:>={iso_date}"); + + // Issues commented on in repo mode (codex P2: schema says "opened, closed, or + // commented on" but author-only query drops the commenter case). + skillContent.Should().Contain("/search/issues?q=repo:{owner}/{repo}+commenter:{username}+is:issue+updated:>={iso_date}"); + + // CI query must NOT embed a {default_branch} placeholder (the LLM has no way to fill + // it without an extra round-trip and a literal `{default_branch}` would land in the + // outbound URL). Filter conclusion + created_at client-side instead. + skillContent.Should().NotContain("{default_branch}"); + skillContent.Should().Contain("/repos/{owner}/{repo}/actions/runs?per_page=10"); + + // The execution prompt is what the runner sends per-trigger; it must echo the + // per-repo constraint so the LLM sees it on every run, not only at agent-create time. + spec.ExecutionPrompt.Should().Contain("acme/api, acme/web"); + spec.ExecutionPrompt.Should().Contain("one pass per repo"); + } + [Fact] public async Task ExecuteAsync_CreateAgent_RejectsGroupChats() {