From a9f3c56179418677b4f9a283c7bfebcbe66b87cc Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 8 Mar 2026 17:05:49 +0100 Subject: [PATCH 1/4] Integrate agent transcripts into the DAG (Phase C, Steps 1-2) Agent (sidechain) entries are now part of the DAG instead of being partitioned out and reordered heuristically. _integrate_agent_entries() parents agent roots to their anchor tool_result and assigns synthetic session IDs ({sessionId}#agent-{agentId}) so they form separate DAG-lines spliced at the correct junction point. Co-Authored-By: Claude Opus 4.6 --- claude_code_log/converter.py | 76 +++++++-- claude_code_log/dag.py | 9 +- claude_code_log/renderer.py | 97 ++++++----- dev-docs/dag.md | 12 +- test/test_dag_integration.py | 273 +++++++++++++++++++++++++++++- work/phase-c-agent-transcripts.md | 81 +++++++++ 6 files changed, 477 insertions(+), 71 deletions(-) create mode 100644 work/phase-c-agent-transcripts.md diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 119a5cd..8de1cf1 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -411,6 +411,49 @@ def load_transcript( return messages +def _integrate_agent_entries(messages: list[TranscriptEntry]) -> None: + """Parent agent entries and assign synthetic session IDs. + + Agent (sidechain) entries share sessionId with their parent session + but form separate conversation threads. This function: + + 1. Builds a map of agentId -> anchor UUID (the main-session User entry + whose agentId matches, i.e. the tool_result that references the agent) + 2. For each agent's root entry (parentUuid=None, isSidechain=True), + sets parentUuid to the anchor UUID + 3. Assigns a synthetic sessionId ("{sessionId}#agent-{agentId}") to all + agent entries so they form separate DAG-lines + + Mutates entries in place (Pydantic v2 models are mutable by default). + """ + # Build agentId -> anchor UUID map from main-session entries + agent_anchors: dict[str, str] = {} + for msg in messages: + if not isinstance(msg, BaseTranscriptEntry): + continue + if msg.isSidechain: + continue + # Main-session entries with agentId reference an agent transcript + if msg.agentId: + agent_anchors[msg.agentId] = msg.uuid + + if not agent_anchors: + return + + # Process sidechain entries: parent roots and assign synthetic sessionIds + for msg in messages: + if not isinstance(msg, BaseTranscriptEntry): + continue + if not msg.isSidechain or not msg.agentId: + continue + agent_id = msg.agentId + # Assign synthetic session ID to separate from main session + msg.sessionId = f"{msg.sessionId}#agent-{agent_id}" + # Parent the root entry to the anchor + if msg.parentUuid is None and agent_id in agent_anchors: + msg.parentUuid = agent_anchors[agent_id] + + def load_directory_transcripts( directory_path: Path, cache_manager: Optional["CacheManager"] = None, @@ -441,31 +484,28 @@ def load_directory_transcripts( progress_chain = _scan_progress_chains(directory_path) _repair_parent_chains(all_messages, progress_chain) - # Partition: sidechain entries excluded from DAG (Phase C scope) - sidechain_entries = [e for e in all_messages if getattr(e, "isSidechain", False)] - main_entries = [e for e in all_messages if not getattr(e, "isSidechain", False)] + # Parent agent entries and assign synthetic session IDs so they + # form separate DAG-lines spliced at their anchor points. + _integrate_agent_entries(all_messages) - # Collect sidechain UUIDs so DAG build can suppress orphan warnings - # for parents that exist in sidechain data (will be integrated in Phase C) - sidechain_uuids: set[str] = { - e.uuid for e in sidechain_entries if isinstance(e, BaseTranscriptEntry) - } - # Also scan unloaded subagent files (e.g. aprompt_suggestion agents - # that are never referenced via agentId in the main session) - sidechain_uuids |= _scan_sidechain_uuids(directory_path) + # Collect UUIDs from unloaded subagent files (e.g. aprompt_suggestion + # agents never referenced via agentId) to suppress orphan warnings + unloaded_sidechain_uuids = _scan_sidechain_uuids(directory_path) # Build DAG and traverse (entries grouped by session, depth-first) - tree = build_dag_from_entries(main_entries, sidechain_uuids=sidechain_uuids) + tree = build_dag_from_entries( + all_messages, sidechain_uuids=unloaded_sidechain_uuids + ) dag_ordered = traverse_session_tree(tree) # Re-add summaries/queue-ops (excluded from DAG since they lack uuid) non_dag_entries: list[TranscriptEntry] = [ e - for e in main_entries + for e in all_messages if isinstance(e, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)) ] - return dag_ordered + sidechain_entries + non_dag_entries, tree + return dag_ordered + non_dag_entries, tree # ============================================================================= @@ -1598,12 +1638,16 @@ def _generate_individual_session_files( # Pre-compute warmup sessions to exclude them warmup_session_ids = get_warmup_session_ids(messages) - # Find all unique session IDs (excluding warmup sessions) + # Find all unique session IDs (excluding warmup and agent sessions) session_ids: set[str] = set() for message in messages: if hasattr(message, "sessionId"): session_id: str = getattr(message, "sessionId") - if session_id and session_id not in warmup_session_ids: + if ( + session_id + and session_id not in warmup_session_ids + and "#agent-" not in session_id + ): session_ids.add(session_id) # Get session data from cache for better titles diff --git a/claude_code_log/dag.py b/claude_code_log/dag.py index c07edb0..cf90aa4 100644 --- a/claude_code_log/dag.py +++ b/claude_code_log/dag.py @@ -49,6 +49,7 @@ class SessionDAGLine: attachment_uuid: Optional[str] = None # UUID in parent where this attaches is_branch: bool = False # True for within-session fork branches original_session_id: Optional[str] = None # Original session_id before fork split + is_sidechain: bool = False # True for agent transcript sessions @dataclass @@ -138,8 +139,9 @@ def build_dag( """Populate children_uuids on each node. Mutates nodes in place. Warns about orphan nodes (parentUuid points outside loaded data) - and validates acyclicity. Parents known to be in sidechain data - (Phase C scope) are silently promoted to root without warning. + and validates acyclicity. Parents known to be in unloaded sidechain + data (e.g. aprompt_suggestion agents) are silently promoted to root + without warning. """ _sidechain_uuids = sidechain_uuids or set() @@ -639,7 +641,8 @@ def build_dag_from_entries( Convenience function that runs Steps 1-4 in sequence. ``sidechain_uuids`` suppresses orphan warnings for parents known - to be in sidechain data (not yet integrated, Phase C scope). + to be in unloaded sidechain data (e.g. aprompt_suggestion agents + that are never referenced via agentId in the main session). """ nodes = build_message_index(entries) build_dag(nodes, sidechain_uuids=sidechain_uuids) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index c316561..66a2eec 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -851,6 +851,9 @@ def prepare_session_navigation( session_nav: list[dict[str, Any]] = [] for session_id in session_order: + # Skip agent sidechain sessions (they appear inline, not in nav) + if "#agent-" in session_id: + continue session_info = sessions[session_id] # Skip empty sessions (agent-only, no user messages) @@ -2019,6 +2022,14 @@ def _render_messages( for message in messages: message_type = message.type + # Determine if this message belongs to an agent sidechain session. + # Agent messages use the parent session's render_session_id so they + # stay grouped with the main session (no separate session header). + msg_session_id = getattr(message, "sessionId", "") or "" + agent_parent_session: Optional[str] = ( + msg_session_id.split("#agent-")[0] if "#agent-" in msg_session_id else None + ) + # Check if this message starts a new branch (within-session fork) # Must happen before system/summary handling so branch state is # correct when tagging those messages with render_session_id. @@ -2101,8 +2112,9 @@ def _render_messages( system_content = create_system_message(message) if system_content: system_msg = TemplateMessage(system_content) - if current_render_session: - system_msg.render_session_id = current_render_session + effective_session = agent_parent_session or current_render_session + if effective_session: + system_msg.render_session_id = effective_session ctx.register(system_msg) continue @@ -2148,43 +2160,46 @@ def _render_messages( session_summary = sessions.get(session_id, {}).get("summary") # Add session header if this is a new session + # Skip headers for agent sidechain sessions (they appear inline) + is_agent_session = "#agent-" in session_id if session_id not in seen_sessions: seen_sessions.add(session_id) - current_session_summary = session_summary - session_title = ( - f"{current_session_summary} • {session_id[:8]}" - if current_session_summary - else session_id[:8] - ) + if not is_agent_session: + current_session_summary = session_summary + session_title = ( + f"{current_session_summary} • {session_id[:8]}" + if current_session_summary + else session_id[:8] + ) - # Create meta with session_id for the session header - session_header_meta = MessageMeta( - session_id=session_id, - timestamp="", - uuid="", - ) - hier = (session_hierarchy or {}).get(session_id, {}) - parent_sid = hier.get("parent_session_id") - parent_msg_idx = ( - ctx.session_first_message.get(parent_sid) if parent_sid else None - ) - session_header_content = SessionHeaderMessage( - session_header_meta, - title=session_title, - session_id=session_id, - summary=current_session_summary, - parent_session_id=parent_sid, - parent_session_summary=(session_summaries or {}).get(parent_sid) - if parent_sid - else None, - parent_message_index=parent_msg_idx, - depth=hier.get("depth", 0), - attachment_uuid=hier.get("attachment_uuid"), - ) - # Register and track session's first message - session_header = TemplateMessage(session_header_content) - msg_index = ctx.register(session_header) - ctx.session_first_message[session_id] = msg_index + # Create meta with session_id for the session header + session_header_meta = MessageMeta( + session_id=session_id, + timestamp="", + uuid="", + ) + hier = (session_hierarchy or {}).get(session_id, {}) + parent_sid = hier.get("parent_session_id") + parent_msg_idx = ( + ctx.session_first_message.get(parent_sid) if parent_sid else None + ) + session_header_content = SessionHeaderMessage( + session_header_meta, + title=session_title, + session_id=session_id, + summary=current_session_summary, + parent_session_id=parent_sid, + parent_session_summary=(session_summaries or {}).get(parent_sid) + if parent_sid + else None, + parent_message_index=parent_msg_idx, + depth=hier.get("depth", 0), + attachment_uuid=hier.get("attachment_uuid"), + ) + # Register and track session's first message + session_header = TemplateMessage(session_header_content) + msg_index = ctx.register(session_header) + ctx.session_first_message[session_id] = msg_index # Extract token usage for assistant messages # Only show token usage for the first message with each requestId to avoid duplicates @@ -2242,8 +2257,9 @@ def _render_messages( continue chunk_msg = TemplateMessage(content_model) - if current_render_session: - chunk_msg.render_session_id = current_render_session + effective_session = agent_parent_session or current_render_session + if effective_session: + chunk_msg.render_session_id = effective_session ctx.register(chunk_msg) else: @@ -2292,8 +2308,9 @@ def _render_messages( continue tool_msg = TemplateMessage(tool_result.content) - if current_render_session: - tool_msg.render_session_id = current_render_session + effective_session = agent_parent_session or current_render_session + if effective_session: + tool_msg.render_session_id = effective_session ctx.register(tool_msg) return ctx diff --git a/dev-docs/dag.md b/dev-docs/dag.md index 021051d..a4cfec3 100644 --- a/dev-docs/dag.md +++ b/dev-docs/dag.md @@ -350,12 +350,16 @@ validate DAG construction against known transcripts. 4. Generate session headers with navigation links (forward/back) 5. Update session index from flat to hierarchical -### Phase C: Agent Transcript Rework +### Phase C: Agent Transcript Rework (Steps 1-2 done) -1. Implement parenting strategies for each agent type -2. Replace `_reorder_sidechain_template_messages` with DAG-line splicing +1. ~~Implement parenting strategies for each agent type~~ — Done: + `_integrate_agent_entries()` parents agent roots to anchors and assigns + synthetic session IDs (`{sessionId}#agent-{agentId}`) +2. ~~Replace `_reorder_sidechain_template_messages` with DAG-line splicing~~ + — Done: agents are now DAG-ordered; the old function is kept as fallback 3. Simplify or remove `_cleanup_sidechain_duplicates` (dedup now - happens at DAG level) + happens at DAG level) — TODO +4. Agent tool renderer (separate PR, `dev/user-sidechain` branch) — TODO ### Phase D: Async Agent and Teammate Support diff --git a/test/test_dag_integration.py b/test/test_dag_integration.py index cacae7b..92ceb19 100644 --- a/test/test_dag_integration.py +++ b/test/test_dag_integration.py @@ -63,9 +63,10 @@ def _make_assistant_entry( parent_uuid: str | None = None, text: str = "reply", is_sidechain: bool = False, + agent_id: str | None = None, ) -> dict[str, Any]: """Helper to create an assistant transcript entry dict.""" - return { + entry: dict[str, Any] = { "type": "assistant", "timestamp": timestamp, "parentUuid": parent_uuid, @@ -86,6 +87,9 @@ def _make_assistant_entry( "usage": {"input_tokens": 10, "output_tokens": 5}, }, } + if agent_id is not None: + entry["agentId"] = agent_id + return entry # ============================================================================= @@ -123,7 +127,7 @@ def test_load_directory_dag_ordering(self, tmp_path: Path) -> None: assert uuids == ["a", "b", "c", "d", "e", "f", "g", "h"] def test_load_directory_with_sidechains(self, tmp_path: Path) -> None: - """Sidechain entries should be present after DAG-ordered main entries.""" + """Sidechain entries are integrated into DAG at their structural position.""" main_entries = [ _make_user_entry("a", "s1", "2025-07-01T10:00:00.000Z", None, "Start"), _make_assistant_entry("b", "s1", "2025-07-01T10:01:00.000Z", "a"), @@ -144,12 +148,9 @@ def test_load_directory_with_sidechains(self, tmp_path: Path) -> None: result, _ = load_directory_transcripts(tmp_path, silent=True) uuids = [getattr(e, "uuid", None) for e in result] - # Main entries first (DAG ordered), then sidechain - assert "a" in uuids - assert "b" in uuids - assert "sc1" in uuids - # Sidechain should be after main entries - assert uuids.index("sc1") > uuids.index("b") + # Sidechain is now part of the DAG: sc1 is a child of a (tool-result + # side-branch), stitched before the continuation child b + assert uuids == ["a", "sc1", "b"] def test_load_directory_with_summaries(self, tmp_path: Path) -> None: """Summary entries should be preserved in output.""" @@ -727,3 +728,259 @@ def test_within_fork_coverage(self) -> None: # All entries should be covered total_in_daglines = sum(len(dl.uuids) for dl in tree.sessions.values()) assert total_in_daglines == len(tree.nodes) + + +# ============================================================================= +# Test: Agent transcript DAG integration +# ============================================================================= + + +class TestAgentDagIntegration: + """Test that agent (sidechain) transcripts are integrated into the DAG.""" + + def test_agent_entries_parented_to_anchor(self, tmp_path: Path) -> None: + """Agent root entry gets parentUuid pointing to the anchor tool_result.""" + # Main session: user → assistant(tool_use Agent) → user(tool_result, agentId) + main_entries = [ + _make_user_entry("u1", "s1", "2025-07-01T10:00:00.000Z", None, "Start"), + _make_assistant_entry("a1", "s1", "2025-07-01T10:01:00.000Z", "u1"), + # User entry carrying tool_result with agentId reference + _make_user_entry( + "u2", + "s1", + "2025-07-01T10:02:00.000Z", + "a1", + "tool result", + agent_id="agent-abc", + ), + _make_assistant_entry("a2", "s1", "2025-07-01T10:03:00.000Z", "u2"), + ] + # Agent file entries (all sidechain) + agent_entries = [ + _make_user_entry( + "ag1", + "s1", + "2025-07-01T10:01:30.000Z", + None, + "Agent prompt", + is_sidechain=True, + agent_id="agent-abc", + ), + _make_assistant_entry( + "ag2", + "s1", + "2025-07-01T10:01:40.000Z", + "ag1", + "Agent reply", + is_sidechain=True, + agent_id="agent-abc", + ), + ] + + _write_jsonl(tmp_path / "session.jsonl", main_entries + agent_entries) + + result, tree = load_directory_transcripts(tmp_path, silent=True) + uuids = [getattr(e, "uuid", None) for e in result] + + # Agent entries should be in the DAG, placed at the junction point + assert "ag1" in uuids + assert "ag2" in uuids + # Main session entries should be in order + assert uuids.index("u1") < uuids.index("a1") + assert uuids.index("a1") < uuids.index("u2") + assert uuids.index("u2") < uuids.index("a2") + # Agent entries should appear between the anchor (u2) and + # continuation (a2) — the agent DAG-line is a child session + # traversed at the junction point + assert uuids.index("u2") < uuids.index("ag1") + assert uuids.index("ag2") < uuids.index("a2") + + def test_agent_session_in_tree(self, tmp_path: Path) -> None: + """Agent transcript creates a synthetic child session in the tree.""" + main_entries = [ + _make_user_entry("u1", "s1", "2025-07-01T10:00:00.000Z", None, "Start"), + _make_user_entry( + "u2", + "s1", + "2025-07-01T10:02:00.000Z", + "u1", + "tool result", + agent_id="agent-xyz", + ), + ] + agent_entries = [ + _make_user_entry( + "ag1", + "s1", + "2025-07-01T10:01:00.000Z", + None, + "Agent prompt", + is_sidechain=True, + agent_id="agent-xyz", + ), + _make_assistant_entry( + "ag2", + "s1", + "2025-07-01T10:01:10.000Z", + "ag1", + "Agent reply", + is_sidechain=True, + agent_id="agent-xyz", + ), + ] + + _write_jsonl(tmp_path / "session.jsonl", main_entries + agent_entries) + + _, tree = load_directory_transcripts(tmp_path, silent=True) + + # Should have synthetic agent session + agent_sids = [sid for sid in tree.sessions if "#agent-" in sid] + assert len(agent_sids) == 1 + assert agent_sids[0] == "s1#agent-agent-xyz" + + # Agent session should be a child of the main session + agent_dag_line = tree.sessions[agent_sids[0]] + assert agent_dag_line.parent_session_id == "s1" + assert agent_dag_line.attachment_uuid == "u2" + + # Main session should be a root + assert "s1" in tree.roots + assert agent_sids[0] not in tree.roots + + def test_agent_no_session_header(self, tmp_path: Path) -> None: + """Agent sessions don't generate session headers in rendering.""" + from claude_code_log.renderer import generate_template_messages + from claude_code_log.models import SessionHeaderMessage + + main_entries = [ + _make_user_entry("u1", "s1", "2025-07-01T10:00:00.000Z", None, "Start"), + _make_user_entry( + "u2", + "s1", + "2025-07-01T10:02:00.000Z", + "u1", + "tool result", + agent_id="agent-xyz", + ), + ] + agent_entries = [ + _make_user_entry( + "ag1", + "s1", + "2025-07-01T10:01:00.000Z", + None, + "Agent prompt", + is_sidechain=True, + agent_id="agent-xyz", + ), + _make_assistant_entry( + "ag2", + "s1", + "2025-07-01T10:01:10.000Z", + "ag1", + "Agent reply", + is_sidechain=True, + agent_id="agent-xyz", + ), + ] + + _write_jsonl(tmp_path / "session.jsonl", main_entries + agent_entries) + + messages, session_tree = load_directory_transcripts(tmp_path, silent=True) + root_messages, session_nav, context = generate_template_messages( + messages, session_tree=session_tree + ) + + # Only one session header (for the main session), not for the agent + headers = [ + m for m in context.messages if isinstance(m.content, SessionHeaderMessage) + ] + assert len(headers) == 1 + header_content = headers[0].content + assert isinstance(header_content, SessionHeaderMessage) + assert header_content.session_id == "s1" + + def test_multiple_agents_ordered(self, tmp_path: Path) -> None: + """Multiple agents are each placed at their respective anchor points.""" + main_entries = [ + _make_user_entry("u1", "s1", "2025-07-01T10:00:00.000Z", None, "Start"), + _make_assistant_entry("a1", "s1", "2025-07-01T10:01:00.000Z", "u1"), + # First agent anchor + _make_user_entry( + "u2", + "s1", + "2025-07-01T10:02:00.000Z", + "a1", + "result1", + agent_id="agent-1", + ), + _make_assistant_entry("a2", "s1", "2025-07-01T10:03:00.000Z", "u2"), + # Second agent anchor + _make_user_entry( + "u3", + "s1", + "2025-07-01T10:04:00.000Z", + "a2", + "result2", + agent_id="agent-2", + ), + _make_assistant_entry("a3", "s1", "2025-07-01T10:05:00.000Z", "u3"), + ] + agent1_entries = [ + _make_user_entry( + "ag1-1", + "s1", + "2025-07-01T10:01:30.000Z", + None, + "Agent1 prompt", + is_sidechain=True, + agent_id="agent-1", + ), + _make_assistant_entry( + "ag1-2", + "s1", + "2025-07-01T10:01:40.000Z", + "ag1-1", + "Agent1 reply", + is_sidechain=True, + agent_id="agent-1", + ), + ] + agent2_entries = [ + _make_user_entry( + "ag2-1", + "s1", + "2025-07-01T10:03:30.000Z", + None, + "Agent2 prompt", + is_sidechain=True, + agent_id="agent-2", + ), + _make_assistant_entry( + "ag2-2", + "s1", + "2025-07-01T10:03:40.000Z", + "ag2-1", + "Agent2 reply", + is_sidechain=True, + agent_id="agent-2", + ), + ] + + _write_jsonl( + tmp_path / "session.jsonl", + main_entries + agent1_entries + agent2_entries, + ) + + result, tree = load_directory_transcripts(tmp_path, silent=True) + uuids = [getattr(e, "uuid", None) for e in result] + + # Each agent should appear after its anchor and before the next main entry + assert uuids.index("ag1-1") > uuids.index("u2") + assert uuids.index("ag1-2") < uuids.index("a2") + assert uuids.index("ag2-1") > uuids.index("u3") + assert uuids.index("ag2-2") < uuids.index("a3") + + # Two synthetic agent sessions + agent_sids = [sid for sid in tree.sessions if "#agent-" in sid] + assert len(agent_sids) == 2 diff --git a/work/phase-c-agent-transcripts.md b/work/phase-c-agent-transcripts.md new file mode 100644 index 0000000..f5c5ced --- /dev/null +++ b/work/phase-c-agent-transcripts.md @@ -0,0 +1,81 @@ +# Phase C: Agent Transcript Rework + +## Status: Steps 1-2 Complete (DAG Integration) + +## What Changed + +### Step 1: Agent Data Shapes (Analysis Complete) + +Key findings from real data analysis: + +- **Agent entries share `sessionId`** with their parent session +- All agent entries have `isSidechain: true` and `agentId` +- First entry always has `parentUuid: null` (top-level agents) +- Internal `parentUuid` chains form the same fork patterns as main sessions + (tool-result side-branches) +- `agentId` reference in main session: either entry-level `agentId` (old Task + tool) or `toolUseResult.agentId` (new Agent tool, copied to entry level by + converter.py parsing code) + +### Step 2: DAG-Level Agent Integration (Implemented) + +**`converter.py` — `_integrate_agent_entries()`**: +1. Builds `agentId -> anchor_uuid` map from main-session entries with `agentId` +2. For each sidechain entry: assigns synthetic `sessionId` + (`{sessionId}#agent-{agentId}`) so agents form separate DAG-lines +3. Parents root entries (`parentUuid=None`) to the anchor UUID + +**Effect**: Agent entries are included in the DAG. The existing DAG machinery +(build_dag, extract_session_dag_lines, build_session_tree, traverse_session_tree) +handles them as child sessions of the main session, spliced at the anchor point. + +**Key constraint**: `entry.sessionId` on disk / in cache is NEVER mutated. +The synthetic ID is only assigned in-memory during `load_directory_transcripts()`. + +### Renderer Changes + +- Agent sessions (`#agent-` in session_id) **don't get session headers** +- Agent messages use parent session's `render_session_id` for correct grouping + in `_reorder_session_template_messages()` +- Agent sessions excluded from session navigation and individual file generation + +### What Was Kept (Not Removed Yet) + +- `_reorder_sidechain_template_messages()` — now a no-op for properly + integrated agents (they're already in DAG order), but kept as fallback + for any edge cases with old-style data +- `_cleanup_sidechain_duplicates()` — still needed for Task tool dedup + (first user message = Task input, last assistant = Task output) +- `sidechain_uuids` parameter in `build_dag()` — still needed for unloaded + subagent files (e.g. aprompt_suggestion agents never referenced via agentId) + +## Remaining Steps + +### Step 3: Session Tree Integration (Partially Done) + +Agent DAG-lines already appear as child sessions in the tree. The +`traverse_session_tree()` naturally visits them at the junction point. +What's left: +- Verify rendering hierarchy (levels 4/5) works correctly for all cases +- Test with projects that have nested agents (agent spawning sub-agents) + +### Step 4: Rendering Cleanup + +Once confident in DAG ordering: +- Remove `_reorder_sidechain_template_messages()` (currently a no-op for + integrated agents) +- Simplify `_cleanup_sidechain_duplicates()` — dedup may be handleable at + DAG level + +### Step 5: Agent Tool Renderer (separate PR, `dev/user-sidechain`) + +- Specialized rendering for Agent tool_use/tool_result (like old Task tool had) +- Sidechain user messages rendered as markdown (already on `dev/user-sidechain`) + +## Test Coverage + +4 new integration tests in `TestAgentDagIntegration`: +- `test_agent_entries_parented_to_anchor` — agent root gets parentUuid to anchor +- `test_agent_session_in_tree` — synthetic session created, tree structure correct +- `test_agent_no_session_header` — no session header generated for agents +- `test_multiple_agents_ordered` — multiple agents placed at respective anchors From 853fab0e7098b76cfb12953b4e1a53020639133c Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 8 Mar 2026 18:20:56 +0100 Subject: [PATCH 2/4] Add branch-scoped agent test and fix render_session_id lookup Use session hierarchy to determine the correct parent session for agent messages (may be a branch pseudo-session when the anchor is inside a within-session fork). Fallback to extracting from synthetic ID. Co-Authored-By: Claude Opus 4.6 --- claude_code_log/renderer.py | 15 +++++-- test/test_dag_integration.py | 86 ++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 4 deletions(-) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 66a2eec..8f26e71 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -2024,11 +2024,18 @@ def _render_messages( # Determine if this message belongs to an agent sidechain session. # Agent messages use the parent session's render_session_id so they - # stay grouped with the main session (no separate session header). + # stay grouped with the correct session (trunk or branch). msg_session_id = getattr(message, "sessionId", "") or "" - agent_parent_session: Optional[str] = ( - msg_session_id.split("#agent-")[0] if "#agent-" in msg_session_id else None - ) + agent_parent_session: Optional[str] = None + if "#agent-" in msg_session_id: + # Use session hierarchy to find the actual parent (may be a branch + # pseudo-session if the anchor is inside a within-session fork) + if session_hierarchy: + hier = session_hierarchy.get(msg_session_id, {}) + agent_parent_session = hier.get("parent_session_id") + if not agent_parent_session: + # Fallback: extract original session from synthetic ID + agent_parent_session = msg_session_id.split("#agent-")[0] # Check if this message starts a new branch (within-session fork) # Must happen before system/summary handling so branch state is diff --git a/test/test_dag_integration.py b/test/test_dag_integration.py index 92ceb19..4f9bb8e 100644 --- a/test/test_dag_integration.py +++ b/test/test_dag_integration.py @@ -984,3 +984,89 @@ def test_multiple_agents_ordered(self, tmp_path: Path) -> None: # Two synthetic agent sessions agent_sids = [sid for sid in tree.sessions if "#agent-" in sid] assert len(agent_sids) == 2 + + def test_agent_in_branch(self, tmp_path: Path) -> None: + """Agent anchored inside a within-session fork attaches to the branch.""" + from claude_code_log.renderer import generate_template_messages + + # Trunk: u1 → a1 (fork point) + # Branch 1: b1_u (rewind from a1, different timestamp) → b1_a + # with agent anchored at b1_u + # Branch 2: b2_u (rewind from a1, different timestamp) + main_entries = [ + _make_user_entry("u1", "s1", "2025-07-01T10:00:00.000Z", None, "Start"), + _make_assistant_entry("a1", "s1", "2025-07-01T10:01:00.000Z", "u1"), + # Branch 1: user rewind from a1 + _make_user_entry( + "b1_u", + "s1", + "2025-07-01T10:02:00.000Z", + "a1", + "Branch 1", + agent_id="agent-b1", + ), + _make_assistant_entry( + "b1_a", + "s1", + "2025-07-01T10:03:00.000Z", + "b1_u", + ), + # Branch 2: user rewind from a1 (different timestamp = real fork) + _make_user_entry( + "b2_u", + "s1", + "2025-07-01T10:04:00.000Z", + "a1", + "Branch 2", + ), + ] + agent_entries = [ + _make_user_entry( + "ag1", + "s1", + "2025-07-01T10:02:30.000Z", + None, + "Agent in branch", + is_sidechain=True, + agent_id="agent-b1", + ), + _make_assistant_entry( + "ag2", + "s1", + "2025-07-01T10:02:40.000Z", + "ag1", + "Agent reply", + is_sidechain=True, + agent_id="agent-b1", + ), + ] + + _write_jsonl(tmp_path / "session.jsonl", main_entries + agent_entries) + + result, tree = load_directory_transcripts(tmp_path, silent=True) + + # Agent session's parent should be the branch pseudo-session, not trunk + agent_sids = [sid for sid in tree.sessions if "#agent-" in sid] + assert len(agent_sids) == 1 + agent_dl = tree.sessions[agent_sids[0]] + # The branch pseudo-session has format "s1@{child_uuid[:12]}" + assert agent_dl.parent_session_id is not None + assert "@" in agent_dl.parent_session_id, ( + f"Agent should be child of branch, got parent={agent_dl.parent_session_id}" + ) + assert agent_dl.attachment_uuid == "b1_u" + + # End-to-end rendering: agent messages should appear in the branch, + # not get regrouped under the trunk + messages, session_tree = load_directory_transcripts(tmp_path, silent=True) + root_messages, session_nav, context = generate_template_messages( + messages, session_tree=session_tree + ) + + # Verify message ordering: agent messages should be in the branch + # block (after b1_u anchor, before branch 2's b2_u) + msg_uuids = {m.meta.uuid: m.message_index for m in context.messages} + assert "ag1" in msg_uuids + assert "b1_u" in msg_uuids + assert "b2_u" in msg_uuids + assert msg_uuids["b1_u"] < msg_uuids["ag1"] < msg_uuids["b2_u"] # type: ignore[operator] From 000817f0b8c3a56937584ee7a24eb17d5e6be8d1 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 8 Mar 2026 18:50:49 +0100 Subject: [PATCH 3/4] Guard pagination and cache paths against synthetic agent session IDs Extract `is_agent_session()` helper to `utils.py` and use it to filter agent sessions from: session metadata building, cache population, pagination session collection, and message grouping for pages. Agent messages in pagination are grouped under their parent session. Co-Authored-By: Claude Opus 4.6 --- claude_code_log/converter.py | 27 ++++++++++++++++++++------- claude_code_log/renderer.py | 9 +++++---- claude_code_log/utils.py | 9 +++++++++ 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 8de1cf1..019a835 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -16,6 +16,7 @@ from .utils import ( format_timestamp_range, get_project_display_name, + is_agent_session, should_use_as_session_starter, create_session_preview, get_warmup_session_ids, @@ -790,6 +791,8 @@ def _build_session_data_from_messages( session_id = getattr(message, "sessionId", "") if not session_id or session_id in warmup_session_ids: continue + if is_agent_session(session_id): + continue if session_id not in sessions: sessions[session_id] = { @@ -917,14 +920,20 @@ def _generate_paginated_html( if orphan_path.exists(): orphan_path.unlink() - # Group messages by session for fast lookup + # Group messages by session for fast lookup (agent messages grouped + # under their parent session since they don't have their own pages) messages_by_session: Dict[str, List[TranscriptEntry]] = {} for msg in messages: session_id = getattr(msg, "sessionId", None) if session_id: - if session_id not in messages_by_session: - messages_by_session[session_id] = [] - messages_by_session[session_id].append(msg) + key = ( + session_id.split("#agent-")[0] + if is_agent_session(session_id) + else session_id + ) + if key not in messages_by_session: + messages_by_session[key] = [] + messages_by_session[key].append(msg) first_page_path = output_dir / _get_page_html_path(1) @@ -1225,7 +1234,11 @@ def convert_jsonl_to( current_session_ids: set[str] = set() for message in messages: session_id = getattr(message, "sessionId", "") - if session_id and session_id not in warmup_session_ids: + if ( + session_id + and session_id not in warmup_session_ids + and not is_agent_session(session_id) + ): current_session_ids.add(session_id) session_data = { session_id: session_cache @@ -1420,7 +1433,7 @@ def _update_cache_with_session_data( message, SummaryTranscriptEntry ): session_id = getattr(message, "sessionId", "") - if not session_id: + if not session_id or is_agent_session(session_id): continue if session_id not in sessions_cache_data: @@ -1646,7 +1659,7 @@ def _generate_individual_session_files( if ( session_id and session_id not in warmup_session_ids - and "#agent-" not in session_id + and not is_agent_session(session_id) ): session_ids.add(session_id) diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 8f26e71..1d080e0 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -67,6 +67,7 @@ format_timestamp, format_timestamp_range, get_project_display_name, + is_agent_session, should_skip_message, should_use_as_session_starter, create_session_preview, @@ -852,7 +853,7 @@ def prepare_session_navigation( for session_id in session_order: # Skip agent sidechain sessions (they appear inline, not in nav) - if "#agent-" in session_id: + if is_agent_session(session_id): continue session_info = sessions[session_id] @@ -2027,7 +2028,7 @@ def _render_messages( # stay grouped with the correct session (trunk or branch). msg_session_id = getattr(message, "sessionId", "") or "" agent_parent_session: Optional[str] = None - if "#agent-" in msg_session_id: + if is_agent_session(msg_session_id): # Use session hierarchy to find the actual parent (may be a branch # pseudo-session if the anchor is inside a within-session fork) if session_hierarchy: @@ -2168,10 +2169,10 @@ def _render_messages( # Add session header if this is a new session # Skip headers for agent sidechain sessions (they appear inline) - is_agent_session = "#agent-" in session_id + is_agent = is_agent_session(session_id) if session_id not in seen_sessions: seen_sessions.add(session_id) - if not is_agent_session: + if not is_agent: current_session_summary = session_summary session_title = ( f"{current_session_summary} • {session_id[:8]}" diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index 82de8b2..def5464 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -288,6 +288,15 @@ def _extract_file_path(content: str) -> str | None: return text_content +def is_agent_session(session_id: str) -> bool: + """Check if a session ID is a synthetic agent session. + + Agent sessions use the format ``{sessionId}#agent-{agentId}``, + assigned by ``_integrate_agent_entries()`` during DAG construction. + """ + return "#agent-" in session_id + + def get_warmup_session_ids(messages: list[TranscriptEntry]) -> set[str]: """Get set of session IDs that are warmup-only sessions. From 61b7ff1a53cac8295c3d20e549872c955b8b1e4d Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Sun, 8 Mar 2026 20:24:07 +0100 Subject: [PATCH 4/4] Coalesce agent messages into parent session aggregates Instead of dropping agent messages from session metadata, remap their synthetic session IDs to the parent session using get_parent_session_id(). This ensures message counts, token totals, and timestamps in pagination and cache include agent contributions. Add regression test verifying agent messages fold into parent session's message_count and token totals via _build_session_data_from_messages(). Co-Authored-By: Claude Opus 4.6 --- claude_code_log/converter.py | 17 ++++----- claude_code_log/renderer.py | 3 +- claude_code_log/utils.py | 9 +++++ test/test_dag_integration.py | 67 ++++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 12 deletions(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 019a835..e3fa788 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -15,6 +15,7 @@ from .utils import ( format_timestamp_range, + get_parent_session_id, get_project_display_name, is_agent_session, should_use_as_session_starter, @@ -788,11 +789,9 @@ def _build_session_data_from_messages( ): continue - session_id = getattr(message, "sessionId", "") + session_id = get_parent_session_id(getattr(message, "sessionId", "")) if not session_id or session_id in warmup_session_ids: continue - if is_agent_session(session_id): - continue if session_id not in sessions: sessions[session_id] = { @@ -926,11 +925,7 @@ def _generate_paginated_html( for msg in messages: session_id = getattr(msg, "sessionId", None) if session_id: - key = ( - session_id.split("#agent-")[0] - if is_agent_session(session_id) - else session_id - ) + key = get_parent_session_id(session_id) if key not in messages_by_session: messages_by_session[key] = [] messages_by_session[key].append(msg) @@ -1432,8 +1427,8 @@ def _update_cache_with_session_data( if hasattr(message, "sessionId") and not isinstance( message, SummaryTranscriptEntry ): - session_id = getattr(message, "sessionId", "") - if not session_id or is_agent_session(session_id): + session_id = get_parent_session_id(getattr(message, "sessionId", "")) + if not session_id: continue if session_id not in sessions_cache_data: @@ -1469,7 +1464,7 @@ def _update_cache_with_session_data( if message.type == "assistant" and hasattr(message, "message"): assistant_message = getattr(message, "message") request_id = getattr(message, "requestId", None) - session_id = getattr(message, "sessionId", "") + session_id = get_parent_session_id(getattr(message, "sessionId", "")) if ( hasattr(assistant_message, "usage") diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 1d080e0..a7377c0 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -66,6 +66,7 @@ from .utils import ( format_timestamp, format_timestamp_range, + get_parent_session_id, get_project_display_name, is_agent_session, should_skip_message, @@ -2036,7 +2037,7 @@ def _render_messages( agent_parent_session = hier.get("parent_session_id") if not agent_parent_session: # Fallback: extract original session from synthetic ID - agent_parent_session = msg_session_id.split("#agent-")[0] + agent_parent_session = get_parent_session_id(msg_session_id) # Check if this message starts a new branch (within-session fork) # Must happen before system/summary handling so branch state is diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index def5464..11a97a6 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -297,6 +297,15 @@ def is_agent_session(session_id: str) -> bool: return "#agent-" in session_id +def get_parent_session_id(session_id: str) -> str: + """Return the parent session ID for an agent session, or the ID itself. + + For ``{sessionId}#agent-{agentId}`` returns ``{sessionId}``. + For non-agent sessions returns the input unchanged. + """ + return session_id.split("#agent-")[0] if "#agent-" in session_id else session_id + + def get_warmup_session_ids(messages: list[TranscriptEntry]) -> set[str]: """Get set of session IDs that are warmup-only sessions. diff --git a/test/test_dag_integration.py b/test/test_dag_integration.py index 4f9bb8e..9973b24 100644 --- a/test/test_dag_integration.py +++ b/test/test_dag_integration.py @@ -12,6 +12,7 @@ from claude_code_log.converter import ( load_directory_transcripts, + _build_session_data_from_messages, _scan_progress_chains, _repair_parent_chains, ) @@ -1070,3 +1071,69 @@ def test_agent_in_branch(self, tmp_path: Path) -> None: assert "b1_u" in msg_uuids assert "b2_u" in msg_uuids assert msg_uuids["b1_u"] < msg_uuids["ag1"] < msg_uuids["b2_u"] # type: ignore[operator] + + def test_agent_messages_coalesced_into_parent_session(self, tmp_path: Path) -> None: + """Agent message counts and tokens fold into parent session aggregates. + + Regression test: agent messages must not be dropped from session + metadata used for pagination and cache. They should be counted under + the parent session. + """ + # Main session: user → assistant(anchor, agentId) → user(next) + main_entries = [ + _make_user_entry("u1", "s1", "2025-01-01T00:00:00Z", text="Hello"), + _make_assistant_entry( + "a1", + "s1", + "2025-01-01T00:00:01Z", + parent_uuid="u1", + agent_id="ag1", + ), + _make_user_entry( + "u2", + "s1", + "2025-01-01T00:00:05Z", + parent_uuid="a1", + text="Continue", + ), + ] + # Agent sidechain: 2 entries (user + assistant) + agent_entries = [ + _make_user_entry( + "ag_u1", + "s1", + "2025-01-01T00:00:02Z", + is_sidechain=True, + agent_id="ag1", + text="agent task", + ), + _make_assistant_entry( + "ag_a1", + "s1", + "2025-01-01T00:00:03Z", + parent_uuid="ag_u1", + is_sidechain=True, + agent_id="ag1", + ), + ] + + _write_jsonl(tmp_path / "session.jsonl", main_entries + agent_entries) + + messages, _tree = load_directory_transcripts(tmp_path, silent=True) + + # Build session data (used for pagination page assignment) + session_data = _build_session_data_from_messages(messages) + + # Only the parent session should exist — no agent-synthetic session + assert "s1" in session_data + assert not any("#agent-" in sid for sid in session_data) + + s1 = session_data["s1"] + # message_count should include both main (3) and agent (2) entries + assert s1.message_count == 5 + + # Token totals should include agent assistant entry (10 input, 5 output) + # Main has 1 assistant (a1: 10 input, 5 output) + # Agent has 1 assistant (ag_a1: 10 input, 5 output) + assert s1.total_input_tokens == 20 + assert s1.total_output_tokens == 10