Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 76 additions & 24 deletions claude_code_log/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@

from .utils import (
format_timestamp_range,
get_parent_session_id,
get_project_display_name,
is_agent_session,
should_use_as_session_starter,
create_session_preview,
get_warmup_session_ids,
Expand Down Expand Up @@ -411,6 +413,49 @@ def load_transcript(
return messages


def _integrate_agent_entries(messages: list[TranscriptEntry]) -> None:
    """Parent agent entries and assign synthetic session IDs.

    Agent (sidechain) entries share sessionId with their parent session
    but form separate conversation threads. This function:

    1. Builds a map of agentId -> anchor UUID (the main-session User entry
       whose agentId matches, i.e. the tool_result that references the agent)
    2. For each agent's root entry (parentUuid=None, isSidechain=True),
       sets parentUuid to the anchor UUID
    3. Assigns a synthetic sessionId ("{sessionId}#agent-{agentId}") to all
       agent entries so they form separate DAG-lines

    NOTE(review): anchors are collected only from non-sidechain entries.
    A nested agent (one launched from inside another agent's sidechain)
    therefore has no anchor here and its root keeps parentUuid=None,
    surfacing as a separate root. Confirm whether nested agents occur and
    how their referencing entry encodes the inner agentId before widening
    the anchor scan — on sidechain entries agentId appears to mean
    "belongs to agent", so naively including them could self-anchor an
    agent to its own transcript.

    Mutates entries in place (Pydantic v2 models are mutable by default).
    """
    # Pass 1: build agentId -> anchor UUID map from main-session entries.
    # Only non-sidechain entries are considered; on those, agentId marks a
    # reference to an agent transcript rather than membership in one.
    agent_anchors: dict[str, str] = {}
    for msg in messages:
        if not isinstance(msg, BaseTranscriptEntry):
            continue
        if msg.isSidechain:
            continue
        # Main-session entries with agentId reference an agent transcript
        if msg.agentId:
            agent_anchors[msg.agentId] = msg.uuid

    # No agents referenced anywhere: nothing to rewrite.
    if not agent_anchors:
        return

    # Pass 2: for every sidechain entry, re-parent the root entry to its
    # anchor and give all entries a synthetic per-agent sessionId.
    for msg in messages:
        if not isinstance(msg, BaseTranscriptEntry):
            continue
        if not msg.isSidechain or not msg.agentId:
            continue
        agent_id = msg.agentId
        # Assign synthetic session ID to separate from main session
        msg.sessionId = f"{msg.sessionId}#agent-{agent_id}"
        # Only the agent's root (parentUuid=None) is spliced onto the
        # anchor; interior entries keep their existing parent links.
        if msg.parentUuid is None and agent_id in agent_anchors:
            msg.parentUuid = agent_anchors[agent_id]
Comment on lines +416 to +456
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Nested agent transcripts still won't get anchored.

load_transcript() already recurses into agent files, but agent_anchors is built only from non-sidechain entries. If an agent launches another agent, that inner anchor lives in a sidechain entry, so the nested root keeps parentUuid=None and becomes a separate root instead of attaching under the outer agent branch.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@claude_code_log/converter.py` around lines 416 - 456, The agent anchoring
logic in _integrate_agent_entries only collects anchors from non-sidechain
entries so nested agents (whose anchor is in a sidechain) remain unanchored;
update the agent_anchors build to include any entry with an agentId (i.e., check
msg.agentId regardless of msg.isSidechain) and when multiple candidates exist
prefer a non-sidechain anchor over a sidechain one (so preserve an existing
non-sidechain anchor and only set/override when the current candidate is a
better choice), keeping references to BaseTranscriptEntry, agentId, isSidechain,
agent_anchors, parentUuid, and sessionId to locate the change.



def load_directory_transcripts(
directory_path: Path,
cache_manager: Optional["CacheManager"] = None,
Expand Down Expand Up @@ -441,31 +486,28 @@ def load_directory_transcripts(
progress_chain = _scan_progress_chains(directory_path)
_repair_parent_chains(all_messages, progress_chain)

# Partition: sidechain entries excluded from DAG (Phase C scope)
sidechain_entries = [e for e in all_messages if getattr(e, "isSidechain", False)]
main_entries = [e for e in all_messages if not getattr(e, "isSidechain", False)]
# Parent agent entries and assign synthetic session IDs so they
# form separate DAG-lines spliced at their anchor points.
_integrate_agent_entries(all_messages)

# Collect sidechain UUIDs so DAG build can suppress orphan warnings
# for parents that exist in sidechain data (will be integrated in Phase C)
sidechain_uuids: set[str] = {
e.uuid for e in sidechain_entries if isinstance(e, BaseTranscriptEntry)
}
# Also scan unloaded subagent files (e.g. aprompt_suggestion agents
# that are never referenced via agentId in the main session)
sidechain_uuids |= _scan_sidechain_uuids(directory_path)
# Collect UUIDs from unloaded subagent files (e.g. aprompt_suggestion
# agents never referenced via agentId) to suppress orphan warnings
unloaded_sidechain_uuids = _scan_sidechain_uuids(directory_path)

# Build DAG and traverse (entries grouped by session, depth-first)
tree = build_dag_from_entries(main_entries, sidechain_uuids=sidechain_uuids)
tree = build_dag_from_entries(
all_messages, sidechain_uuids=unloaded_sidechain_uuids
)
dag_ordered = traverse_session_tree(tree)

# Re-add summaries/queue-ops (excluded from DAG since they lack uuid)
non_dag_entries: list[TranscriptEntry] = [
e
for e in main_entries
for e in all_messages
if isinstance(e, (SummaryTranscriptEntry, QueueOperationTranscriptEntry))
]

return dag_ordered + sidechain_entries + non_dag_entries, tree
return dag_ordered + non_dag_entries, tree


# =============================================================================
Expand Down Expand Up @@ -747,7 +789,7 @@ def _build_session_data_from_messages(
):
continue

session_id = getattr(message, "sessionId", "")
session_id = get_parent_session_id(getattr(message, "sessionId", ""))
if not session_id or session_id in warmup_session_ids:
continue

Expand Down Expand Up @@ -877,14 +919,16 @@ def _generate_paginated_html(
if orphan_path.exists():
orphan_path.unlink()

# Group messages by session for fast lookup
# Group messages by session for fast lookup (agent messages grouped
# under their parent session since they don't have their own pages)
messages_by_session: Dict[str, List[TranscriptEntry]] = {}
for msg in messages:
session_id = getattr(msg, "sessionId", None)
if session_id:
if session_id not in messages_by_session:
messages_by_session[session_id] = []
messages_by_session[session_id].append(msg)
key = get_parent_session_id(session_id)
if key not in messages_by_session:
messages_by_session[key] = []
messages_by_session[key].append(msg)

first_page_path = output_dir / _get_page_html_path(1)

Expand Down Expand Up @@ -1185,7 +1229,11 @@ def convert_jsonl_to(
current_session_ids: set[str] = set()
for message in messages:
session_id = getattr(message, "sessionId", "")
if session_id and session_id not in warmup_session_ids:
if (
session_id
and session_id not in warmup_session_ids
and not is_agent_session(session_id)
):
current_session_ids.add(session_id)
session_data = {
session_id: session_cache
Expand Down Expand Up @@ -1379,7 +1427,7 @@ def _update_cache_with_session_data(
if hasattr(message, "sessionId") and not isinstance(
message, SummaryTranscriptEntry
):
session_id = getattr(message, "sessionId", "")
session_id = get_parent_session_id(getattr(message, "sessionId", ""))
if not session_id:
continue

Expand Down Expand Up @@ -1416,7 +1464,7 @@ def _update_cache_with_session_data(
if message.type == "assistant" and hasattr(message, "message"):
assistant_message = getattr(message, "message")
request_id = getattr(message, "requestId", None)
session_id = getattr(message, "sessionId", "")
session_id = get_parent_session_id(getattr(message, "sessionId", ""))

if (
hasattr(assistant_message, "usage")
Expand Down Expand Up @@ -1598,12 +1646,16 @@ def _generate_individual_session_files(
# Pre-compute warmup sessions to exclude them
warmup_session_ids = get_warmup_session_ids(messages)

# Find all unique session IDs (excluding warmup sessions)
# Find all unique session IDs (excluding warmup and agent sessions)
session_ids: set[str] = set()
for message in messages:
if hasattr(message, "sessionId"):
session_id: str = getattr(message, "sessionId")
if session_id and session_id not in warmup_session_ids:
if (
session_id
and session_id not in warmup_session_ids
and not is_agent_session(session_id)
):
Comment on lines +1649 to +1658
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

The no-cache project-summary fallback still leaks synthetic agent sessions.

Filtering agent sessions out here fixes individual session files, but process_projects_hierarchy() falls back to _collect_project_sessions() when cached project data is missing, and that helper still groups by raw sessionId. In that path, #agent-... pseudo-sessions will still show up as standalone project-index sessions, which contradicts the inline-only behavior.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@claude_code_log/converter.py` around lines 1649 - 1658, The fallback path
still groups by raw sessionId and thus leaks synthetic agent sessions; update
_collect_project_sessions() (or the process_projects_hierarchy() fallback call)
to exclude any session_id where is_agent_session(session_id) is true and to skip
warmup_session_ids, mirroring the filtering used earlier (session_id not in
warmup_session_ids and not is_agent_session(session_id)); ensure the grouping
step uses the filtered session_ids variable so pseudo-sessions like "#agent-..."
are never emitted as standalone project-index sessions.

session_ids.add(session_id)

# Get session data from cache for better titles
Expand Down
9 changes: 6 additions & 3 deletions claude_code_log/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class SessionDAGLine:
attachment_uuid: Optional[str] = None # UUID in parent where this attaches
is_branch: bool = False # True for within-session fork branches
original_session_id: Optional[str] = None # Original session_id before fork split
is_sidechain: bool = False # True for agent transcript sessions


@dataclass
Expand Down Expand Up @@ -138,8 +139,9 @@ def build_dag(
"""Populate children_uuids on each node. Mutates nodes in place.

Warns about orphan nodes (parentUuid points outside loaded data)
and validates acyclicity. Parents known to be in sidechain data
(Phase C scope) are silently promoted to root without warning.
and validates acyclicity. Parents known to be in unloaded sidechain
data (e.g. aprompt_suggestion agents) are silently promoted to root
without warning.
"""
_sidechain_uuids = sidechain_uuids or set()

Expand Down Expand Up @@ -639,7 +641,8 @@ def build_dag_from_entries(

Convenience function that runs Steps 1-4 in sequence.
``sidechain_uuids`` suppresses orphan warnings for parents known
to be in sidechain data (not yet integrated, Phase C scope).
to be in unloaded sidechain data (e.g. aprompt_suggestion agents
that are never referenced via agentId in the main session).
"""
nodes = build_message_index(entries)
build_dag(nodes, sidechain_uuids=sidechain_uuids)
Expand Down
106 changes: 66 additions & 40 deletions claude_code_log/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@
from .utils import (
format_timestamp,
format_timestamp_range,
get_parent_session_id,
get_project_display_name,
is_agent_session,
should_skip_message,
should_use_as_session_starter,
create_session_preview,
Expand Down Expand Up @@ -851,6 +853,9 @@ def prepare_session_navigation(
session_nav: list[dict[str, Any]] = []

for session_id in session_order:
# Skip agent sidechain sessions (they appear inline, not in nav)
if is_agent_session(session_id):
continue
session_info = sessions[session_id]

# Skip empty sessions (agent-only, no user messages)
Expand Down Expand Up @@ -2019,6 +2024,21 @@ def _render_messages(
for message in messages:
message_type = message.type

# Determine if this message belongs to an agent sidechain session.
# Agent messages use the parent session's render_session_id so they
# stay grouped with the correct session (trunk or branch).
msg_session_id = getattr(message, "sessionId", "") or ""
agent_parent_session: Optional[str] = None
if is_agent_session(msg_session_id):
# Use session hierarchy to find the actual parent (may be a branch
# pseudo-session if the anchor is inside a within-session fork)
if session_hierarchy:
hier = session_hierarchy.get(msg_session_id, {})
agent_parent_session = hier.get("parent_session_id")
if not agent_parent_session:
# Fallback: extract original session from synthetic ID
agent_parent_session = get_parent_session_id(msg_session_id)

# Check if this message starts a new branch (within-session fork)
# Must happen before system/summary handling so branch state is
# correct when tagging those messages with render_session_id.
Expand Down Expand Up @@ -2101,8 +2121,9 @@ def _render_messages(
system_content = create_system_message(message)
if system_content:
system_msg = TemplateMessage(system_content)
if current_render_session:
system_msg.render_session_id = current_render_session
effective_session = agent_parent_session or current_render_session
if effective_session:
system_msg.render_session_id = effective_session
ctx.register(system_msg)
continue

Expand Down Expand Up @@ -2148,43 +2169,46 @@ def _render_messages(
session_summary = sessions.get(session_id, {}).get("summary")

# Add session header if this is a new session
# Skip headers for agent sidechain sessions (they appear inline)
is_agent = is_agent_session(session_id)
if session_id not in seen_sessions:
seen_sessions.add(session_id)
current_session_summary = session_summary
session_title = (
f"{current_session_summary} • {session_id[:8]}"
if current_session_summary
else session_id[:8]
)
if not is_agent:
current_session_summary = session_summary
session_title = (
f"{current_session_summary} • {session_id[:8]}"
if current_session_summary
else session_id[:8]
)

# Create meta with session_id for the session header
session_header_meta = MessageMeta(
session_id=session_id,
timestamp="",
uuid="",
)
hier = (session_hierarchy or {}).get(session_id, {})
parent_sid = hier.get("parent_session_id")
parent_msg_idx = (
ctx.session_first_message.get(parent_sid) if parent_sid else None
)
session_header_content = SessionHeaderMessage(
session_header_meta,
title=session_title,
session_id=session_id,
summary=current_session_summary,
parent_session_id=parent_sid,
parent_session_summary=(session_summaries or {}).get(parent_sid)
if parent_sid
else None,
parent_message_index=parent_msg_idx,
depth=hier.get("depth", 0),
attachment_uuid=hier.get("attachment_uuid"),
)
# Register and track session's first message
session_header = TemplateMessage(session_header_content)
msg_index = ctx.register(session_header)
ctx.session_first_message[session_id] = msg_index
# Create meta with session_id for the session header
session_header_meta = MessageMeta(
session_id=session_id,
timestamp="",
uuid="",
)
hier = (session_hierarchy or {}).get(session_id, {})
parent_sid = hier.get("parent_session_id")
parent_msg_idx = (
ctx.session_first_message.get(parent_sid) if parent_sid else None
)
session_header_content = SessionHeaderMessage(
session_header_meta,
title=session_title,
session_id=session_id,
summary=current_session_summary,
parent_session_id=parent_sid,
parent_session_summary=(session_summaries or {}).get(parent_sid)
if parent_sid
else None,
parent_message_index=parent_msg_idx,
depth=hier.get("depth", 0),
attachment_uuid=hier.get("attachment_uuid"),
)
# Register and track session's first message
session_header = TemplateMessage(session_header_content)
msg_index = ctx.register(session_header)
ctx.session_first_message[session_id] = msg_index

# Extract token usage for assistant messages
# Only show token usage for the first message with each requestId to avoid duplicates
Expand Down Expand Up @@ -2242,8 +2266,9 @@ def _render_messages(
continue

chunk_msg = TemplateMessage(content_model)
if current_render_session:
chunk_msg.render_session_id = current_render_session
effective_session = agent_parent_session or current_render_session
if effective_session:
chunk_msg.render_session_id = effective_session
ctx.register(chunk_msg)

else:
Expand Down Expand Up @@ -2292,8 +2317,9 @@ def _render_messages(
continue

tool_msg = TemplateMessage(tool_result.content)
if current_render_session:
tool_msg.render_session_id = current_render_session
effective_session = agent_parent_session or current_render_session
if effective_session:
tool_msg.render_session_id = effective_session
ctx.register(tool_msg)

return ctx
Expand Down
18 changes: 18 additions & 0 deletions claude_code_log/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,24 @@ def _extract_file_path(content: str) -> str | None:
return text_content


def is_agent_session(session_id: str) -> bool:
    """Check if a session ID is a synthetic agent session.

    Agent sessions use the format ``{sessionId}#agent-{agentId}``,
    assigned by ``_integrate_agent_entries()`` during DAG construction.
    """
    # partition() yields a non-empty separator exactly when the marker occurs.
    _, marker, _ = session_id.partition("#agent-")
    return bool(marker)


def get_parent_session_id(session_id: str) -> str:
    """Return the parent session ID for an agent session, or the ID itself.

    For ``{sessionId}#agent-{agentId}`` returns ``{sessionId}``.
    For non-agent sessions returns the input unchanged.
    """
    # str.partition scans once and returns the whole string in slot 0 when
    # the separator is absent, replacing the original membership-test +
    # split double scan with a single pass.
    return session_id.partition("#agent-")[0]


def get_warmup_session_ids(messages: list[TranscriptEntry]) -> set[str]:
"""Get set of session IDs that are warmup-only sessions.

Expand Down
Loading