-
Notifications
You must be signed in to change notification settings - Fork 73
Integrate agent transcripts into the DAG (Phase C) #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev/dag
Are you sure you want to change the base?
Changes from all commits
a9f3c56
853fab0
000817f
61b7ff1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,7 +15,9 @@ | |
|
|
||
| from .utils import ( | ||
| format_timestamp_range, | ||
| get_parent_session_id, | ||
| get_project_display_name, | ||
| is_agent_session, | ||
| should_use_as_session_starter, | ||
| create_session_preview, | ||
| get_warmup_session_ids, | ||
|
|
@@ -411,6 +413,49 @@ def load_transcript( | |
| return messages | ||
|
|
||
|
|
||
| def _integrate_agent_entries(messages: list[TranscriptEntry]) -> None: | ||
| """Parent agent entries and assign synthetic session IDs. | ||
|
|
||
| Agent (sidechain) entries share sessionId with their parent session | ||
| but form separate conversation threads. This function: | ||
|
|
||
| 1. Builds a map of agentId -> anchor UUID (the main-session User entry | ||
| whose agentId matches, i.e. the tool_result that references the agent) | ||
| 2. For each agent's root entry (parentUuid=None, isSidechain=True), | ||
| sets parentUuid to the anchor UUID | ||
| 3. Assigns a synthetic sessionId ("{sessionId}#agent-{agentId}") to all | ||
| agent entries so they form separate DAG-lines | ||
|
|
||
| Mutates entries in place (Pydantic v2 models are mutable by default). | ||
| """ | ||
| # Build agentId -> anchor UUID map from main-session entries | ||
| agent_anchors: dict[str, str] = {} | ||
| for msg in messages: | ||
| if not isinstance(msg, BaseTranscriptEntry): | ||
| continue | ||
| if msg.isSidechain: | ||
| continue | ||
| # Main-session entries with agentId reference an agent transcript | ||
| if msg.agentId: | ||
| agent_anchors[msg.agentId] = msg.uuid | ||
|
|
||
| if not agent_anchors: | ||
| return | ||
|
|
||
| # Process sidechain entries: parent roots and assign synthetic sessionIds | ||
| for msg in messages: | ||
| if not isinstance(msg, BaseTranscriptEntry): | ||
| continue | ||
| if not msg.isSidechain or not msg.agentId: | ||
| continue | ||
| agent_id = msg.agentId | ||
| # Assign synthetic session ID to separate from main session | ||
| msg.sessionId = f"{msg.sessionId}#agent-{agent_id}" | ||
| # Parent the root entry to the anchor | ||
| if msg.parentUuid is None and agent_id in agent_anchors: | ||
| msg.parentUuid = agent_anchors[agent_id] | ||
|
Comment on lines
+416
to
+456
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nested agent transcripts still won't get anchored.
🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def load_directory_transcripts( | ||
| directory_path: Path, | ||
| cache_manager: Optional["CacheManager"] = None, | ||
|
|
@@ -441,31 +486,28 @@ def load_directory_transcripts( | |
| progress_chain = _scan_progress_chains(directory_path) | ||
| _repair_parent_chains(all_messages, progress_chain) | ||
|
|
||
| # Partition: sidechain entries excluded from DAG (Phase C scope) | ||
| sidechain_entries = [e for e in all_messages if getattr(e, "isSidechain", False)] | ||
| main_entries = [e for e in all_messages if not getattr(e, "isSidechain", False)] | ||
| # Parent agent entries and assign synthetic session IDs so they | ||
| # form separate DAG-lines spliced at their anchor points. | ||
| _integrate_agent_entries(all_messages) | ||
|
|
||
| # Collect sidechain UUIDs so DAG build can suppress orphan warnings | ||
| # for parents that exist in sidechain data (will be integrated in Phase C) | ||
| sidechain_uuids: set[str] = { | ||
| e.uuid for e in sidechain_entries if isinstance(e, BaseTranscriptEntry) | ||
| } | ||
| # Also scan unloaded subagent files (e.g. aprompt_suggestion agents | ||
| # that are never referenced via agentId in the main session) | ||
| sidechain_uuids |= _scan_sidechain_uuids(directory_path) | ||
| # Collect UUIDs from unloaded subagent files (e.g. aprompt_suggestion | ||
| # agents never referenced via agentId) to suppress orphan warnings | ||
| unloaded_sidechain_uuids = _scan_sidechain_uuids(directory_path) | ||
|
|
||
| # Build DAG and traverse (entries grouped by session, depth-first) | ||
| tree = build_dag_from_entries(main_entries, sidechain_uuids=sidechain_uuids) | ||
| tree = build_dag_from_entries( | ||
| all_messages, sidechain_uuids=unloaded_sidechain_uuids | ||
| ) | ||
| dag_ordered = traverse_session_tree(tree) | ||
|
|
||
| # Re-add summaries/queue-ops (excluded from DAG since they lack uuid) | ||
| non_dag_entries: list[TranscriptEntry] = [ | ||
| e | ||
| for e in main_entries | ||
| for e in all_messages | ||
| if isinstance(e, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)) | ||
| ] | ||
|
|
||
| return dag_ordered + sidechain_entries + non_dag_entries, tree | ||
| return dag_ordered + non_dag_entries, tree | ||
|
|
||
|
|
||
| # ============================================================================= | ||
|
|
@@ -747,7 +789,7 @@ def _build_session_data_from_messages( | |
| ): | ||
| continue | ||
|
|
||
| session_id = getattr(message, "sessionId", "") | ||
| session_id = get_parent_session_id(getattr(message, "sessionId", "")) | ||
| if not session_id or session_id in warmup_session_ids: | ||
| continue | ||
|
|
||
|
|
@@ -877,14 +919,16 @@ def _generate_paginated_html( | |
| if orphan_path.exists(): | ||
| orphan_path.unlink() | ||
|
|
||
| # Group messages by session for fast lookup | ||
| # Group messages by session for fast lookup (agent messages grouped | ||
| # under their parent session since they don't have their own pages) | ||
| messages_by_session: Dict[str, List[TranscriptEntry]] = {} | ||
| for msg in messages: | ||
| session_id = getattr(msg, "sessionId", None) | ||
| if session_id: | ||
| if session_id not in messages_by_session: | ||
| messages_by_session[session_id] = [] | ||
| messages_by_session[session_id].append(msg) | ||
| key = get_parent_session_id(session_id) | ||
| if key not in messages_by_session: | ||
| messages_by_session[key] = [] | ||
| messages_by_session[key].append(msg) | ||
|
|
||
| first_page_path = output_dir / _get_page_html_path(1) | ||
|
|
||
|
|
@@ -1185,7 +1229,11 @@ def convert_jsonl_to( | |
| current_session_ids: set[str] = set() | ||
| for message in messages: | ||
| session_id = getattr(message, "sessionId", "") | ||
| if session_id and session_id not in warmup_session_ids: | ||
| if ( | ||
| session_id | ||
| and session_id not in warmup_session_ids | ||
| and not is_agent_session(session_id) | ||
| ): | ||
| current_session_ids.add(session_id) | ||
| session_data = { | ||
| session_id: session_cache | ||
|
|
@@ -1379,7 +1427,7 @@ def _update_cache_with_session_data( | |
| if hasattr(message, "sessionId") and not isinstance( | ||
| message, SummaryTranscriptEntry | ||
| ): | ||
| session_id = getattr(message, "sessionId", "") | ||
| session_id = get_parent_session_id(getattr(message, "sessionId", "")) | ||
| if not session_id: | ||
| continue | ||
|
|
||
|
|
@@ -1416,7 +1464,7 @@ def _update_cache_with_session_data( | |
| if message.type == "assistant" and hasattr(message, "message"): | ||
| assistant_message = getattr(message, "message") | ||
| request_id = getattr(message, "requestId", None) | ||
| session_id = getattr(message, "sessionId", "") | ||
| session_id = get_parent_session_id(getattr(message, "sessionId", "")) | ||
|
|
||
| if ( | ||
| hasattr(assistant_message, "usage") | ||
|
|
@@ -1598,12 +1646,16 @@ def _generate_individual_session_files( | |
| # Pre-compute warmup sessions to exclude them | ||
| warmup_session_ids = get_warmup_session_ids(messages) | ||
|
|
||
| # Find all unique session IDs (excluding warmup sessions) | ||
| # Find all unique session IDs (excluding warmup and agent sessions) | ||
| session_ids: set[str] = set() | ||
| for message in messages: | ||
| if hasattr(message, "sessionId"): | ||
| session_id: str = getattr(message, "sessionId") | ||
| if session_id and session_id not in warmup_session_ids: | ||
| if ( | ||
| session_id | ||
| and session_id not in warmup_session_ids | ||
| and not is_agent_session(session_id) | ||
| ): | ||
|
Comment on lines
+1649
to
+1658
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The no-cache project-summary fallback still leaks synthetic agent sessions. Filtering agent sessions out here fixes individual session files, but 🤖 Prompt for AI Agents |
||
| session_ids.add(session_id) | ||
|
|
||
| # Get session data from cache for better titles | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.