Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/mcprobe/agents/adk.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def _process_function_responses(
) -> list[ToolCall]:
"""Process function responses and create ToolCall objects."""
tool_calls: list[ToolCall] = []
response_time = time.time()

for fr in function_responses:
call_id = fr.id or fr.name or "unknown"
if call_id in pending_calls:
Expand All @@ -66,12 +68,14 @@ def _process_function_responses(
# Response without matching call - use response info
name = fr.name or "unknown"
params = {}
start = time.time()
start = response_time

tool_calls.append(
ToolCall(
tool_name=name,
parameters=params,
called_at=start,
responded_at=response_time,
result=fr.response,
latency_ms=(time.time() - start) * 1000,
)
Expand Down Expand Up @@ -138,13 +142,16 @@ async def send_message(self, message: str) -> AgentResponse:

# Handle any calls that never got responses
for _id, (name, params, start) in pending_calls.items():
now = time.time()
tool_calls.append(
ToolCall(
tool_name=name,
parameters=params,
result=None,
error="No response received",
latency_ms=(time.time() - start) * 1000,
latency_ms=(now - start) * 1000,
called_at=start,
responded_at=None, # Never responded
)
)
except Exception as e:
Expand Down
35 changes: 30 additions & 5 deletions src/mcprobe/judge/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,25 @@
"""


def _format_timestamp(ts: float | None) -> str:
"""Format epoch timestamp as readable time."""
if ts is None:
return "N/A"
from datetime import UTC, datetime # noqa: PLC0415

dt = datetime.fromtimestamp(ts, tz=UTC)
return dt.strftime("%H:%M:%S.%f")[:-3] # HH:MM:SS.mmm


def format_conversation_transcript(
turns: list[ConversationTurn],
truncate_results: int | None = None,
) -> str:
"""Format conversation turns into a readable transcript.

For assistant turns with tool calls, shows tool calls with timestamps
BEFORE the response text to accurately reflect execution order.

Args:
turns: List of conversation turns.
truncate_results: If set, truncate tool results to this many characters.
Expand All @@ -159,17 +172,29 @@ def format_conversation_transcript(
lines: list[str] = []
for turn in turns:
role = turn.role.upper()
lines.append(f"[{role}]: {turn.content}")
if turn.tool_calls:
turn_time = _format_timestamp(turn.timestamp)

if turn.role == "assistant" and turn.tool_calls:
# Show tool calls first with timestamps, then response
lines.append(f"[{role}] @ {turn_time}:")
for tc in turn.tool_calls:
lines.append(f" -> Tool call: {tc.tool_name}({tc.parameters})")
called = _format_timestamp(tc.called_at)
responded = _format_timestamp(tc.responded_at)
lines.append(
f" -> Tool called @ {called}: {tc.tool_name}({tc.parameters})"
)
if tc.error:
lines.append(f" Error: {tc.error}")
lines.append(f" Error @ {responded}: {tc.error}")
else:
result_str = str(tc.result)
if truncate_results and len(result_str) > truncate_results:
result_str = result_str[:truncate_results] + "... [truncated]"
lines.append(f" Result: {result_str}")
lines.append(f" Result @ {responded}: {result_str}")
lines.append(f" Response: {turn.content}")
else:
# User turns or assistant turns without tool calls
lines.append(f"[{role}] @ {turn_time}: {turn.content}")

return "\n".join(lines)


Expand Down
2 changes: 2 additions & 0 deletions src/mcprobe/models/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ class ToolCall(BaseModel):
result: Any
latency_ms: float
error: str | None = None
called_at: float | None = None # Epoch time when tool was called
responded_at: float | None = None # Epoch time when tool responded


class AgentResponse(BaseModel):
Expand Down