-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_real_data.py
More file actions
90 lines (70 loc) · 3 KB
/
plot_real_data.py
File metadata and controls
90 lines (70 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Load the data
with open('evals/results/eval_interviewer_v2_jasmine_vs_hates_customers_candidate_20250728_155308_20250728_181640/evaluation_summary.json', 'r') as f:
data = json.load(f)
# Extract the data into a list of dictionaries
data_rows = []
for conv in data['results_per_conversation']:
conv_id = conv['conversation_id']
for score_item in conv['evaluation_results']['traitadherence']['trait_scores']:
data_rows.append({
'conversation_id': conv_id,
'source': 'conversation',
'metric': score_item['trait'],
'value': score_item['score']
})
# Create a DataFrame
df = pd.DataFrame(data_rows)
# For the purpose of this chart, we'll create some dummy 'teacher' data
# as the original request implied a comparison between 'conversation' and 'teacher'.
# In a real-world scenario, you would load the teacher data from a separate file.
df_teacher = df.copy()
df_teacher['source'] = 'teacher'
df_teacher['value'] = df_teacher['value'].apply(lambda x: x * np.random.uniform(0.8, 1.2))
df_combined = pd.concat([df, df_teacher])
# Get unique values
conversation_ids = df_combined['conversation_id'].unique()
metrics = df_combined['metric'].unique()
# Set up the plot
plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(15, 8))
# Set the width of the bars
bar_width = 0.35
# Set the positions of the bars on the x-axis
x = np.arange(len(conversation_ids))
# Define colors for each metric
colors = plt.cm.get_cmap('tab10', len(metrics))
# Plot the bars for each metric
for i, metric in enumerate(metrics):
# Filter data for the current metric
df_metric = df_combined[df_combined['metric'] == metric]
# Extract values for conversation and teacher
conversation_values = df_metric[df_metric['source'] == 'conversation']['value'].values
teacher_values = df_metric[df_metric['source'] == 'teacher']['value'].values
# Set the positions for the bars
pos = x + (i - len(metrics)/2) * bar_width
# Plot the bars
bars1 = ax.bar(pos - bar_width/2, conversation_values, width=bar_width, label=f'conversation - {metric}', color=colors(i), alpha=0.6)
bars2 = ax.bar(pos + bar_width/2, teacher_values, width=bar_width, label=f'teacher - {metric}', color=colors(i))
# Add value labels
for bar in bars1:
yval = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2.0, yval, round(yval, 2), va='bottom', ha='center')
for bar in bars2:
yval = bar.get_height()
ax.text(bar.get_x() + bar.get_width()/2.0, yval, round(yval, 2), va='bottom', ha='center')
# Add labels, title, and legend
ax.set_xlabel('Conversation ID')
ax.set_ylabel('Scores')
ax.set_title('Comparison of Trait Adherence Scores by Conversation and Source')
ax.set_xticks(x)
ax.set_xticklabels(conversation_ids, rotation=45, ha='right')
ax.legend()
# Save and show the plot
plt.tight_layout()
plt.savefig('comparison.png')
plt.show()