forked from gensyn-ai/codeassist
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathstart_zero_style.py
More file actions
117 lines (105 loc) · 3.62 KB
/
start_zero_style.py
File metadata and controls
117 lines (105 loc) · 3.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
"""
Start Zero Style App Script
Opens a new browser tab with the zero-style app using episode data.
Accepts command-line arguments for episode ID and timestep.
"""
import argparse
import webbrowser as browser
import sys
import urllib.parse
def parse_args(argv=None):
    """Parse command-line arguments for the zero-style launcher.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which is
            the behavior existing callers rely on. Passing an explicit list
            makes the parser usable programmatically and in tests.

    Returns:
        argparse.Namespace with the attributes ``episode``, ``timestep``,
        ``port``, ``episodes_dir``, ``max_assistant_actions``,
        ``max_human_actions``, ``assistant_noise_prob`` and
        ``assistant_noise_top_k``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            a value cannot be converted to its declared type.
    """
    parser = argparse.ArgumentParser(
        description="Start a zero-style simulation using episode data"
    )

    # Required: which recorded episode and which step of it to start from.
    parser.add_argument(
        "--episode",
        type=str,
        required=True,
        help="Episode ID (directory name in state-service/episodes)",
    )
    parser.add_argument(
        "--timestep",
        type=int,
        required=True,
        help="Timestep number (line number in JSONL file, 1-indexed)",
    )

    # Optional: where the UI is served and where episodes are stored.
    parser.add_argument(
        "--port",
        type=int,
        default=3003,
        help="Port number where the zero-style UI is running (default: 3003)",
    )
    parser.add_argument(
        "--episodes-dir",
        type=str,
        default="./persistent-data/state-service/episodes",
        help="Path to episodes directory (default: ./persistent-data/state-service/episodes)",
    )

    # Optional: simulation limits and exploration settings.
    parser.add_argument(
        "--max-assistant-actions",
        type=int,
        default=2,
        help="Number of assistant actions to allow before stopping the simulation",
    )
    parser.add_argument(
        "--max-human-actions",
        type=int,
        default=1,
        help="Number of human policy actions to run after assistant actions",
    )
    parser.add_argument(
        "--assistant-noise-prob",
        type=float,
        default=0.05,
        help="Probability of replacing an assistant action with a random exploratory action",
    )
    parser.add_argument(
        "--assistant-noise-top-k",
        type=int,
        default=3,
        help="When exploration triggers, sample uniformly from the assistant policy's top-k actions",
    )

    # argparse falls back to sys.argv[1:] when argv is None.
    return parser.parse_args(argv)
def main():
    """Validate the CLI options and open the zero-style UI in a browser tab.

    The parsed options are checked, encoded as a query string and appended
    to ``http://localhost:<port>``; the resulting URL is handed to the
    system web browser. Only the episode ID, timestep and simulation
    settings are sent; the ``--episodes-dir`` value is not part of the URL.

    Raises:
        SystemExit: With status 1 when an option fails validation or the
            browser cannot be launched. A message describing the problem is
            printed first.
    """
    args = parse_args()
    try:
        if args.max_assistant_actions <= 0:
            raise ValueError("--max-assistant-actions must be a positive integer")
        if args.max_human_actions < 0:
            raise ValueError("--max-human-actions must be zero or a positive integer")
        if args.assistant_noise_top_k <= 0:
            raise ValueError("--assistant-noise-top-k must be a positive integer")
        # A probability outside [0, 1] is meaningless; the chained comparison
        # also rejects NaN, which compares false against everything.
        if not 0.0 <= args.assistant_noise_prob <= 1.0:
            raise ValueError("--assistant-noise-prob must be between 0 and 1")

        # Just pass episode and timestep - let the app load the data
        params = {
            "episode": args.episode,
            "timestep": str(args.timestep),
            "maxAssistantActions": str(args.max_assistant_actions),
            "maxHumanActions": str(args.max_human_actions),
            "assistantNoiseProb": str(args.assistant_noise_prob),
            "assistantNoiseTopK": str(args.assistant_noise_top_k),
        }

        # Build URL
        query_string = urllib.parse.urlencode(params)
        url = f"http://localhost:{args.port}?{query_string}"

        print("🎯 Starting zero-style simulation from episode data...")
        print(f"📍 Opening browser to http://localhost:{args.port}")
        print(f"📁 Episode: {args.episode}")
        print(f"⏱️ Timestep: {args.timestep}")
        print("")

        try:
            opened = browser.open(url)
        except Exception as e:
            print(f"❌ Error opening browser: {e}")
            sys.exit(1)

        # webbrowser.open() signals "no browser could be launched" by
        # returning False rather than raising, so check it explicitly
        # instead of reporting success unconditionally.
        if not opened:
            print(
                f"❌ Error opening browser: no usable browser found; open {url} manually"
            )
            sys.exit(1)

        print("✅ Browser opened!")
        print(
            f"🎯 Zero-style simulation will run with episode data from timestep {args.timestep}"
        )
        print("")
    except Exception as e:
        # SystemExit is not an Exception subclass, so the sys.exit(1) calls
        # above pass through this handler untouched.
        print(f"❌ Error: {e}")
        sys.exit(1)
# Run the launcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()