codeassist/start_zero_style.py at main · ariznode/codeassist · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
"""
Start Zero Style App Script
Opens a new browser tab with the zero-style app using episode data.
Accepts command-line arguments for episode ID and timestep.
"""

import argparse
import webbrowser as browser
import sys
import urllib.parse


def parse_args(argv=None):
    """Parse command-line arguments for the zero-style launcher.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which is
            the behavior existing callers rely on. Passing an explicit list
            makes the parser usable from other code.

    Returns:
        An ``argparse.Namespace`` with the attributes ``episode``,
        ``timestep``, ``port``, ``episodes_dir``, ``max_assistant_actions``,
        ``max_human_actions``, ``assistant_noise_prob`` and
        ``assistant_noise_top_k``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            a value cannot be converted to its declared type.
    """
    parser = argparse.ArgumentParser(
        description="Start a zero-style simulation using episode data"
    )

    # Required: which episode and which point in it to start from.
    parser.add_argument(
        "--episode",
        type=str,
        required=True,
        help="Episode ID (directory name in state-service/episodes)",
    )
    parser.add_argument(
        "--timestep",
        type=int,
        required=True,
        help="Timestep number (line number in JSONL file, 1-indexed)",
    )

    # Optional: where the UI lives and where episode data is stored.
    parser.add_argument(
        "--port",
        type=int,
        default=3003,
        help="Port number where the zero-style UI is running (default: 3003)",
    )
    parser.add_argument(
        "--episodes-dir",
        type=str,
        default="./persistent-data/state-service/episodes",
        help="Path to episodes directory (default: ./persistent-data/state-service/episodes)",
    )

    # Optional: simulation limits and exploration settings.
    parser.add_argument(
        "--max-assistant-actions",
        type=int,
        default=2,
        help="Number of assistant actions to allow before stopping the simulation",
    )
    parser.add_argument(
        "--max-human-actions",
        type=int,
        default=1,
        help="Number of human policy actions to run after assistant actions",
    )
    parser.add_argument(
        "--assistant-noise-prob",
        type=float,
        default=0.05,
        help="Probability of replacing an assistant action with a random exploratory action",
    )
    parser.add_argument(
        "--assistant-noise-top-k",
        type=int,
        default=3,
        help="When exploration triggers, sample uniformly from the assistant policy's top-k actions",
    )

    # argparse falls back to sys.argv[1:] when argv is None.
    return parser.parse_args(argv)


def main():
    """Validate the CLI options and open the zero-style UI in the browser.

    Builds a ``http://localhost:<port>?<query>`` URL carrying the episode,
    timestep, action limits and exploration settings as query parameters,
    then asks the default web browser to open it. The episodes directory
    option is not forwarded: the app is expected to load the episode data
    itself.

    Raises:
        SystemExit: With status 1 when an option value is out of range or
            the browser could not be launched.
    """
    args = parse_args()

    # Checked in order; the first failing check is the one reported.
    # The chained comparison on the probability also rejects NaN.
    checks = (
        (
            args.max_assistant_actions > 0,
            "--max-assistant-actions must be a positive integer",
        ),
        (
            args.max_human_actions >= 0,
            "--max-human-actions must be zero or a positive integer",
        ),
        (
            args.assistant_noise_top_k > 0,
            "--assistant-noise-top-k must be a positive integer",
        ),
        (
            0.0 <= args.assistant_noise_prob <= 1.0,
            "--assistant-noise-prob must be between 0 and 1",
        ),
    )
    for is_valid, message in checks:
        if not is_valid:
            print(f"❌ Error: {message}", file=sys.stderr)
            sys.exit(1)

    # Just pass episode and timestep - let the app load the data
    params = {
        "episode": args.episode,
        "timestep": str(args.timestep),
        "maxAssistantActions": str(args.max_assistant_actions),
        "maxHumanActions": str(args.max_human_actions),
        "assistantNoiseProb": str(args.assistant_noise_prob),
        "assistantNoiseTopK": str(args.assistant_noise_top_k),
    }

    # urlencode percent-escapes the values so arbitrary episode IDs are safe.
    query_string = urllib.parse.urlencode(params)
    url = f"http://localhost:{args.port}?{query_string}"

    print("🎯 Starting zero-style simulation from episode data...")
    print(f"📍 Opening browser to http://localhost:{args.port}")
    print(f"📁 Episode: {args.episode}")
    print(f"⏱️  Timestep: {args.timestep}")
    print("")

    try:
        # webbrowser.open() signals most failures by returning False rather
        # than raising, so the return value has to be inspected.
        opened = browser.open(url)
    except Exception as e:
        print(f"❌ Error opening browser: {e}", file=sys.stderr)
        sys.exit(1)

    if not opened:
        print(
            "❌ Error opening browser: no usable browser could be launched",
            file=sys.stderr,
        )
        print(f"🔗 Open this URL manually: {url}", file=sys.stderr)
        sys.exit(1)

    print("✅ Browser opened!")
    print(
        f"🎯 Zero-style simulation will run with episode data from timestep {args.timestep}"
    )
    print("")


# Run the launcher only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()