-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathChassisConfig(NOTTESTEDYET).py
More file actions
179 lines (150 loc) · 6.79 KB
/
Copy pathChassisConfig(NOTTESTEDYET).py
File metadata and controls
179 lines (150 loc) · 6.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
"""
This script was GPT-generated
config_builder.py
-----------------
This script helps a user build a configuration YAML file for optimization experiments.
It is meant as a chassis (template) script — users can easily modify parameters, methods,
and experimental setups without touching the core logic.
🧩 Dependencies:
pip install pyyaml numpy
📄 Usage (example):
python config_builder.py --target product_A --output_dir ./configs/ --n_cycles 5
All key variables are annotated below so a new user can understand what each parameter means.
"""
import argparse
import datetime
import yaml
import numpy as np
from pathlib import Path
from itertools import islice
def build_config(args):
    """
    Build the configuration dictionary and write it to a .yml file.

    Args:
        args: Namespace-like object (e.g. the result of
            ``ArgumentParser.parse_args``) exposing the attributes
            ``target``, ``output_dir``, ``run_id``, ``n_cycles``,
            ``n_experiments``, ``n_features``, ``n_engineered_positions``,
            ``beta``, ``noise_percentage``, ``noise_type``,
            ``model_filepath`` and ``model_name``.

    Raises:
        ValueError: If ``args.n_cycles`` is smaller than 1.

    Side effects:
        Creates ``args.output_dir`` if it does not exist, writes one YAML
        file into it and prints the path of the written file.
    """
    # === Input Validation ===
    # Every per-cycle list below is indexed at position 0, so at least one
    # cycle is required. Fail early, before anything is created on disk.
    n_cycles = args.n_cycles  # number of experimental cycles (iterations)
    if n_cycles < 1:
        raise ValueError(f"n_cycles must be at least 1, got {n_cycles}")

    # === Basic Experiment Setup ===
    target = args.target  # name of the target system or product
    output_dir = Path(args.output_dir)  # where to save the configuration file
    output_dir.mkdir(parents=True, exist_ok=True)
    run_id = args.run_id  # run identifier
    n_experiments = [args.n_experiments] * n_cycles  # experiments per cycle
    n_screened = n_experiments[0] * 2  # how many strains are screened
    n_features = args.n_features  # number of features (enzyme/promoter pairs)
    n_engineered_positions = [args.n_engineered_positions] * n_cycles

    # === Design Methods and Strategies ===
    # The first cycle uses the plain library transform; every later cycle
    # uses the ML-assisted variant.
    design_method_per_cycle = [
        "library_transform",
        *["ml_assisted_library_transform"] * (n_cycles - 1),
    ]
    screening_sampling_strategy = "stratified_sampling"
    recommendation_method = ["greedy"] * n_cycles

    # === Noise and Model Settings ===
    noise_percentage = args.noise_percentage
    noise_type = args.noise_type
    beta = args.beta
    model_filepath = args.model_filepath
    model_name = args.model_name

    # === Promoter Values (modifiable range of promoter strengths) ===
    promoter_values = [0.5, 1, 1.5, 2]

    # === Output File Name Formatting ===
    output_name = (
        f"{target}_cycles{n_cycles}_"
        f"{screening_sampling_strategy}_beta{beta}_"
        f"Pstrength{max(promoter_values)}_S{n_screened}"
        f"X{len(promoter_values)}N{n_experiments[0]}F{n_features}"
        f"P{n_engineered_positions[0]}_run{run_id}"
    )

    # === Design Method Hyperparameters ===
    base_hyperparams = {
        "library_transform": {
            "n_screened_strains": n_screened,
            "sequencing_selection_method": "best_sampling",
        },
        "ml_assisted_library_transform": {
            "n_screened_strains": n_screened,
            "ml_method": "xgboost",
            "beta": beta,
            "data_strategy": "all",  # could be changed to "recent" or "best_only"
            "sequencing_selection_method": "best_sampling",
        },
    }
    # Give every cycle its own copy: PyYAML serialises repeated references to
    # the same dict/list as anchors and aliases (&id001 / *id001), which makes
    # the generated file hard to read and to edit by hand.
    hyperparams = [dict(base_hyperparams[m]) for m in design_method_per_cycle]

    # === Core Config Dictionary ===
    config = {
        "identifier": f"{datetime.date.today().isoformat()}_{model_name}_{target}_run{run_id}"
    }

    # === Define Parameter Names and Promoter Values ===
    # One independent list per enzyme, for the same anchor/alias reason as above.
    parameter_names_and_values = {
        f"enzyme_{i}": list(promoter_values) for i in range(1, n_features + 1)
    }

    # === Optimization Settings ===
    config["optimization_settings"] = {
        "model_filepath": f"{model_filepath}/{model_name}.xml",
        "target": target,
        "n_cycles": n_cycles,
        "noise_percentage": noise_percentage,
        "noise_type": noise_type,
        "t_start": 0,
        "t_end": 50,
        "timepoints": 200,
        "parameters_perturbation_values": parameter_names_and_values,
    }

    # === Cycle-by-Cycle Information ===
    cycle_info = {}
    for i in range(n_cycles):
        design_build_test = {
            "cycle_status": i,
            "n_strains": n_experiments[i],
            "n_engineered_positions": n_engineered_positions[i],
            "design_method": design_method_per_cycle[i],
            "design_method_hyperparams": hyperparams[i],
            "noise_percentage": noise_percentage,
            "noise_type": noise_type,
        }
        learn_recommend = {
            "recommender_method": recommendation_method[i],
            "recommender_method_hyperparams": None,
        }
        cycle_info[f"cycle_{i}"] = {
            "design_build_test": design_build_test,
            "learn_recommend": learn_recommend,
        }
    config["cycles"] = cycle_info

    # === Write to YAML ===
    config_path = output_dir / f"{output_name}.yml"
    # Explicit encoding so the file does not depend on the platform default.
    with open(config_path, "w", encoding="utf-8") as file:
        yaml.dump(config, file, sort_keys=False)
    print(f"✅ Config file created at: {config_path}")
def build_parser():
    """
    Create the command-line parser for the config builder.

    Returns:
        argparse.ArgumentParser: Parser exposing every option read by
        ``build_config``, each with a default value and a help text.
    """
    parser = argparse.ArgumentParser(
        description="Build a YAML configuration file for optimization experiments."
    )
    # === Command-Line Arguments (with defaults & explanations) ===
    parser.add_argument("--target", type=str, default="product_A",
                        help="Target product or pathway to optimize.")
    parser.add_argument("--output_dir", type=str, default="./configs/",
                        help="Output directory for generated YAML files.")
    parser.add_argument("--run_id", type=int, default=1,
                        help="Unique run identifier.")
    parser.add_argument("--n_cycles", type=int, default=5,
                        help="Number of design-test-learn cycles.")
    parser.add_argument("--n_experiments", type=int, default=50,
                        help="Number of experiments per cycle.")
    parser.add_argument("--n_features", type=int, default=10,
                        help="Number of model parameters/features to include.")
    parser.add_argument("--n_engineered_positions", type=int, default=6,
                        help="Number of engineered positions per cycle.")
    parser.add_argument("--beta", type=float, default=10.0,
                        help="Beta parameter for ML-assisted design weighting.")
    # argparse %-formats help strings, so a literal percent sign must be
    # written as "%%"; a bare "%" makes --help raise ValueError.
    parser.add_argument("--noise_percentage", type=float, default=0.1,
                        help="Amount of noise to simulate in data (e.g. 0.1 = 10%%).")
    parser.add_argument("--noise_type", type=str, default="homoscedastic",
                        help="Type of noise: homoscedastic or heteroscedastic.")
    parser.add_argument("--model_filepath", type=str, default="models/bioprocess_models",
                        help="Path to model files (without extension).")
    parser.add_argument("--model_name", type=str, default="batch_model_pathwayA",
                        help="Model name used in configuration.")
    return parser


if __name__ == "__main__":
    # Parse the command line and generate the configuration file.
    args = build_parser().parse_args()
    build_config(args)