BenchmarkProjectDSMF/scripts/ChassisConfig(NOTTESTEDYET).py at main · AbeelLab/BenchmarkProjectDSMF · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
"""
This script was GPT-generated

config_builder.py
-----------------
This script helps a user build a configuration YAML file for optimization experiments.
It is meant as a chassis (template) script — users can easily modify parameters, methods,
and experimental setups without touching the core logic.

🧩 Dependencies:
    pip install pyyaml numpy

📄 Usage (example):
    python config_builder.py --target product_A --output_dir ./configs/ --n_cycles 5

All key variables are annotated below so a new user can understand what each parameter means.
"""

import argparse
import datetime
import yaml
import numpy as np
from pathlib import Path
from itertools import islice


def build_config(args):
    """
    Build the configuration dictionary and write it to a .yml file.

    The file is written to ``<output_dir>/<output_name>.yml`` where the name
    encodes the main settings (target, cycles, sampling strategy, beta, ...).
    The output directory is created if it does not exist.

    Args:
        args: Object exposing the attributes ``target``, ``output_dir``,
            ``run_id``, ``n_cycles``, ``n_experiments``, ``n_features``,
            ``n_engineered_positions``, ``beta``, ``noise_percentage``,
            ``noise_type``, ``model_filepath`` and ``model_name``
            (e.g. the ``argparse.Namespace`` produced by this script's parser).

    Raises:
        ValueError: If ``args.n_cycles`` is smaller than 1.
    """

    # Validate before touching the filesystem: every per-cycle list below
    # is indexed at [0], so at least one cycle is required.
    n_cycles = args.n_cycles                  # number of experimental cycles (iterations)
    if n_cycles < 1:
        raise ValueError(f"n_cycles must be at least 1, got {n_cycles}")

    # === Basic Experiment Setup ===
    target = args.target                      # name of the target system or product
    output_dir = Path(args.output_dir)        # where to save the configuration file
    output_dir.mkdir(parents=True, exist_ok=True)

    run_id = args.run_id                      # run identifier
    n_experiments = [args.n_experiments] * n_cycles  # number of experiments per cycle
    n_screened = n_experiments[0] * 2         # how many strains are screened
    n_features = args.n_features              # number of features (enzyme/promoter pairs)
    n_engineered_positions = [args.n_engineered_positions] * n_cycles

    # === Design Methods and Strategies ===
    # First cycle uses the plain library transform; all later cycles are ML-assisted.
    design_method_per_cycle = [
        "library_transform",
        *["ml_assisted_library_transform"] * (n_cycles - 1)
    ]

    screening_sampling_strategy = "stratified_sampling"
    recommendation_method = ["greedy"] * n_cycles

    # === Noise and Model Settings ===
    noise_percentage = args.noise_percentage
    noise_type = args.noise_type
    beta = args.beta
    model_filepath = args.model_filepath
    model_name = args.model_name

    # === Promoter Values (modifiable range of promoter strengths) ===
    promoter_values = [0.5, 1, 1.5, 2]

    # === Output File Name Formatting ===
    output_name = (
        f"{target}_cycles{n_cycles}_"
        f"{screening_sampling_strategy}_beta{beta}_"
        f"Pstrength{max(promoter_values)}_S{n_screened}"
        f"X{len(promoter_values)}N{n_experiments[0]}F{n_features}"
        f"P{n_engineered_positions[0]}_run{run_id}"
    )

    # === Design Method Hyperparameters ===
    base_hyperparams = {
        "library_transform": {
            "n_screened_strains": n_screened,
            "sequencing_selection_method": "best_sampling"
        },
        "ml_assisted_library_transform": {
            "n_screened_strains": n_screened,
            "ml_method": "xgboost",
            "beta": beta,
            "data_strategy": "all",  # could be changed to "recent" or "best_only"
            "sequencing_selection_method": "best_sampling"
        },
    }

    # Give every cycle its own copy: PyYAML serialises a dict/list object that
    # appears more than once as an anchor plus aliases (&id001 / *id001), which
    # makes the generated file hard to read and to edit by hand.
    hyperparams = [dict(base_hyperparams[m]) for m in design_method_per_cycle]

    # === Core Config Dictionary ===
    config = {
        "identifier": f"{datetime.date.today().isoformat()}_{model_name}_{target}_run{run_id}"
    }

    # === Define Parameter Names and Promoter Values ===
    # A fresh list per enzyme, for the same anchor/alias reason as above.
    parameter_names_and_values = {
        f"enzyme_{i}": list(promoter_values) for i in range(1, n_features + 1)
    }

    # === Optimization Settings ===
    config["optimization_settings"] = {
        "model_filepath": f"{model_filepath}/{model_name}.xml",
        "target": target,
        "n_cycles": n_cycles,
        "noise_percentage": noise_percentage,
        "noise_type": noise_type,
        "t_start": 0,
        "t_end": 50,
        "timepoints": 200,
        "parameters_perturbation_values": parameter_names_and_values,
    }

    # === Cycle-by-Cycle Information ===
    cycle_info = {}
    for i in range(n_cycles):
        design_build_test = {
            "cycle_status": i,
            "n_strains": n_experiments[i],
            "n_engineered_positions": n_engineered_positions[i],
            "design_method": design_method_per_cycle[i],
            "design_method_hyperparams": hyperparams[i],
            "noise_percentage": noise_percentage,
            "noise_type": noise_type,
        }

        learn_recommend = {
            "recommender_method": recommendation_method[i],
            "recommender_method_hyperparams": None,
        }

        cycle_info[f"cycle_{i}"] = {
            "design_build_test": design_build_test,
            "learn_recommend": learn_recommend,
        }

    config["cycles"] = cycle_info

    # === Write to YAML ===
    config_path = output_dir / f"{output_name}.yml"
    # Explicit encoding so the output does not depend on the platform default.
    with open(config_path, "w", encoding="utf-8") as file:
        yaml.dump(config, file, sort_keys=False)

    print(f"✅ Config file created at: {config_path}")


def build_parser():
    """
    Create the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: Parser defining every option read by
        ``build_config`` together with its default value.
    """
    parser = argparse.ArgumentParser(
        description="Build a YAML configuration file for optimization experiments."
    )

    # === Command-Line Arguments (with defaults & explanations) ===
    parser.add_argument("--target", type=str, default="product_A",
                        help="Target product or pathway to optimize.")
    parser.add_argument("--output_dir", type=str, default="./configs/",
                        help="Output directory for generated YAML files.")
    parser.add_argument("--run_id", type=int, default=1,
                        help="Unique run identifier.")
    parser.add_argument("--n_cycles", type=int, default=5,
                        help="Number of design-test-learn cycles.")
    parser.add_argument("--n_experiments", type=int, default=50,
                        help="Number of experiments per cycle.")
    parser.add_argument("--n_features", type=int, default=10,
                        help="Number of model parameters/features to include.")
    parser.add_argument("--n_engineered_positions", type=int, default=6,
                        help="Number of engineered positions per cycle.")
    parser.add_argument("--beta", type=float, default=10.0,
                        help="Beta parameter for ML-assisted design weighting.")
    # argparse %-formats help strings, so a literal percent sign must be
    # written as "%%"; a bare "%" makes --help fail with a ValueError.
    parser.add_argument("--noise_percentage", type=float, default=0.1,
                        help="Amount of noise to simulate in data (e.g. 0.1 = 10%%).")
    parser.add_argument("--noise_type", type=str, default="homoscedastic",
                        help="Type of noise: homoscedastic or heteroscedastic.")
    parser.add_argument("--model_filepath", type=str, default="models/bioprocess_models",
                        help="Path to model files (without extension).")
    parser.add_argument("--model_name", type=str, default="batch_model_pathwayA",
                        help="Model name used in configuration.")
    return parser


if __name__ == "__main__":
    parser = build_parser()
    args = parser.parse_args()
    build_config(args)