From 1a0a193081d48660a8b7712850837ea2e35cdb6b Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 11 May 2026 09:54:33 +0200 Subject: [PATCH 01/14] add TMVA style training --- src/eventdisplay_ml/data_processing.py | 1 + src/eventdisplay_ml/features.py | 29 ++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index b9fa59b..a845fb7 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -1195,6 +1195,7 @@ def extra_columns(df, analysis_type, training, index, tel_config=None, observato "EChi2S": _to_numpy_1d(df["EChi2S"], np.float32), "EmissionHeight": _to_numpy_1d(df["EmissionHeight"], np.float32), "EmissionHeightChi2": _to_numpy_1d(df["EmissionHeightChi2"], np.float32), + "SizeSecondMax": _to_numpy_1d(df["SizeSecondMax"], np.float32), } if not training: data["ze_bin"] = _to_numpy_1d(df["ze_bin"], np.float32) diff --git a/src/eventdisplay_ml/features.py b/src/eventdisplay_ml/features.py index b4818d7..9a069d6 100644 --- a/src/eventdisplay_ml/features.py +++ b/src/eventdisplay_ml/features.py @@ -136,8 +136,33 @@ def _regression_features(training): return var -def _classification_features(): - """Classification features.""" +def _classification_features(tmva_style=False): + """ + Classification features. + + Parameters + ---------- + tmva_style : bool, optional + If True, return features matching TMVA BDT input (default: False). + + Returns + ------- + list + List of feature names. + """ + if tmva_style: + return [ + "DispNImages", + "EChi2S", + "EmissionHeight", + "EmissionHeightChi2", + "MSCW", + "MSCL", + "SizeSecondMax", + "ArrayPointing_Elevation", + "ArrayPointing_Azimuth", + ] + var_tel = telescope_features("classification") var_array = [ "DispNImages", From 754ed46f4c237e8560afe24c516e9048f5c9acf8 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 11 May 2026 10:11:34 +0200 Subject: [PATCH 02/14] add tmva style --- src/eventdisplay_ml/data_processing.py | 34 +++++++++++++++++--------- src/eventdisplay_ml/features.py | 25 +++++++++++++++++++ 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index a845fb7..a66886c 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -797,7 +797,12 @@ def load_training_data(model_configs, file_list, analysis_type): input_files = utils.read_input_file_list(file_list) - branch_list = features_module.features(analysis_type, training=True) + tmva_style = model_configs.get("tmva_style", False) + if tmva_style and analysis_type == "classification": + _logger.info("Using TMVA-style features for classification") + branch_list = features_module.features_tmva_style(analysis_type, training=True) + else: + branch_list = features_module.features(analysis_type, training=True) _logger.info(f"Branch list: {branch_list}") if max_events is not None and max_events > 0: max_events_per_file = max_events // len(input_files) @@ -860,17 +865,22 @@ def load_training_data(model_configs, file_list, analysis_type): f" (fraction retained: {len(df) / n_before:.4f})" ) - df_flat = flatten_telescope_data_vectorized( - df, - tel_config["max_tel_id"] + 1, - features_module.telescope_features(analysis_type), - analysis_type, - training=True, - tel_config=tel_config, - observatory=model_configs.get("observatory", "veritas"), - max_tel_per_type=model_configs.get("max_tel_per_type", None), - preview_rows=model_configs.get("preview_rows", 20), - ) + # For TMVA-style classification, skip telescope flattening (use event-level features only) + if tmva_style and analysis_type == "classification": + _logger.info("Converting to pandas (no telescope flattening for TMVA style)") + df_flat = ak.to_pandas(df) + else: + df_flat = flatten_telescope_data_vectorized( + df, + tel_config["max_tel_id"] + 1, + features_module.telescope_features(analysis_type), + analysis_type, + training=True, + tel_config=tel_config, + observatory=model_configs.get("observatory", "veritas"), + max_tel_per_type=model_configs.get("max_tel_per_type", None), + preview_rows=model_configs.get("preview_rows", 20), + ) # Filter out events with invalid energy reconstruction for stereo training if analysis_type == "stereo_analysis": diff --git a/src/eventdisplay_ml/features.py b/src/eventdisplay_ml/features.py index 9a069d6..acfbf12 100644 --- a/src/eventdisplay_ml/features.py +++ b/src/eventdisplay_ml/features.py @@ -1,6 +1,31 @@ """Features used for XGB training and prediction.""" +def features_tmva_style(analysis_type, training=True): + """ + Get TMVA-style features for classification analysis. + + Parameters + ---------- + analysis_type : str + Type of analysis. + training : bool, optional + If True (default), return features including target features. + If False, return only non-target features (i.e. features used + for prediction). + + Returns + ------- + list + List of feature names. + """ + if analysis_type == "stereo_analysis": + return _regression_features(training) + if "classification" in analysis_type: + return _classification_features(tmva_style=True) + raise ValueError(f"Unknown analysis type: {analysis_type}") + + def target_features(analysis_type): """ Get target features based on analysis type. From da2b99eebc0e59c0cf2b1f05ff79ed75577e663d Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 11 May 2026 10:38:15 +0200 Subject: [PATCH 03/14] optional fields --- src/eventdisplay_ml/data_processing.py | 13 +++++++++-- ...test_classification_apply_interpolation.py | 22 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index a66886c..33bb01b 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -130,7 +130,15 @@ def _resolve_branch_aliases(tree, branch_list): resolved = [b for b in resolved if b not in synthesized] # Drop missing optional branches - optional = {"fpointing_dx", "fpointing_dy", "E", "Erec", "ErecS", "nlowgain"} + optional = { + "fpointing_dx", + "fpointing_dy", + "E", + "Erec", + "ErecS", + "nlowgain", + "SizeSecondMax", + } final = [b for b in resolved if b not in optional or b in keys] return final, rename @@ -1205,8 +1213,9 @@ def extra_columns(df, analysis_type, training, index, tel_config=None, observato "EChi2S": _to_numpy_1d(df["EChi2S"], np.float32), "EmissionHeight": _to_numpy_1d(df["EmissionHeight"], np.float32), "EmissionHeightChi2": _to_numpy_1d(df["EmissionHeightChi2"], np.float32), - "SizeSecondMax": _to_numpy_1d(df["SizeSecondMax"], np.float32), } + if _has_field(df, "SizeSecondMax"): + data["SizeSecondMax"] = _to_numpy_1d(df["SizeSecondMax"], np.float32) if not training: data["ze_bin"] = _to_numpy_1d(df["ze_bin"], np.float32) diff --git a/tests/test_classification_apply_interpolation.py b/tests/test_classification_apply_interpolation.py index a0d0e67..198d71e 100644 --- a/tests/test_classification_apply_interpolation.py +++ b/tests/test_classification_apply_interpolation.py @@ -70,3 +70,25 @@ def test_apply_classification_models_interpolates_probabilities_and_thresholds(m # Thresholds are interpolated the same way: [0.5, 0.7] np.testing.assert_array_equal(is_gamma[50], np.array([0, 1], dtype=np.uint8)) + + +def test_extra_columns_skip_tmva_only_size_second_max_when_branch_missing(): + """Standard classification should not synthesize the TMVA-only SizeSecondMax column.""" + df = pd.DataFrame( + { + "MSCW": [0.1, 0.2], + "MSCL": [0.3, 0.4], + "EChi2S": [1.0, 2.0], + "EmissionHeight": [10.0, 20.0], + "EmissionHeightChi2": [0.5, 0.25], + } + ) + + result = data_processing.extra_columns( + df, + analysis_type="classification", + training=True, + index=pd.RangeIndex(2), + ) + + assert "SizeSecondMax" not in result.columns From 7aae25ea268ced849884fb01409d0ad715ba17c4 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 11 May 2026 13:22:55 +0200 Subject: [PATCH 04/14] read tmva style --- src/eventdisplay_ml/config.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/eventdisplay_ml/config.py b/src/eventdisplay_ml/config.py index 860bbca..07b47db 100644 --- a/src/eventdisplay_ml/config.py +++ b/src/eventdisplay_ml/config.py @@ -149,12 +149,23 @@ def configure_training(analysis_type): model_parameters = utils.load_model_parameters( model_configs["model_parameters"], model_configs["energy_bin_number"] ) + tmva_style_raw = model_parameters.get("tmva_style", False) + if isinstance(tmva_style_raw, str): + model_configs["tmva_style"] = tmva_style_raw.strip().lower() in { + "1", + "true", + "yes", + "on", + } + else: + model_configs["tmva_style"] = bool(tmva_style_raw) model_configs["pre_cuts"] = pre_cuts_classification( e_min=np.power(10.0, model_parameters.get("energy_bins_log10_tev", []).get("E_min")), e_max=np.power(10.0, model_parameters.get("energy_bins_log10_tev", []).get("E_max")), ) model_configs["energy_bins_log10_tev"] = model_parameters.get("energy_bins_log10_tev", []) model_configs["zenith_bins_deg"] = model_parameters.get("zenith_bins_deg", []) + _logger.info(f"TMVA-style classification: {model_configs['tmva_style']}") _logger.info(f"Pre-cuts: {model_configs['pre_cuts']}") From e4248a4a22a84654c0728fda4d091b77082e9f99 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 11 May 2026 13:26:24 +0200 Subject: [PATCH 05/14] read tmva style --- src/eventdisplay_ml/data_processing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index 33bb01b..4b32a11 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -876,7 +876,9 @@ def load_training_data(model_configs, file_list, analysis_type): # For TMVA-style classification, skip telescope flattening (use event-level features only) if tmva_style and analysis_type == "classification": _logger.info("Converting to pandas (no telescope flattening for TMVA style)") - df_flat = ak.to_pandas(df) + # Build DataFrame directly to stay compatible across awkward versions + # (some versions do not provide ak.to_pandas). + df_flat = pd.DataFrame({name: _to_numpy_1d(df[name]) for name in df.fields}) else: df_flat = flatten_telescope_data_vectorized( df, From 1c05bb6be0f6435deff0a087fd784bb7606475ca Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Mon, 11 May 2026 14:23:54 +0200 Subject: [PATCH 06/14] add signal events --- src/eventdisplay_ml/evaluate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eventdisplay_ml/evaluate.py b/src/eventdisplay_ml/evaluate.py index 54167d8..8b0d4e5 100644 --- a/src/eventdisplay_ml/evaluate.py +++ b/src/eventdisplay_ml/evaluate.py @@ -41,8 +41,8 @@ def evaluation_efficiency(name, model, x_test, y_test): "threshold": thresholds, "signal_efficiency": eff_signal, "background_efficiency": eff_background, - "n_signal": n_signal, - "n_background": n_background, + "n_signal": n_signal * eff_signal, + "n_background": n_background * eff_background, } ) From f547ac07780663cf5f1a88c872c7f2d6cafb8307 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Tue, 12 May 2026 09:32:36 +0200 Subject: [PATCH 07/14] correct signal efficiency --- src/eventdisplay_ml/evaluate.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/eventdisplay_ml/evaluate.py b/src/eventdisplay_ml/evaluate.py index 8b0d4e5..fe93b17 100644 --- a/src/eventdisplay_ml/evaluate.py +++ b/src/eventdisplay_ml/evaluate.py @@ -36,6 +36,9 @@ def evaluation_efficiency(name, model, x_test, y_test): f"Background Efficiency: {eff_background[-1]:.4f}" ) + eff_signal = np.asarray(eff_signal, dtype=float) + eff_background = np.asarray(eff_background, dtype=float) + return pd.DataFrame( { "threshold": thresholds, From 6db8b3ee65296677172cc7c3791a9056e4ee1ecb Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Tue, 12 May 2026 17:08:02 +0200 Subject: [PATCH 08/14] set pointing variables correctly --- src/eventdisplay_ml/data_processing.py | 13 +++++++++++++ src/eventdisplay_ml/features.py | 3 +-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index 4b32a11..5f2524f 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -809,6 +809,11 @@ def load_training_data(model_configs, file_list, analysis_type): if tmva_style and analysis_type == "classification": _logger.info("Using TMVA-style features for classification") branch_list = features_module.features_tmva_style(analysis_type, training=True) + # ze_bin is a derived feature and not present in ROOT input. + # Read elevation as auxiliary input to derive ze_bin. + branch_list = [b for b in branch_list if b not in {"ze_bin", "ArrayPointing_Azimuth"}] + [ + "ArrayPointing_Elevation" + ] else: branch_list = features_module.features(analysis_type, training=True) _logger.info(f"Branch list: {branch_list}") @@ -932,6 +937,14 @@ def load_training_data(model_configs, file_list, analysis_type): for col_name, values in new_cols.items(): df_flat[col_name] = values + # For TMVA-style classification, keep pointing only as an intermediate + # to compute ze_bin, but do not expose raw pointing as ML features. + if tmva_style and analysis_type == "classification": + df_flat = df_flat.drop( + columns=["ArrayPointing_Elevation", "ArrayPointing_Azimuth"], + errors="ignore", + ) + # Filter out events with NaN in residuals (can't train on these) if analysis_type == "stereo_analysis": n_before_nan_filter = len(df_flat) diff --git a/src/eventdisplay_ml/features.py b/src/eventdisplay_ml/features.py index acfbf12..0e576b4 100644 --- a/src/eventdisplay_ml/features.py +++ b/src/eventdisplay_ml/features.py @@ -184,8 +184,7 @@ def _classification_features(tmva_style=False): "MSCW", "MSCL", "SizeSecondMax", - "ArrayPointing_Elevation", - "ArrayPointing_Azimuth", + "ze_bin", ] var_tel = telescope_features("classification") From f1a05a4f063b8bcb67d29a280adb90f2a9aa2ee6 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 14 May 2026 10:20:08 +0200 Subject: [PATCH 09/14] features tmva style --- src/eventdisplay_ml/features.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/eventdisplay_ml/features.py b/src/eventdisplay_ml/features.py index 0e576b4..d4b277d 100644 --- a/src/eventdisplay_ml/features.py +++ b/src/eventdisplay_ml/features.py @@ -20,7 +20,7 @@ def features_tmva_style(analysis_type, training=True): List of feature names. """ if analysis_type == "stereo_analysis": - return _regression_features(training) + raise ValueError("TMVA-style features are only defined for classification analysis.") if "classification" in analysis_type: return _classification_features(tmva_style=True) raise ValueError(f"Unknown analysis type: {analysis_type}") @@ -92,7 +92,7 @@ def telescope_features(analysis_type): Parameters ---------- analysis_type : str - Type of analysis, e.g. ``"classification"`` or ``"stereo_analysis"``. + Type of analysis. Returns ------- @@ -176,6 +176,7 @@ def _classification_features(tmva_style=False): List of feature names. """ if tmva_style: + # Base features from ROOT file needed to compute derived features return [ "DispNImages", "EChi2S", @@ -184,7 +185,10 @@ def _classification_features(tmva_style=False): "MSCW", "MSCL", "SizeSecondMax", - "ze_bin", + "Xcore", + "Ycore", + "DispAbsSumWeigth", + "ArrayPointing_Elevation", ] var_tel = telescope_features("classification") @@ -211,6 +215,9 @@ def clip_intervals(): """ Get clip intervals for variables. + Clip intervals are defined based on physical bounds or ranges used + in the classical TMVA BDT analysis. + Returns ------- dict @@ -232,8 +239,8 @@ def clip_intervals(): # Energy-related variables - log10 transformation with lower bound "Erec": (energy_min, None), "ErecS": (energy_min, None), - "EChi2S": (energy_min, None), - "EmissionHeightChi2": (1e-6, None), + "EChi2S": (energy_min, 10000.0), # TMVA: -6 to 4 (before log10) + "EmissionHeightChi2": (1e-6, 10000.0), # TMVA: -11 to 4 (before log10) "img2_ang": (0.0, 360.0), # Per-telescope energy and size variables - log10 transformation with lower bound "size": (1, None), @@ -244,8 +251,13 @@ def clip_intervals(): # Derived variables - avoid numerical issues "size_dist2": (1.0, None), "tgrad_x": (-50.0, 50.0), + "MSCW": (-2.0, 2.0), + "MSCL": (-2.0, 5.0), + "SizeSecondMax": (1e-6, 100000.0), # TMVA: 0 to 5 (after log10) + "Core_Distance": (0.0, 1000.0), # sqrt(Xcore^2 + Ycore^2) + "DispAbsSumWeigth": (0.0, 5.0), # TMVA: 0 to 5 # Physical bounds - "EmissionHeight": (0, 120), # top of atmosphere + "EmissionHeight": (0, 100), # top of atmosphere "R_core": (-10, None), # badly reconstructed events } From 7acd328888ab97b1f2c36910817ea7ccf8f3dd2b Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 14 May 2026 10:24:41 +0200 Subject: [PATCH 10/14] correct tmva variables --- src/eventdisplay_ml/data_processing.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index 5f2524f..5e4cb21 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -51,7 +51,6 @@ def read_telescope_config(root_file): n_tel = int(telconfig_data["NTel"][0]) tel_ids = telconfig_data["TelID"] - # Keep array of mirror areas; avoid shadowing this name later mirror_area_arr = telconfig_data["MirrorArea"] tel_x = telconfig_data["TelX"] tel_y = telconfig_data["TelY"] @@ -1222,6 +1221,7 @@ def extra_columns(df, analysis_type, training, index, tel_config=None, observato tel_list_matrix = _to_dense_array(df["DispTelList_T"]) data["array_footprint"] = _calculate_array_footprint(tel_config, tel_list_matrix) elif "classification" in analysis_type: + n = len(index) data = { "MSCW": _to_numpy_1d(df["MSCW"], np.float32), "MSCL": _to_numpy_1d(df["MSCL"], np.float32), @@ -1231,6 +1231,17 @@ def extra_columns(df, analysis_type, training, index, tel_config=None, observato } if _has_field(df, "SizeSecondMax"): data["SizeSecondMax"] = _to_numpy_1d(df["SizeSecondMax"], np.float32) + # Add core distance: sqrt(Xcore^2 + Ycore^2) + if _has_field(df, "Xcore") and _has_field(df, "Ycore"): + xcore = _to_numpy_1d(df["Xcore"], np.float32) + ycore = _to_numpy_1d(df["Ycore"], np.float32) + data["Core_Distance"] = np.sqrt(xcore**2 + ycore**2).astype(np.float32) + else: + data["Core_Distance"] = np.full(n, DEFAULT_FILL_VALUE, dtype=np.float32) + if _has_field(df, "DispAbsSumWeigth"): + data["DispAbsSumWeigth"] = _to_numpy_1d(df["DispAbsSumWeigth"], np.float32) + else: + data["DispAbsSumWeigth"] = np.full(n, DEFAULT_FILL_VALUE, dtype=np.float32) if not training: data["ze_bin"] = _to_numpy_1d(df["ze_bin"], np.float32) @@ -1249,6 +1260,7 @@ def extra_columns(df, analysis_type, training, index, tel_config=None, observato "EmissionHeightChi2", "Erec", "ErecS", + "SizeSecondMax", ] apply_clip_intervals( df_extra, @@ -1283,6 +1295,8 @@ def energy_in_bins(df_chunk, bins): def energy_interpolation_bins(df_chunk, bins): """Compute neighboring energy bins and interpolation weights per event. + Allows to interpolate downstream using 'value = (1 - alpha) * value_lo + alpha * value_hi'. + Parameters ---------- df_chunk : pandas.DataFrame From 92e06ae057d5805dd32e125dba906bbff5bd854c Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 14 May 2026 10:34:05 +0200 Subject: [PATCH 11/14] unit tests --- tests/test_data_processing.py | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tests/test_data_processing.py diff --git a/tests/test_data_processing.py b/tests/test_data_processing.py new file mode 100644 index 0000000..e565113 --- /dev/null +++ b/tests/test_data_processing.py @@ -0,0 +1,48 @@ +"""Tests for data_processing.""" + +import numpy as np +import pandas as pd + +from eventdisplay_ml.data_processing import energy_interpolation_bins, zenith_in_bins + + +def test_zenith_in_bins_numeric_edges_clips_and_handles_boundaries(): + """Numeric bin edges should clip out-of-range values and place edge values consistently.""" + zenith_angles = np.array([-5.0, 0.0, 9.9, 10.0, 19.9, 20.0, 42.0], dtype=float) + bins = [0.0, 10.0, 20.0, 30.0] + + result = zenith_in_bins(zenith_angles, bins) + + np.testing.assert_array_equal(result, np.array([0, 0, 0, 1, 1, 2, 2], dtype=np.int32)) + assert result.dtype == np.int32 + + +def test_zenith_in_bins_dict_bins_matches_numeric_definition(): + """Dict-style zenith bins should map to the same indices as explicit numeric edges.""" + zenith_angles = np.array([-5.0, 0.0, 9.9, 10.0, 19.9, 20.0, 42.0], dtype=float) + dict_bins = [ + {"Ze_min": 0.0, "Ze_max": 10.0}, + {"Ze_min": 10.0, "Ze_max": 20.0}, + {"Ze_min": 20.0, "Ze_max": 30.0}, + ] + + result = zenith_in_bins(zenith_angles, dict_bins) + + np.testing.assert_array_equal(result, np.array([0, 0, 0, 1, 1, 2, 2], dtype=np.int32)) + assert result.dtype == np.int32 + + +def test_energy_interpolation_bins_interpolates_and_clamps_with_invalid_events(): + """Interpolation bins should handle interior, edge, and invalid energies robustly.""" + df_chunk = pd.DataFrame({"Erec": [0.0, 0.1, 1.0, 10.0, 100.0]}) + bins = [ + {"E_min": -1.5, "E_max": -0.5}, + None, + {"E_min": 0.5, "E_max": 1.5}, + ] + + e_bin_lo, e_bin_hi, e_alpha = energy_interpolation_bins(df_chunk, bins) + + np.testing.assert_array_equal(e_bin_lo, np.array([-1, 0, 0, 0, 2], dtype=np.int32)) + np.testing.assert_array_equal(e_bin_hi, np.array([-1, 0, 2, 2, 2], dtype=np.int32)) + np.testing.assert_allclose(e_alpha, np.array([0.0, 0.0, 0.5, 1.0, 0.0], dtype=np.float32)) From 2eb6a86aef3c8cc7de6f79a5d5c2e39f2ad52a21 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 14 May 2026 10:58:42 +0200 Subject: [PATCH 12/14] changelog --- docs/changes/58.feature.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/changes/58.feature.md diff --git a/docs/changes/58.feature.md b/docs/changes/58.feature.md new file mode 100644 index 0000000..f345cfe --- /dev/null +++ b/docs/changes/58.feature.md @@ -0,0 +1 @@ +Add TMVA-style gamma/hadron separation with identical features as TMVA BDT classification analysis. From 0fe6e03108bdc4a1e4236ce3c0c7371c9c13334f Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 14 May 2026 11:14:01 +0200 Subject: [PATCH 13/14] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- docs/changes/58.feature.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changes/58.feature.md b/docs/changes/58.feature.md index f345cfe..c7f0210 100644 --- a/docs/changes/58.feature.md +++ b/docs/changes/58.feature.md @@ -1 +1 @@ -Add TMVA-style gamma/hadron separation with identical features as TMVA BDT classification analysis. +Add TMVA-style gamma/hadron separation with the same features as TMVA BDT classification analysis. From 3f5d12e3ebb3718ad6ccd68727662d98543623e8 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Thu, 14 May 2026 11:14:12 +0200 Subject: [PATCH 14/14] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/eventdisplay_ml/data_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/eventdisplay_ml/data_processing.py b/src/eventdisplay_ml/data_processing.py index 5e4cb21..a3801d2 100644 --- a/src/eventdisplay_ml/data_processing.py +++ b/src/eventdisplay_ml/data_processing.py @@ -1295,7 +1295,7 @@ def energy_in_bins(df_chunk, bins): def energy_interpolation_bins(df_chunk, bins): """Compute neighboring energy bins and interpolation weights per event. - Allows to interpolate downstream using 'value = (1 - alpha) * value_lo + alpha * value_hi'. + Allows downstream interpolation using 'value = (1 - alpha) * value_lo + alpha * value_hi'. Parameters ----------