Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
repos:
# Ruff
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.12
rev: v0.15.13
hooks:
- id: ruff
args: ["--fix"]
Expand Down
1 change: 1 addition & 0 deletions docs/changes/61.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add calculation and plotting of zenith-angle-dependent signal and background efficiencies (classification mode).
47 changes: 41 additions & 6 deletions src/eventdisplay_ml/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,8 @@
_logger = logging.getLogger(__name__)


def evaluation_efficiency(name, model, x_test, y_test):
"""Calculate signal and background efficiency as a function of threshold."""
y_pred_proba = model.predict_proba(x_test)[:, 1]
thresholds = np.linspace(0, 1, 101)

def _efficiency_dataframe(name, y_pred_proba, y_test, thresholds, context_label=""):
"""Compute efficiency dataframe for a prediction/label slice."""
n_signal = (y_test == 1).sum()
n_background = (y_test == 0).sum()

Expand All @@ -31,7 +28,7 @@ def evaluation_efficiency(name, model, x_test, y_test):
eff_signal.append(((pred) & (y_test == 1)).sum() / n_signal if n_signal else 0)
eff_background.append(((pred) & (y_test == 0)).sum() / n_background if n_background else 0)
_logger.info(
f"{name} Threshold: {t:.2f} | "
f"{name}{context_label} Threshold: {t:.2f} | "
f"Signal Efficiency: {eff_signal[-1]:.4f} | "
f"Background Efficiency: {eff_background[-1]:.4f}"
)
Comment thread
GernotMaier marked this conversation as resolved.
Expand All @@ -50,6 +47,44 @@ def evaluation_efficiency(name, model, x_test, y_test):
)


def evaluation_efficiency(name, model, x_test, y_test, return_by_zenith=False):
    """Calculate signal/background efficiency for all events and optionally by zenith bin.

    Parameters
    ----------
    name : str
        Label forwarded to ``_efficiency_dataframe`` (used there as log prefix).
    model : object
        Fitted classifier exposing ``predict_proba``; column 1 of its output is
        taken as the score that is compared against the thresholds.
    x_test : pandas.DataFrame
        Test features. A ``ze_bin`` column is required for per-zenith results.
    y_test : array-like
        Labels in the same row order as ``x_test``. Must support boolean-mask
        indexing (e.g. ``numpy.ndarray`` or ``pandas.Series``).
    return_by_zenith : bool, optional
        If False (default), return only the efficiency table for all events.
        If True, return a tuple that also holds the per-zenith-bin tables.

    Returns
    -------
    efficiency_all
        Result of ``_efficiency_dataframe`` for the full test set
        (returned alone when ``return_by_zenith`` is False).
    (efficiency_all, efficiencies_by_zenith) : tuple
        Only when ``return_by_zenith`` is True. ``efficiencies_by_zenith`` maps
        the integer ``ze_bin`` value to the efficiency table of that bin; it is
        empty when ``x_test`` has no ``ze_bin`` column.
    """
    y_pred_proba = model.predict_proba(x_test)[:, 1]
    thresholds = np.linspace(0, 1, 101)

    efficiency_all = _efficiency_dataframe(name, y_pred_proba, y_test, thresholds)
    if not return_by_zenith:
        return efficiency_all

    efficiencies_by_zenith = {}
    if "ze_bin" not in x_test.columns:
        _logger.warning("Column 'ze_bin' missing in x_test; per-zenith efficiencies not computed.")
        return efficiency_all, efficiencies_by_zenith

    ze_column = x_test["ze_bin"]
    # NaN entries carry no bin assignment and are left out of the per-bin tables.
    for ze_bin in sorted(pd.Series(ze_column).dropna().unique().tolist()):
        try:
            key = int(ze_bin)
            # int() truncates floats (1.5 -> 1); require an exact integral value so
            # that distinct bins can never collapse onto the same dictionary key.
            is_integral = float(ze_bin) == key
        except (TypeError, ValueError, OverflowError):
            is_integral = False
        if not is_integral:
            _logger.warning(
                "Skipping non-integer ze_bin value in efficiency calculation: %s", ze_bin
            )
            continue

        # Plain boolean array: selection is positional and therefore independent of
        # how the pandas indices of x_test and y_test happen to be labelled.
        mask = np.asarray(ze_column == ze_bin, dtype=bool)
        efficiencies_by_zenith[key] = _efficiency_dataframe(
            name,
            y_pred_proba[mask],
            y_test[mask],
            thresholds,
            context_label=f" [ze{key}]",
        )

    return efficiency_all, efficiencies_by_zenith


def evaluate_classification_model(model, x_test, y_test, df, x_cols, name):
"""Evaluate the trained model on the test set and log performance metrics.

Expand Down
7 changes: 6 additions & 1 deletion src/eventdisplay_ml/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,12 @@ def train_classification(df, model_configs):
)
cfg["model"] = model
cfg["features"] = x_data.columns.tolist() # Store feature names for diagnostics
cfg["efficiency"] = evaluation_efficiency(name, model, x_test, y_test)
efficiency_all, efficiencies_by_zenith = evaluation_efficiency(
name, model, x_test, y_test, return_by_zenith=True
)
cfg["efficiency"] = efficiency_all
for ze_bin, ze_efficiency in efficiencies_by_zenith.items():
cfg[f"efficiency_ze{ze_bin}"] = ze_efficiency
cfg["shap_importance"] = shap_importance

return model_configs
Expand Down
Loading