From b4748aebbdbbaa04d8e0523850070d34a9a1d234 Mon Sep 17 00:00:00 2001 From: Tucker Hwang Date: Mon, 13 Apr 2026 22:55:24 -0700 Subject: [PATCH 1/3] Expand basic analysis framework --- alian/analysis/base/__init__.py | 20 +- alian/analysis/base/analysis.py | 166 +++++- alian/analysis/base/csubtractor.py | 2 +- alian/analysis/base/event.py | 99 ++++ alian/analysis/base/jet_finder.py | 104 ++++ alian/analysis/base/logs.py | 42 ++ alian/analysis/base/output.py | 128 +++++ alian/analysis/base/selection.py | 143 +++++ alian/analysis/base/selector.py | 803 +++++++++++++++++++++++++++++ alian/analysis/base/utils.py | 61 +++ 10 files changed, 1564 insertions(+), 4 deletions(-) create mode 100644 alian/analysis/base/event.py create mode 100644 alian/analysis/base/jet_finder.py create mode 100644 alian/analysis/base/logs.py create mode 100644 alian/analysis/base/output.py create mode 100644 alian/analysis/base/selection.py create mode 100644 alian/analysis/base/selector.py create mode 100644 alian/analysis/base/utils.py diff --git a/alian/analysis/base/__init__.py b/alian/analysis/base/__init__.py index bbf918b..7507403 100644 --- a/alian/analysis/base/__init__.py +++ b/alian/analysis/base/__init__.py @@ -1,2 +1,18 @@ -from .analysis import * -from .csubtractor import * +from .analysis import AnalysisBase, add_default_args +from .csubtractor import CEventSubtractor +from .event import Event +from .jet_finder import JetFinder +from .logs import set_up_logger +from .selection import EventSel, RCTSel, TrackSel, TrigSel +from .selector import AnalysisSelector +from .utils import delta_R, linbins, logbins, ndict, nested_dict, read_yaml + +__all__ = ["AnalysisBase", "add_default_args", + "CEventSubtractor", + "Event", + "JetFinder", + "set_up_logger", + "EventSel", "RCTSel", "TrackSel", "TrigSel", + "AnalysisSelector", + "delta_R", "linbins", "logbins", "ndict", "nested_dict", "read_yaml" + ] diff --git a/alian/analysis/base/analysis.py b/alian/analysis/base/analysis.py index 
from time import perf_counter

from alian.io import data_io
from ROOT import TFile

import heppyy

from .event import Event, get_selected_clusters, get_selected_tracks
from .jet_finder import JetFinder
from .logs import set_up_logger
from .output import Output
from .selector import AnalysisSelector
from .utils import is_slurm, read_yaml


class BaseAnalysis(heppyy.GenericObject):
    # Per-subclass default configuration values, filled into missing attributes.
    _defaults = {}

    def __init__(self, **kwargs):
        # Fill any attribute that is unset (or None) from the subclass defaults.
        # NOTE(review): reconstructed from diff context; the hunk elides lines
        # between the class header and __init__ — confirm against the repository.
        for k, val in self.__class__._defaults.items():
            if not hasattr(self, k) or getattr(self, k) is None:
                setattr(self, k, val)


class AnalysisBase:
    """A base class for analysis tasks.

    This base class describes the basic event analysis structure.
    At minimum, the method `analyze_event()` must be overridden. Depending
    on the selectors defined in the configuration, clusters may or may not be
    loaded into the event.
    """
    # Subclasses list their configurable parameters (used by dump()) here.
    _defaults = {}

    def __init__(self, input_file, output_file, cfg, tree_struct, nev = -1, lhc_run = 3):
        """Store paths and run parameters; heavy setup is deferred to run()/init().

        Args:
            input_file: input file, or a file containing a list of input files.
            output_file: ROOT file to write the analysis output to.
            cfg: path to the YAML configuration (selections, output, jet finder).
            tree_struct: path to a YAML file describing the tree structure.
            nev: number of events to process; -1 means all.
            lhc_run: LHC Run number (default 3).
        """
        self.input_file = input_file
        self.output_file = output_file
        self.cfg_file = cfg
        self.tree_struct = tree_struct
        self.nev = nev
        self.lhc_run = lhc_run

    def run(self):
        """Execute the full pipeline: init -> event loop -> finalize -> save."""
        self.start_time = perf_counter()
        # set up logger
        self.logger = set_up_logger(__name__)
        self.logger.info("Starting analysis!")

        self.logger.info("Configuring analysis pipeline...")
        self.init()
        self.logger.info("Analysis pipeline configured.")
        self.analyze_events()
        self.finalize()
        self.save()

        self.note_time("Analysis complete")

    def init(self):
        """Read the YAML config and set up data input, selectors, jet finder, output."""
        self.cfg = read_yaml(self.cfg_file)
        # validate YAML config
        self._validate_cfg(self.cfg)
        self.logger.info("Config schema validated.")
        # initialize data input
        self.data_source = data_io.DataInput(self.input_file,
                                             lhc_run = self.lhc_run,
                                             yaml_file = self.tree_struct,
                                             n_events = self.nev)
        # initialize selectors for events, tracks, clusters
        self.logger.info("Configuring selectors...")
        self.selector = AnalysisSelector.load(self.cfg)
        self.selector.dump()
        self.logger.info("Selectors configured.")
        # clusters are only materialized when a cluster selector is configured
        if self.selector.is_active("cluster"):
            self.logger.info("Cluster selector found, clusters will be loaded into events.")
            self.load_clusters = True
        else:
            self.logger.info("Cluster selector not found, clusters will NOT be loaded into events.")
            self.load_clusters = False
        # initialize jet finder (optional block)
        if 'jet_finder' in self.cfg:
            self.logger.info("Jet finder config found, jets will be loaded into events.")
            self.load_jets = True
            self.logger.info("Configuring jet finder...")
            self.jet_finder = JetFinder.load(self.cfg)
            self.jet_finder.dump()
            self.logger.info("Jet finder configured.")
        else:
            self.logger.info("No configuration for jet finder, no jets will be loaded.")
            self.load_jets = False
        self.logger.info("Configuring output...")
        self.init_output()
        self.logger.info("Output configured.")
        # initialize analysis parameters
        self.logger.info("Configuring analysis parameters...")
        self.init_analysis(self.cfg.get('analysis', {}))
        self.dump()
        self.logger.info("Analysis parameters configured.")

    def _validate_cfg(self, cfg: dict):
        """Raise KeyError if any required top-level YAML block is missing."""
        req_headers = ['selections', 'output']
        if headers_not_in_cfg := [h for h in req_headers if h not in cfg]:
            raise KeyError(f"The following blocks must be present in the YAML configuration: {headers_not_in_cfg}")

    def init_analysis(self, analysis_cfg: dict):
        """Initialize analysis configuration parameters. Override in analyses if necessary."""
        self.logger.info("No analysis configuration to parse.")

    def init_output(self):
        """Build the Output (histograms/trees) from the config and expose shortcuts."""
        self.output = Output.load(self.cfg)
        self.hists = self.output.hists
        self.trees = self.output.trees

    def analyze_events(self):
        """Run the event loop: build, select, then analyze each event."""
        self.logger.info("Analyzing events...")
        # disable the progress bar on batch (SLURM) nodes
        slurm_check = is_slurm()
        for e in self.data_source.next_event(disable_bar = slurm_check):
            # build the event only
            self.build_event(e)
            # skip if the event doesn't pass the selection
            if not self.selector.event.selects(self.event):
                continue
            # build the rest of the event and analyze
            self.build_event_objs(e)
            self.analyze_event()
        self.note_time("Events analyzed")

    def analyze_event(self):
        # Subclasses implement the per-event physics here.
        raise NotImplementedError("Must implement an analyze_event method!")

    def build_event(self, event_struct):
        """Build event as Event and store in self.event."""
        self.event = Event(event_struct)

    def build_event_objs(self, event_struct):
        """Build tracks and optionally clusters and jets from associated event."""
        self.tracks = get_selected_tracks(event_struct, self.selector.track)
        if self.load_clusters:
            self.clusters = get_selected_clusters(event_struct, self.selector.cluster)
        if self.load_jets:
            self.jets = self.jet_finder.find_jets(self.tracks)

    def finalize(self):
        """Finalize analysis before saving output. Should be overridden in analyses if necessary."""
        self.logger.info("Nothing to finalize.")

    def save(self):
        """Write all output objects to self.output_file."""
        self.logger.info(f"Saving output to: {self.output_file}")
        # ROOT's TFile supports the context-manager protocol (file is closed on exit)
        with TFile(self.output_file, "RECREATE") as f:
            self.output.save(f)
        self.logger.info("Output saved.")

    def __getattr__(self, attr):
        # Only invoked when normal attribute lookup fails; gives actionable
        # errors for the two optional event collections.
        if attr == "jets":
            raise AttributeError("Jets not defined; include a `jet_finder` block in your config!")
        if attr == "clusters":
            raise AttributeError("Clusters not defined; include a `clusters` block in the `selections` section of your config! The block can be empty if you want no selections applied.")
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")

    def dump(self):
        """Log every parameter listed in the subclass's _defaults."""
        cfg = "\n".join([f"\t{param}: {repr(getattr(self, param))}" for param in self._defaults])
        self.logger.info(f"{type(self).__name__} configuration:\n{cfg}", stacklevel = 2)

    def note_time(self, msg):
        """Log `msg` together with the elapsed wall time since run() started."""
        self.logger.info(f"{msg}: ------- {self.fmt_time(perf_counter() - self.start_time)} -------", stacklevel = 2)

    def fmt_time(self, seconds):
        """Format a duration in seconds as 'Hh MMm SSs' (rounded to whole seconds)."""
        m, s = divmod(round(seconds), 60)
        h, m = divmod(m, 60)
        return f"{h:d}h {m:02d}m {s:02d}s"


def add_default_args(parser):
    """Attach the standard analysis CLI options to `parser` and return it."""
    parser.add_argument('-i', '--input-file', type = str, required = True, help = "Input file or file containing a list of input files.")
    parser.add_argument("-o", "--output-file", type = str, default = "analysis.root", help = "File to write the analysis.")
    parser.add_argument('-c', '--config-file', type = str, required = True, help = "Path to a YAML configuration file describing the cuts to be applied to the data.")
    parser.add_argument("-n", "--nev", type = int, default = -1, help = "Number of entries to process, set to -1 for all events.")
    parser.add_argument('-t', '--tree-struct', type = str, default = None, help = "Path to a YAML file describing the tree structure.")
    parser.add_argument('--lhc-run', type = int, default = 3, help = 'LHC Run')

    return parser
import heppyy.util.fastjet_cppyy
from cppyy.gbl import fastjet as fj
from cppyy.gbl import std

import heppyy

from .selection import EventSel, RCTSel, TrigSel

alian = heppyy.load_cppyy("alian")


class Event:
    """Lightweight view of the event-level fields of a data_io event structure."""

    def __init__(self, ev):
        self.run_number = ev.data["run_number"]
        self.multiplicity = ev.data["multiplicity"]
        self.centrality = ev.data["centrality"]
        self.occupancy = ev.data["occupancy"]
        # wrap the raw bitmasks in their Selection enums for readable checks
        self.event_sel = EventSel(ev.data["event_sel"])
        self.trig_sel = TrigSel(ev.data["trig_sel"])
        self.rct = RCTSel(ev.data["rct"])


def get_tracks(ev):
    """Get all tracks from an event (no track selections applied)."""
    return alian.numpy_ptetaphi_to_tracks(
        ev.data["track_pt"],
        ev.data["track_eta"],
        ev.data["track_phi"],
        ev.data["track_sel"],
        0,
    )


def get_selected_tracks(ev, selector):
    """Get selected tracks from an event (track selections applied)."""
    # Delegate to get_tracks() so the branch-name list lives in one place.
    return std.vector[fj.PseudoJet](
        [t for t in get_tracks(ev) if selector.selects(t)]
    )


def get_clusters(ev):
    """Get all clusters from an event (no cluster selections applied)."""
    return alian.numpy_energyetaphi_to_clusters(
        ev.data["cluster_energy"],
        ev.data["cluster_eta"],
        ev.data["cluster_phi"],
        ev.data["cluster_m02"],
        ev.data["cluster_m20"],
        ev.data["cluster_ncells"],
        ev.data["cluster_time"],
        ev.data["cluster_exoticity"],
        ev.data["cluster_dbc"],
        ev.data["cluster_nlm"],
        ev.data["cluster_defn"],
        ev.data["cluster_matched_track_n"],
        ev.data["cluster_matched_track_delta_eta"],
        ev.data["cluster_matched_track_delta_phi"],
        ev.data["cluster_matched_track_p"],
        ev.data["cluster_matched_track_pt"],
        ev.data["cluster_matched_track_sel"],
        0,
    )


def get_selected_clusters(ev, selector):
    """Get selected clusters from an event (cluster selections applied)."""
    # Delegate to get_clusters() so the (long) branch-name list lives in one place.
    return [c for c in get_clusters(ev) if selector.selects(c)]
+ """ + + _defaults = { + 'R': 0.4, + 'alg': fj.antikt_algorithm, + 'pT_min': 10, + } + _defaults["eta_max"] = 0.9 - _defaults["R"] + + alg_registry = { + fj.antikt_algorithm: ['antikt', 'anti-kt', 'antikT', 'anti-kT', 'anti', 'ak', -1], + fj.kt_algorithm: ['kt', 'kT', 'k', 1], + fj.cambridge_algorithm: ['ca', 'CA', 'cambridge' 'cambridge-aachen', 'cambridge/aachen', 0], + } + alg_map = { # mapping from string to fj.JetAlgorithm + key: value + for value, keys in alg_registry.items() + for key in keys + } + + def __init__(self, alg = fj.antikt_algorithm, R = 0.4, eta_max = None, pT_min = 10): + self.logger = set_up_logger(__name__) + self.R = R + if eta_max is None: + self.eta_max = 0.9 - self.R + else: + self.eta_max = eta_max + self.alg = self._parse_jet_alg(alg) + self.jet_def = fj.JetDefinition(self.alg, self.R) + self.pT_min = pT_min + self.jet_selector = fj.SelectorAbsEtaMax(self.eta_max) * fj.SelectorPtMin(pT_min) + fj.ClusterSequence.print_banner() + # TODO: jet background subtraction + # self.area_def = fj.AreaDefinition(fj.active_area, fj.GhostedAreaSpec(self.eta_max + self.jet_R)) + # self.bg_estimator = fj.GridMedianBackgroundEstimator(self.bg_y_max, self.bg_grid_spacing) + + @singledispatchmethod + @classmethod + def load(cls, file, *args, **kwargs): + raise NotImplementedError(f'Cannot configure jet finder from type {type(file)}.') + @load.register(dict) + @classmethod + def _load(cls, cfg): + if "jet_finder" not in cfg: + raise KeyError("The JetFinder must be configured in a 'jet_finder' block in the YAML configuration!") + if cfg['jet_finder'] is None: + cfg["jet_finder"] = {} + + options = {**cls._defaults, **cfg["jet_finder"]} + + return cls(**options) + @load.register(str) + @load.register(Path) + @classmethod + def _load(cls, file): + cfg = read_yaml(file) + return cls.load(cfg) + + def dump(self): + """Dump all jet finding parameters.""" + cfg = "\n".join([f"\t{param}: {repr(getattr(self, param))}" for param in self._defaults]) + 
self.logger.info(f"JetFinder configuration:\n{cfg}", stacklevel = 2) + + def _parse_jet_alg(self, alg: str): + if isinstance(alg, fj.JetAlgorithm): + return alg + elif isinstance(alg, str): + try: + return self.alg_map[alg] + except KeyError: + valid_algs = sum(self.alg_registry.values(), []) + self.logger.error(f"Did not recognize jet algorithm '{alg}', defaulting to anti-kT. Valid options: {valid_algs}") + return fj.antikt_algorithm + else: + valid_algs = sum(self.alg_registry.values(), []) + self.logger.error(f"Cannot pass type {type(alg)} as jet algorithm, defaulting to anti-kT. Valid options: {valid_algs}") + return fj.antikt_algorithm + + def find_jets(self, tracks, m = 0.13957, index_offset = 0): + # NOTE: ClusterSequence must be attached somehow to self to keep it in scope + self.cs = fj.ClusterSequence(tracks, self.jet_def) + jets = fj.sorted_by_pt(self.jet_selector(self.cs.inclusive_jets())) + return jets diff --git a/alian/analysis/base/logs.py b/alian/analysis/base/logs.py new file mode 100644 index 0000000..cb26263 --- /dev/null +++ b/alian/analysis/base/logs.py @@ -0,0 +1,42 @@ +import logging + +from .utils import is_slurm + + +class ColoredFormatter(logging.Formatter): + COLORS = { + 'DEBUG': '\033[34m', + 'INFO': '\033[32m', + 'WARNING': '\033[33m', + 'ERROR': '\033[31m', + 'CRITICAL': '\033[35m' + } + RESET = '\033[0m' + + def format(self, record): + color = self.COLORS.get(record.levelname, '') + if color: + # Color the entire line + formatted_msg = super().format(record) + return f"{color}{formatted_msg}{self.RESET}" + return super().format(record) + +def set_up_logger(name, + handler_level = logging.INFO, + logger_level = logging.DEBUG, + log_fmt = '%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(funcName)s - %(message)s', + date_fmt = '%Y-%m-%d %H:%M:%S' + ): + logger = logging.getLogger(name) + handler = logging.StreamHandler() + + if is_slurm(): + formatter = logging.Formatter(log_fmt, datefmt = date_fmt) + else: + formatter = 
ColoredFormatter(log_fmt, datefmt = date_fmt) + + handler.setFormatter(formatter) + handler.setLevel(handler_level) + logger.addHandler(handler) + logger.setLevel(logger_level) + return logger \ No newline at end of file diff --git a/alian/analysis/base/output.py b/alian/analysis/base/output.py new file mode 100644 index 0000000..a037d59 --- /dev/null +++ b/alian/analysis/base/output.py @@ -0,0 +1,128 @@ +from functools import singledispatchmethod +from pathlib import Path + +import numpy as np +import ROOT as r + +from .logs import set_up_logger +from .utils import linbins, logbins, read_yaml + +r.TH1.AddDirectory(False) +r.TH1.SetDefaultSumw2() +r.TH2.SetDefaultSumw2() + +registry = { + r.TH1F: [1, '1', '1F', '1f'], + r.TH2F: [2, '2', '2F', '2f'], + r.TH3F: [3, '3', '3F', '3f'], + r.TH1D: ['1D', '1d'], + r.TH2D: ['2D', '2d'], + r.TH3D: ['3D', '3d'], + r.TH1I: ['1I', '1i'], + r.TH2I: ['2I', '2i'], + r.TH3I: ['3I', '3i'], +} + +# reverse registry mapping to map key to specific ROOT class +HISTOGRAM_REGISTRY = { + key: value + for value, keys in registry.items() + for key in keys +} + +class Output: + def __init__(self, bins, histograms, branches, trees): + self.logger = set_up_logger(__name__) + self.logger.info("Initializing histograms...", stacklevel = 2) + self._init_bins(bins) + self._init_histograms(histograms) + self.logger.info("Histograms initialized.", stacklevel = 2) + self.logger.info("Initializing trees...", stacklevel = 2) + self._init_branches(branches) + self._init_trees(trees) + self.logger.info("Trees initialized.", stacklevel = 2) + + @singledispatchmethod + @classmethod + def load(cls, file): + raise NotImplementedError(f'Cannot configure output from type {type(file)}.') + @load.register(dict) + @classmethod + def _load(cls, cfg): + if "output" not in cfg: + raise KeyError("Output must be configured in a 'output' block in the YAML configuration!") + if cfg["output"] is None: + raise KeyError("The 'output' block in the YAML configuration cannot be 
empty!") + + cfg_output = {} + fields = ["bins", "histograms", "branches", "trees"] + for field in fields: + if field not in cfg["output"] or cfg["output"][field] is None: + cfg_output[field] = {} + else: + cfg_output[field] = cfg["output"][field] + + return cls(**cfg_output) + @load.register(str) + @load.register(Path) + @classmethod + def _load(cls, file): + cfg = read_yaml(file) + return cls.load(cfg) + + def _init_bins(self, bins_cfg): + self._bins = {} + self._nbins = {} + for name, arglist in bins_cfg.items(): + self._bins[name] = self._calculate_bins(*arglist) + self._nbins[name] = len(self._bins[name]) - 1 + + def _calculate_bins(self, binning, *bin_info): + match binning: + case 'logarithmic' | 'logarithm' | 'log' | 'lg': + binmin, binmax, nbins = bin_info + return logbins(binmin, binmax, nbins) + case 'linear' | 'line' | 'lin' | 'ln': + binmin, binmax, nbins = bin_info + return linbins(binmin, binmax, nbins) + case 'custom' | 'cst' | 'c': + return np.array(bin_info[0], dtype = float) + case _: + raise ValueError(f"Binning type {binning} invalid.") + + def _init_histograms(self, hists_cfg): + self.hists = {} + names = [] + # for now, doesn't support nested structures, but could in the future + for tag, cfg in hists_cfg.items(): + htype, name, title, *bin_names = cfg + self._validate_bin_names(bin_names) + # interlace number of bins and bin arrays for each axis + binnings = [val for name in bin_names for val in (self._nbins[name], self._bins[name])] + root_hist_cls = HISTOGRAM_REGISTRY[htype] + self.hists[tag] = root_hist_cls(name, title, *binnings) + names.append(name) + duplicates = {name: names.count(name) for name in names if names.count(name) > 1} + if duplicates: + self.logger.error(f"Multiple ROOT histograms defined with the same name, they will overwrite each other: {list(duplicates.keys())}") + + def _validate_bin_names(self, bin_names): + """Check bin names from Histogram configs are defined.""" + invalid_names = [name for name in bin_names if 
from enum import IntFlag, auto

from numpy import number  # numpy scalar base class (values arriving from arrays/YAML)


class Selection(IntFlag):
    """Base class for bitmask selections with YAML-friendly construction.

    Values may be built from ints, numpy integer scalars, a comma-separated
    string of member names (OR-ed together), or the special string "any"
    (all defined bits set).
    """

    @classmethod
    def max_value(cls):
        # All canonical bits set; aliases (composed members) do not count toward len(cls).
        return 2 ** len(cls) - 1

    def __invert__(self):
        # Complement restricted to the defined bit range (avoids negative ints).
        return self.__class__(self.__class__.max_value() - int(self))

    def __format__(self, format_spec):
        # f-strings show member names instead of the raw integer value.
        return str(self)

    @classmethod
    def create(cls, value):
        """Like cls(value), but passes None through (meaning: selection ignored)."""
        # can't put this inside _missing_ since _missing_ must return a Selection object
        if value is None:
            return None
        return cls(value)

    @classmethod
    def _missing_(cls, value):
        # Enum hook: called for values that are not canonical members.
        if isinstance(value, str):
            if value == "any":
                return ~cls(0)
            else:
                # comma-separated member names are OR-ed together
                res = cls(0)
                for sel in value.split(','):
                    res |= cls[sel.strip()]
                return res
        elif isinstance(value, number):
            # numpy scalar -> native Python int, then retry
            return cls(value.item())
        elif isinstance(value, int) and value >= 0:
            if value <= cls.max_value():
                return super()._missing_(value)
            else:
                raise ValueError(f"{cls.__name__}: {value} is out of range ({cls.max_value()}).")
        elif isinstance(value, cls):
            return value
        else:
            raise ValueError(f"{cls.__name__}: {value} is invalid. Valid selections: {cls._member_names_}")


# selections from O2Physics::daily-20260302-0000
class EventSel(Selection):
    sel8 = auto()
    sel7 = auto()
    selKINT7 = auto()
    selTVX = auto()
    selNoTimeFrameBorder = auto()
    selNoITSROFrameBorder = auto()
    selNoSameBunchPileup = auto()
    selIsGoodZvtxFT0vsPV = auto()
    selNoCollInTimeRangeStandard = auto()
    selNoCollInRofStandard = auto()
    selUpcSingleGapA = auto()
    selUpcSingleGapC = auto()
    selUpcDoubleGap = auto()
    # composite aliases for common selection combinations
    sel8Full = sel8 | selNoSameBunchPileup
    sel8FullPbPb = sel8 | selNoCollInTimeRangeStandard | selNoCollInRofStandard
    selUnanchoredMC = selTVX
    selMC = selTVX | selNoTimeFrameBorder
    selMCFull = selTVX | selNoTimeFrameBorder | selNoSameBunchPileup
    selMCFullPbPb = selTVX | selNoCollInTimeRangeStandard | selNoCollInRofStandard
    sel7KINT7 = sel7 | selKINT7


# selections from O2Physics::daily-20260302-0000
class TrigSel(Selection):
    noTrigSel = auto()
    JetChLowPt = auto()
    JetChHighPt = auto()
    TrackLowPt = auto()
    TrackHighPt = auto()
    JetD0ChLowPt = auto()
    JetD0ChHighPt = auto()
    JetLcChLowPt = auto()
    JetLcChHighPt = auto()
    EMCALReadout = auto()
    JetFullHighPt = auto()
    JetFullLowPt = auto()
    JetNeutralHighPt = auto()
    JetNeutralLowPt = auto()
    GammaVeryHighPtEMCAL = auto()
    GammaVeryHighPtDCAL = auto()
    GammaHighPtEMCAL = auto()
    GammaHighPtDCAL = auto()
    GammaLowPtEMCAL = auto()
    GammaLowPtDCAL = auto()
    GammaVeryLowPtEMCAL = auto()
    GammaVeryLowPtDCAL = auto()


# selections from O2Physics::daily-20260302-0000
class RCTSel(Selection):
    CPVBad = auto()
    EMCBad = auto()
    EMCLimAccMCRepr = auto()
    FDDBad = auto()
    FT0Bad = auto()
    FV0Bad = auto()
    HMPBad = auto()
    ITSBad = auto()
    ITSLimAccMCRepr = auto()
    MCHBad = auto()
    MCHLimAccMCRepr = auto()
    MFTBad = auto()
    MFTLimAccMCRepr = auto()
    MIDBad = auto()
    MIDLimAccMCRepr = auto()
    PHSBad = auto()
    TOFBad = auto()
    TOFLimAccMCRepr = auto()
    TPCBadTracking = auto()
    TPCBadPID = auto()
    TPCLimAccMCRepr = auto()
    TRDBad = auto()
    ZDCBad = auto()
    NRCTSelectionFlags = auto()
    # padding bits to keep subsequent flags at fixed positions
    Dummy24 = auto()
    Dummy25 = auto()
    Dummy26 = auto()
    Dummy27 = auto()
    Dummy28 = auto()
    Dummy29 = auto()
    Dummy30 = auto()
    CcdbObjectLoaded = auto()
    # composite aliases for common RCT quality combinations
    CBT = FT0Bad | ITSBad | TPCBadTracking | TPCBadPID
    CBT_LAMCRBad = CBT | ITSLimAccMCRepr | TPCLimAccMCRepr
    CBT_hadronPID = CBT | TOFBad
    CBT_hadronPID_LAMCRBad = CBT_hadronPID | ITSLimAccMCRepr | TPCLimAccMCRepr | TOFLimAccMCRepr
    CBT_calo = CBT | EMCBad
    CBT_calo_LAMCRBad = CBT_calo | ITSLimAccMCRepr | TPCLimAccMCRepr | EMCLimAccMCRepr


# selections from O2Physics::daily-20260302-0000
class TrackSel(Selection):
    trackSign = auto() # 1 = positive, 0 = negative
    globalTrack = auto()
    qualityTrack = auto()
    qualityTrackWDCA = auto()
    hybridTrack = auto()
    notBadMcTrack = auto()
    embeddedTrack = auto()
from functools import singledispatchmethod
from pathlib import Path

import numpy as np

import heppyy

from .logs import set_up_logger
from .selection import EventSel, RCTSel, TrackSel, TrigSel
from .utils import read_yaml

alian = heppyy.load_cppyy("alian")


class Selector:
    """Base class for YAML-configured selectors.

    Subclasses define `_types` (selection name -> allowed value types),
    `_name` (the sub-block under 'selections' in the YAML config), and
    `_dummy_mask` (an always-true RDataFrame filter string used when no
    selection is active).
    """
    _types = {}
    _name = ""
    _dummy_mask = ""

    def __init__(self, **selections):
        self.logger = set_up_logger(type(self).__name__)
        # one RDataFrame mask string (or None when inactive) per selection
        self._masks = {sel: None for sel in self._types.keys()}
        self.reset()
        self.update(**selections)

    @singledispatchmethod
    @classmethod
    def load(cls, file):
        """Build a selector from a YAML config dict or a path to a YAML file."""
        raise NotImplementedError(f'Cannot configure selector from type {type(file)}.')

    @load.register(dict)
    @classmethod
    def _load(cls, cfg):
        if "selections" not in cfg:
            raise KeyError("Selectors must be defined in a 'selections' block in the YAML configuration!")
        if cfg['selections'] is None:
            raise KeyError("The 'selections' block in the YAML configuration cannot be empty!")

        # An empty sub-block means "selector active, no cuts"; a missing
        # sub-block means "selector not configured" (returns None).
        if cls._name in cfg["selections"]:
            if cfg["selections"][cls._name] is None:
                return cls()
            else:
                return cls(**cfg["selections"][cls._name])
        else:
            return None

    @load.register(str)
    @load.register(Path)
    @classmethod
    def _load(cls, file):
        cfg = read_yaml(file)
        return cls.load(cfg)

    def reset(self):
        """Reset all selections."""
        for selection in self._types.keys():
            setattr(self, selection, None)

    def update(self, **selections):
        """Set given selections, leaving all other unspecified selections as-is."""
        valid_sels = self._validate(**selections)
        for name, val in valid_sels.items():
            setattr(self, name, val)

    def _is_ignore_selection(self, value):
        """Check if the selection (written as `value` in the YAML file) should be ignored."""
        return value in [None, "None", "none"]

    @property
    def rdf_mask(self):
        # AND together every active per-selection mask string.
        mask = " && ".join([mask for mask in self._masks.values() if mask is not None])
        if not mask: # if no selections active, pass dummy mask
            return self._dummy_mask
        return mask

    def apply_to(self, df):
        """Apply cuts to the event RDataFrame `df`."""
        raise NotImplementedError

    def _validate(self, **selections):
        """Validate the passed selections, and raise warnings if invalid selections were given."""
        # validate selection names
        unrecognized_selections = [sel for sel in selections.keys() if sel not in self._types.keys()]
        if unrecognized_selections:
            raise KeyError(f"Unrecognized selections were given: {', '.join(unrecognized_selections)}")
        # validate selection types
        recognized_selections = {sel: val for sel, val in selections.items() if sel in self._types.keys()}
        # check that each selection value is either the right type, or if is not, then it is an allowed ignore keyword
        wrong_types = [f"{sel}: {val} (type {type(val)}), must be {self._types[sel]}"
                       for sel, val in recognized_selections.items()
                       if not isinstance(val, self._types[sel]) and not self._is_ignore_selection(val) ]
        if wrong_types:
            wrong_types = "\n".join(wrong_types)
            raise TypeError(f"Incorrect types for the following selections were given. Use either the given types or 'none':\n{wrong_types}")
        return recognized_selections

    def _canonicalize(self, sel, val):
        # Map the YAML "ignore" spellings to None; otherwise enforce the declared type.
        if val in [None, "None", "none"]:
            return None
        if isinstance(val, self._types[sel]):
            return val
        raise TypeError(f"Incorrect type for {type(self).__name__}: type for {sel} must be {self._types[sel]}, not {type(val)}")

    def _pass_ignore(self, *args, **kwargs):
        # Stand-in predicate used when a selection is disabled: always passes.
        return True

    def _fail_ignore(self, *args, **kwargs):
        # Stand-in predicate: always fails.
        return False

    def selects(self, obj, verbose = False):
        # Subclasses implement the per-object selection decision.
        raise NotImplementedError

    def dump(self):
        """Dump all selections."""
        cfg = "\n".join([f"\t{sel}: {getattr(self, sel)}" for sel in self._types.keys()])
        self.logger.info(f"{type(self).__name__} configuration:\n{cfg}", stacklevel = 2)
+ cent_max (int/float): maximum centrality. + mult_min (int/float): minimum multiplicity. + mult_max (int/float): maximum multiplicity. + occupancy_min (int/float): minimum occupancy. + occupancy_max (int/float): maximum occupancy. + """ + + _types = { + "event_sel": (EventSel, str, int), + "trig_sel": (TrigSel, str, int), + "rct": (RCTSel, str, int), + "cent_min": (int, float), + "cent_max": (int, float), + "mult_min": (int, float), + "mult_max": (int, float), + "occupancy_min": (int, float), + "occupancy_max": (int, float), + } + _name = "event" + _dummy_mask = "event_sel >= 0" + + # special handling of no-selects to allow for ignoring the event or trigger selection altogether + @property + def event_sel(self): + return self._event_sel + @event_sel.setter + def event_sel(self, event_sel): + self._event_sel = EventSel.create(self._canonicalize('event_sel', event_sel)) + if self._event_sel is None: + self.pass_event_sel = self._pass_ignore + self._masks['event_sel'] = None + else: + self.pass_event_sel = self._pass_event_sel + self._masks['event_sel'] = f"(({int(self._event_sel)} & event_sel) == {int(self._event_sel)})" + @property + def trig_sel(self): + return self._trig_sel + @trig_sel.setter + def trig_sel(self, trig_sel): + self._trig_sel = TrigSel.create(self._canonicalize('trig_sel', trig_sel)) + if self._trig_sel is None: + self.pass_trig_sel = self._pass_ignore + self._masks['trig_sel'] = None + else: + self.pass_trig_sel = self._pass_trig_sel + self._masks['trig_sel'] = f"({int(self._trig_sel)} & trig_sel)" + @property + def rct(self): + return self._rct + @rct.setter + def rct(self, rct): + # NOTE: CCDB check and ZDC have to be included in RCT manually (combine with comma in config) + self._rct = RCTSel.create(self._canonicalize('rct', rct)) + if self._rct is None: + self.pass_rct = self._pass_ignore + self._masks['rct'] = None + else: + self.pass_rct = self._pass_rct + self._masks['rct'] = f"!({int(self._rct)} & rct)" + @property + def cent_min(self): + 
return self._cent_min + @cent_min.setter + def cent_min(self, cent_min): + self._cent_min = self._canonicalize('cent_min', cent_min) + if self._cent_min is None: + self.pass_cent_min = self._pass_ignore + self._masks['cent_min'] = None + else: + self.pass_cent_min = self._pass_cent_min + self._masks['cent_min'] = f"(centrality >= {self._cent_min})" + @property + def cent_max(self): + return self._cent_max + @cent_max.setter + def cent_max(self, cent_max): + self._cent_max = self._canonicalize('cent_max', cent_max) + if self._cent_max is None: + self.pass_cent_max = self._pass_ignore + self._masks['cent_max'] = None + else: + self.pass_cent_max = self._pass_cent_max + self._masks['cent_max'] = f"(centrality <= {self._cent_max})" + @property + def mult_min(self): + return self._mult_min + @mult_min.setter + def mult_min(self, mult_min): + self._mult_min = self._canonicalize('mult_min', mult_min) + if self._mult_min is None: + self.pass_mult_min = self._pass_ignore + self._masks['mult_min'] = None + else: + self.pass_mult_min = self._pass_mult_min + self._masks['mult_min'] = f"(multiplicity >= {self._mult_min})" + @property + def mult_max(self): + return self._mult_max + @mult_max.setter + def mult_max(self, mult_max): + self._mult_max = self._canonicalize('mult_max', mult_max) + if self._mult_max is None: + self.pass_mult_max = self._pass_ignore + self._masks['mult_max'] = None + else: + self.pass_mult_max = self._pass_mult_max + self._masks['mult_max'] = f"(multiplicity <= {self._mult_max})" + @property + def occupancy_min(self): + return self._occupancy_min + @occupancy_min.setter + def occupancy_min(self, occupancy_min): + self._occupancy_min = self._canonicalize('occupancy_min', occupancy_min) + if self._occupancy_min is None: + self.pass_occupancy_min = self._pass_ignore + self._masks['occupancy_min'] = None + else: + self.pass_occupancy_min = self._pass_occupancy_min + self._masks['occupancy_min'] = f"(occupancy >= {self._occupancy_min})" + @property + def 
occupancy_max(self): + return self._occupancy_max + @occupancy_max.setter + def occupancy_max(self, occupancy_max): + self._occupancy_max = self._canonicalize('occupancy_max', occupancy_max) + if self._occupancy_max is None: + self.pass_occupancy_max = self._pass_ignore + self._masks['occupancy_max'] = None + else: + self.pass_occupancy_max = self._pass_occupancy_max + self._masks['occupancy_max'] = f"(occupancy <= {self._occupancy_max})" + + def apply_to(self, df, label = "selected"): + """Apply event selections to the event RDataFrame.""" + self.logger.info(f"Applying event mask: {(mask := self.rdf_mask)}") + return df.Filter(mask) + + def _pass_event_sel(self, event): + """Check that the event satisfies ALL requested event selection bits.""" + return self._event_sel in event.event_sel + def _pass_trig_sel(self, event): + """Check that the event satisfies ANY requested trigger bit.""" + return bool(self._trig_sel & event.trig_sel) + def _pass_rct(self, event): + """Check that the event is not Bad for ANY relevant detectors.""" + return not bool(self._rct & event.rct) + def _pass_cent_min(self, event): + return event.centrality >= self._cent_min + def _pass_cent_max(self, event): + return event.centrality <= self._cent_max + def _pass_mult_min(self, event): + return event.multiplicity >= self._mult_min + def _pass_mult_max(self, event): + return event.multiplicity <= self._mult_max + def _pass_occupancy_min(self, event): + return event.occupancy >= self._occupancy_min + def _pass_occupancy_max(self, event): + return event.occupancy <= self._occupancy_max + def selects(self, event): + """Determine if event passes all event-level selections: event selection, trigger selection, RCT, centrality, multiplicity, occupancy.""" + return self.pass_event_sel(event) and self.pass_trig_sel(event) and self.pass_rct(event) \ + and self.pass_cent_min(event) and self.pass_cent_max(event) \ + and self.pass_mult_min(event) and self.pass_mult_max(event) \ + and 
class TrackSelector(Selector):
    """A track selector, configured via YAML.

    Holds every track-level cut. Setting a cut to None disables it.
    Several `track_sel` values may be combined with commas in the config.

    Attributes:
        pt_min (int/float): minimum track transverse momentum.
        track_sel (TrackSel): track selection to apply; a track passes if it
            satisfies AT LEAST ONE of the requested selections.
    """

    _types = {
        "pt_min": (float, int),
        "track_sel": (TrackSel, str, int)
    }
    _name = "track"
    _dummy_mask = "track_pt > -1"

    @property
    def pt_min(self):
        return self._pt_min

    @pt_min.setter
    def pt_min(self, value):
        self._pt_min = self._canonicalize('pt_min', value)
        if self._pt_min is not None:
            self.pass_pt_min = self._pass_pt_min
            self._masks['pt_min'] = f"(track_pt >= {self._pt_min})"
        else:
            self.pass_pt_min = self._pass_ignore
            self._masks['pt_min'] = None

    @property
    def track_sel(self):
        return self._track_sel

    @track_sel.setter
    def track_sel(self, value):
        self._track_sel = TrackSel.create(self._canonicalize('track_sel', value))
        if self._track_sel is not None:
            self.pass_track_sel = self._pass_track_sel
            self._masks['track_sel'] = f"(track_sel & {int(self._track_sel)})"
        else:
            self.pass_track_sel = self._pass_ignore
            self._masks['track_sel'] = None

    def _pass_pt_min(self, track):
        """Check that the track carries at least the minimum required pT."""
        return track.pt() >= self._pt_min

    def _pass_track_sel(self, track):
        """Check that the track satisfies ANY requested track selection bit."""
        return bool(self._track_sel & track.user_info[alian.TrackInfo]().track_sel())

    def apply_to(self, df, label = "selected"):
        """Apply selections to RDataFrame df."""
        self.logger.info(f"Applying track mask: {(mask := self.rdf_mask)}")
        ret = df.Define("track_mask", mask)
        # create masked copies of every existing track_* column
        renamed = {col: f"{col}_{label}" for col in df.GetColumnNames() if col.startswith("track_")}
        for src_col, dst_col in renamed.items():
            ret = ret.Define(dst_col, f"{src_col}[track_mask]")
        return ret

    def selects(self, track):
        """Determine if track passes the track selection."""
        return self.pass_track_sel(track) and self.pass_pt_min(track)

    def __call__(self, track):
        return self.selects(track)
class ClusterSelector(Selector):
    """A cluster selector, configured via YAML.

    The ClusterSelector contains all cluster-level selections. To ignore a particular selection,
    set the relevant selection to None. Note that, due to the BerkeleyTree structure, cuts related
    to matched tracks cannot be applied to RDataFrames.

    Attributes:
        energy_min (int/float): minimum cluster energy.
        pt_min (int/float): minimum cluster transverse momentum.
        m02_min (int/float): minimum cluster M02.
        m02_max (int/float): maximum cluster M02.
        ncells_min (int/float): minimum number of cells in the cluster. Cluster must have ncells greater than OR EQUAL TO this value.
        time_min (int/float): minimum cluster time.
        time_max (int/float): maximum cluster time.
        exoticity (bool): exoticity selection. To select non-exotic clusters, set to True. To select exotic clusters, set to False. To ignore exoticity entirely, set to None.
        dbc (int/float): minimum distance to the nearest bad EMC channel. Note that the JE derived data currently fills this with a dummy value of 1024.
        nlm (int): maximum number of local maxima. Cluster must have an NLM less than OR EQUAL TO this value.
        defn (int): filter on a cluster definition. Set to 0 for V1, 10 for V3. Multiple definitions cannot be filtered on simultaneously; if needed, set to None and check manually on analysis level.
        edge (bool): apply the SM edge cut.
        delta_eta (int/float): maximum difference in eta (at EMC) between cluster and track to match.
        delta_phi (int/float): maximum difference in phi (at EMC) between cluster and track to match.
        ep_max (int/float): maximum ratio of cluster energy to track momentum to match.
    """

    w = 0.0143  # cell granularity in eta/phi
    # supermodule phi boundaries (radians)
    sm_full = (81.2 * np.pi / 180, np.pi)
    sm_23 = (261.2 * np.pi / 180, 318.8 * np.pi / 180)
    sm_13e = (181.2 * np.pi / 180, 185.8 * np.pi / 180)
    sm_13d = (321.2 * np.pi / 180, 325.8 * np.pi / 180)

    _types = {
        # cluster cuts
        "energy_min": (float, int),
        "pt_min": (float, int),
        "m02_min": (float, int),
        "m02_max": (float, int),
        "ncells_min": (float, int),
        "time_min": (float, int),
        "time_max": (float, int),
        "exoticity": bool,
        "dbc": (float, int),
        "nlm": int,
        "defn": int,
        "edge": bool,
        "delta_eta": (float, int),
        "delta_phi": (float, int),
        "ep_max": (float, int),
    }
    _name = "cluster"
    _dummy_mask = "cluster_pt > -1"

    @property
    def energy_min(self):
        return self._energy_min
    @energy_min.setter
    def energy_min(self, energy_min):
        self._energy_min = self._canonicalize('energy_min', energy_min)
        if self._energy_min is None:
            self.pass_energy_min = self._pass_ignore
            self._masks['energy_min'] = None
        else:
            self.pass_energy_min = self._pass_energy_min
            self._masks['energy_min'] = f"(cluster_energy >= {self._energy_min})"
    @property
    def pt_min(self):
        return self._pt_min
    @pt_min.setter
    def pt_min(self, pt_min):
        self._pt_min = self._canonicalize('pt_min', pt_min)
        if self._pt_min is None:
            self.pass_pt_min = self._pass_ignore
            self._masks['pt_min'] = None
        else:
            self.pass_pt_min = self._pass_pt_min
            self._masks['pt_min'] = f"(cluster_pt >= {self._pt_min})"
    @property
    def m02_min(self):
        return self._m02_min
    @m02_min.setter
    def m02_min(self, m02_min):
        self._m02_min = self._canonicalize('m02_min', m02_min)
        # BUG FIX: m02_min and m02_max previously shared the single mask key
        # 'm02', so configuring both cuts silently dropped one of them from
        # the RDF mask. They now use distinct keys.
        if self._m02_min is None:
            self.pass_m02_min = self._pass_ignore
            self._masks['m02_min'] = None
        else:
            self.pass_m02_min = self._pass_m02_min
            self._masks['m02_min'] = f"(cluster_m02 >= {self._m02_min})"
    @property
    def m02_max(self):
        return self._m02_max
    @m02_max.setter
    def m02_max(self, m02_max):
        self._m02_max = self._canonicalize('m02_max', m02_max)
        if self._m02_max is None:
            self.pass_m02_max = self._pass_ignore
            self._masks['m02_max'] = None
        else:
            self.pass_m02_max = self._pass_m02_max
            self._masks['m02_max'] = f"(cluster_m02 <= {self._m02_max})"
    @property
    def ncells_min(self):
        return self._ncells_min
    @ncells_min.setter
    def ncells_min(self, ncells_min):
        self._ncells_min = self._canonicalize('ncells_min', ncells_min)
        if self._ncells_min is None:
            self.pass_ncells_min = self._pass_ignore
            self._masks['ncells_min'] = None
        else:
            self.pass_ncells_min = self._pass_ncells_min
            self._masks['ncells_min'] = f"(cluster_ncells >= {self._ncells_min})"
    @property
    def time_min(self):
        return self._time_min
    @time_min.setter
    def time_min(self, time_min):
        self._time_min = self._canonicalize('time_min', time_min)
        if self._time_min is None:
            self.pass_time_min = self._pass_ignore
            self._masks['time_min'] = None
        else:
            self.pass_time_min = self._pass_time_min
            self._masks['time_min'] = f"(cluster_time >= {self._time_min})"
    @property
    def time_max(self):
        return self._time_max
    @time_max.setter
    def time_max(self, time_max):
        self._time_max = self._canonicalize('time_max', time_max)
        if self._time_max is None:
            self.pass_time_max = self._pass_ignore
            self._masks['time_max'] = None
        else:
            self.pass_time_max = self._pass_time_max
            self._masks['time_max'] = f"(cluster_time <= {self._time_max})"
    @property
    def exoticity(self):
        return self._exoticity
    @exoticity.setter
    def exoticity(self, exoticity):
        self._exoticity = self._canonicalize('exoticity', exoticity)
        if self._exoticity is None:
            self.pass_exotic = self._pass_ignore
            self._masks['exoticity'] = None
        elif self._exoticity is True:
            self.pass_exotic = self._pass_exotic
            self._masks['exoticity'] = "(!cluster_exoticity)"
        elif self._exoticity is False:
            self.logger.warning("The exoticity cut has been set to False, which will yield exotic clusters only. If you wanted to remove the exoticity check entirely, set this to 'none'.")
            self.pass_exotic = self._fail_exotic
            self._masks['exoticity'] = "(cluster_exoticity)"
        else:
            raise RuntimeError("Somehow the type checking was bypassed here...")
    @property
    def dbc(self):
        return self._dbc
    @dbc.setter
    def dbc(self, dbc):
        self._dbc = self._canonicalize('dbc', dbc)
        if self._dbc is None:
            self.pass_dbc = self._pass_ignore
            self._masks['dbc'] = None
        else:
            self.pass_dbc = self._pass_dbc
            self._masks['dbc'] = f"(cluster_dbc >= {self._dbc})"
    @property
    def nlm(self):
        return self._nlm
    @nlm.setter
    def nlm(self, nlm):
        self._nlm = self._canonicalize('nlm', nlm)
        if self._nlm is None:
            self.pass_nlm = self._pass_ignore
            self._masks['nlm'] = None
        else:
            self.pass_nlm = self._pass_nlm
            # NOTE: this is an upper bound (<=), matching _pass_nlm
            self._masks['nlm'] = f"(cluster_nlm <= {self._nlm})"
    @property
    def defn(self):
        return self._defn
    @defn.setter
    def defn(self, defn):
        self._defn = self._canonicalize('defn', defn)
        if self._defn is None:
            self.pass_defn = self._pass_ignore
            self._masks['defn'] = None
        else:
            self.pass_defn = self._pass_defn
            self._masks['defn'] = f"(cluster_defn == {self._defn})"
    @property
    def edge(self):
        return self._edge
    @edge.setter
    def edge(self, edge):
        self._edge = self._canonicalize('edge', edge)
        if not self._edge:  # either False or None
            self.pass_edge = self._pass_ignore
            self._masks['edge'] = None
        else:
            self.pass_edge = self._pass_edge
            self._masks['edge'] = ("( (cluster_eta_abs < 0.67 && ("
                                   f"(cluster_phi > {self.sm_full[0]} && cluster_phi < {self.sm_full[1]}) || "
                                   f"(cluster_phi > {self.sm_13e[0]} && cluster_phi < {self.sm_13e[1]}) || "
                                   f"(cluster_phi > {self.sm_13d[0]} && cluster_phi < {self.sm_13d[1]}) )) "
                                   f"|| (cluster_eta_abs > 0.25 && cluster_eta_abs < 0.67 && cluster_phi > {self.sm_23[0]} && cluster_phi < {self.sm_23[1]}) )"
                                   )
    @property
    def delta_eta(self):
        return self._delta_eta
    @delta_eta.setter
    def delta_eta(self, delta_eta):
        self._delta_eta = self._canonicalize('delta_eta', delta_eta)
        if self._delta_eta is None:
            # NOTE: no way to use a mask to properly cut on matched track info
            self.pass_delta_eta = self._fail_ignore
        else:
            self.pass_delta_eta = self._pass_delta_eta
    @property
    def delta_phi(self):
        return self._delta_phi
    @delta_phi.setter
    def delta_phi(self, delta_phi):
        self._delta_phi = self._canonicalize('delta_phi', delta_phi)
        if self._delta_phi is None:
            # NOTE: no way to use a mask to properly cut on matched track info
            self.pass_delta_phi = self._fail_ignore
        else:
            self.pass_delta_phi = self._pass_delta_phi
    @property
    def ep_max(self):
        return self._ep_max
    @ep_max.setter
    def ep_max(self, ep_max):
        self._ep_max = self._canonicalize('ep_max', ep_max)
        if self._ep_max is None:
            # NOTE: no way to use a mask to properly cut on matched track info
            self.pass_ep_max = self._fail_ignore
        else:
            self.pass_ep_max = self._pass_ep_max

    def apply_to(self, df, label = "selected"):
        """Apply cluster selections to RDataFrame df (matched-track cuts excluded)."""
        self.logger.warning("Cuts related to matched tracks cannot be properly applied to RDFs, so any cuts specified will be ignored.")
        if 'cluster_pt' not in df.GetColumnNames():
            df_masked = df.Define("cluster_pt", "cluster_energy / cosh(cluster_eta)")
        else:
            df_masked = df
        df_masked = df_masked.Define("cluster_eta_abs", "abs(cluster_eta)")
        mask = self.rdf_mask
        self.logger.info(f"Applying cluster mask: {(mask)}")
        df_masked = df_masked.Define("cluster_mask", mask)
        cluster_cols = [col for col in df_masked.GetColumnNames() if col.startswith("cluster_")]
        name_map = {col: f"{col}_{label}" for col in cluster_cols}
        for colname, masked_colname in name_map.items():
            df_masked = df_masked.Define(masked_colname, f"{colname}[cluster_mask]")
        return df_masked

    def _pass_energy_min(self, cluster):
        """Checks if the cluster has enough energy."""
        return cluster.energy() >= self._energy_min
    def _pass_pt_min(self, cluster):
        """Checks if the cluster has large enough pT."""
        return cluster.pt() >= self._pt_min
    def _pass_m02_min(self, cluster):
        """Checks if the cluster M02 is above the minimum."""
        return cluster.m02() >= self._m02_min
    def _pass_m02_max(self, cluster):
        """Checks if the cluster M02 is below the maximum."""
        return cluster.m02() <= self._m02_max
    def _pass_ncells_min(self, cluster):
        """Checks if the cluster has at least the minimum number of cells."""
        return cluster.ncells() >= self._ncells_min
    def _pass_time_min(self, cluster):
        """Checks if the cluster time is above the minimum."""
        return cluster.time() >= self._time_min
    def _pass_time_max(self, cluster):
        """Checks if the cluster time is below the maximum."""
        return cluster.time() <= self._time_max
    # Special handling for the exoticity cut, which is a boolean. If exoticity is
    # set to True, then the selector will select non-exotic clusters, and vice versa.
    # If set to None, then the cluster exoticity is ignored during selection.
    def _pass_exotic(self, cluster):
        """Checks if the cluster passes the exoticity cut (i.e. it is non-exotic). Used if exoticity = True."""
        return not cluster.exoticity()
    def _fail_exotic(self, cluster):
        """Checks if the cluster does not pass the exoticity cut (i.e. it is exotic). Used if exoticity = False."""
        return cluster.exoticity()
    def _pass_dbc(self, cluster):
        """Checks if the cluster is far enough away from a bad EMC channel."""
        return cluster.dbc() >= self._dbc
    def _pass_nlm(self, cluster):
        """Checks if the cluster has few enough local maxima."""
        return cluster.nlm() <= self._nlm
    def _pass_defn(self, cluster):
        """Checks the cluster definition."""
        return cluster.defn() == self._defn
    def _pass_edge(self, cluster):
        # TODO: check if this cut is done right
        """Checks if the cluster is far enough away from a SM edge."""
        abs_eta = abs(cluster.eta())
        phi = cluster.phi()
        return (abs_eta < 0.67 and ( \
            (phi > self.sm_full[0] and phi < self.sm_full[1]) or \
            (phi > self.sm_13e[0] and phi < self.sm_13e[1]) or \
            (phi > self.sm_13d[0] and phi < self.sm_13d[1]) )) \
            or (abs_eta > 0.25 and abs_eta < 0.67 and phi > self.sm_23[0] and phi < self.sm_23[1])
    def pass_cluster_cuts(self, cluster):
        """Apply all cluster-level cuts (everything except track matching)."""
        return self.pass_energy_min(cluster) and self.pass_pt_min(cluster) \
            and self.pass_m02_min(cluster) and self.pass_m02_max(cluster) \
            and self.pass_ncells_min(cluster) and self.pass_time_min(cluster) \
            and self.pass_time_max(cluster) and self.pass_exotic(cluster) \
            and self.pass_dbc(cluster) and self.pass_nlm(cluster) \
            and self.pass_defn(cluster) and self.pass_edge(cluster)

    def _pass_delta_eta(self, cluster, idx):
        """Checks if a cluster and the given matched track match in eta."""
        return abs(cluster.matchedTrackDeltaEta()[idx]) <= self._delta_eta
    def _pass_delta_phi(self, cluster, idx):
        """Checks if a cluster and the given matched track match in phi."""
        return abs(cluster.matchedTrackDeltaPhi()[idx]) <= self._delta_phi
    def _pass_ep_max(self, cluster, idx):
        """Checks if a cluster and track match in energy."""
        return cluster.energy() / cluster.matchedTrackP()[idx] <= self._ep_max
    def is_not_in_sector_edge(self, cluster, idx):
        # TODO: implement TPC sector edge cut from LF
        return True
    def is_matched_to_track(self, cluster, idx):
        """Checks if a cluster and the given matched track match."""
        return self.pass_delta_eta(cluster, idx) and self.pass_delta_phi(cluster, idx) \
            and self.pass_ep_max(cluster, idx) and self.is_not_in_sector_edge(cluster, idx)
    def is_matched(self, cluster):
        """Checks if the cluster is matched to any of its candidate tracks."""
        return any(self.is_matched_to_track(cluster, idx) for idx in range(cluster.matchedTrackN()))
    def selects(self, cluster):
        """Determine if cluster is a photon candidate and is not matched to any track."""
        return self.pass_cluster_cuts(cluster) and not self.is_matched(cluster)
    def __call__(self, cluster):
        return self.selects(cluster)
class IsolationSelector(Selector):
    """A cluster selector for isolation, configured via YAML.

    Holds the isolation criteria for EMCal clusters. To ignore the isolation pT
    altogether, set it to None or leave it unspecified in the config. Unlike
    other selections, `iso_cone_R` and `track_eta_max` fall back to default
    values of 0.4 and 0.9 when None or unspecified.

    Attributes:
        iso_cone_R (float/int): isolation cone radius. Defaults to 0.4 when unspecified.
        iso_pt_max (float/int): maximum isolation transverse momentum to be considered isolated.
        track_eta_max (float/int): maximum track pseudorapidity; only used for the
            geometric acceptance correction. Defaults to 0.9 when unspecified.
    """

    _types = {
        # isolation cuts
        "iso_cone_R": (float, int),    # isocone radius
        "iso_pt_max": (float, int),    # max isocone pT (GeV)
        "track_eta_max": (float, int), # maximum track pseudorapidity
    }
    _name = "isolation"

    @property
    def iso_pt_max(self):
        return self._iso_pt_max

    @iso_pt_max.setter
    def iso_pt_max(self, value):
        self._iso_pt_max = self._canonicalize('iso_pt_max', value)
        # The isolation pT is the only cut this selector checks, so selects()
        # is bound directly here.
        self.selects = self._pass_ignore if self._iso_pt_max is None else self._pass_iso_pt_max

    @property
    def iso_cone_R(self):
        return self._iso_cone_R

    @iso_cone_R.setter
    def iso_cone_R(self, value):
        self._iso_cone_R = self._canonicalize('iso_cone_R', value)
        if self._iso_cone_R is None:  # None value not allowed in this case, so set to 0.4
            self.logger.warning("Isolation cone radius unspecified; setting to 0.4.")
            self._iso_cone_R = 0.4

    @property
    def track_eta_max(self):
        return self._track_eta_max

    @track_eta_max.setter
    def track_eta_max(self, value):
        self._track_eta_max = self._canonicalize('track_eta_max', value)
        if self._track_eta_max is None:  # None value not allowed in this case, so set to 0.9
            self.logger.warning("Maximum track eta unspecified; setting to 0.9.")
            self._track_eta_max = 0.9

    def _pass_iso_pt_max(self, cluster, tracks, verbose = False):
        """Determine if cluster is isolated."""
        cone_pt = np.sum([t.pt() for t in tracks if cluster.dR(t) < self._iso_cone_R])
        facc = self._facc(cluster.eta())
        cone_pt_eff = cone_pt * facc
        isolated = cone_pt_eff < self._iso_pt_max
        if verbose:
            return isolated, cone_pt_eff, facc, cone_pt
        return isolated

    def apply_to(self, df):
        raise NotImplementedError("Isolation criteria can't be applied to RDataFrames!")

    def _facc(self, eta):
        """Calculate the geometric acceptance correction for clusters whose
        isolation cones extend outside the central barrel acceptance."""
        eta = abs(eta)
        R = self._iso_cone_R
        d = self._track_eta_max - eta
        if eta < (self._track_eta_max - R):
            return 1.0
        return np.pi * R * R / (d * np.sqrt(R**2 - d**2) + np.pi * R**2 * (1 - 1 / np.pi * np.arccos(d / R)))
class AnalysisSelector:
    """Bundle of the event/track/cluster/isolation selectors for one analysis."""

    # the selector slots this container manages, in reporting order
    _selector_names = ("event", "track", "cluster", "isolation")

    def __init__(self, file):
        self.logger = set_up_logger(type(self).__name__)
        self.event = EventSelector.load(file)
        self.track = TrackSelector.load(file)
        self.cluster = ClusterSelector.load(file)
        self.isolation = IsolationSelector.load(file)

    @classmethod
    def load(cls, file):
        """Alternate constructor mirroring the individual selectors' load() API."""
        return cls(file)

    def dump(self):
        """Log the configuration of every active selector."""
        self.logger.info("Selector configurations:")
        for name in self.active:
            getattr(self, name).dump()
        self.logger.info(f"Inactive selectors: {self.inactive}")

    @property
    def active(self):
        """Return a list of active selectors."""
        return [name for name in self._selector_names if self.is_active(name)]

    @property
    def inactive(self):
        """Return a list of inactive selectors."""
        return [name for name in self._selector_names if self.is_inactive(name)]

    def is_active(self, selector: str):
        return getattr(self, selector) is not None

    def is_inactive(self, selector: str):
        return getattr(self, selector) is None
def dict_to_ndict(d):
    """Recursively convert a plain dict into a nested defaultdict ("ndict")."""
    # named `out` (not `ndict`) so the module-level `ndict` alias is not shadowed
    out = nested_dict()
    for k, v in d.items():
        out[k] = dict_to_ndict(v) if isinstance(v, dict) else v
    return out

def ndict_to_dict(d):
    """Recursively convert a nested defaultdict back into plain dicts (mutates d)."""
    for k, v in d.items():
        if isinstance(v, dict):
            d[k] = ndict_to_dict(v)
    return dict(d)

def linbins(xmin, xmax, nbins):
    """Return nbins+1 linearly spaced bin edges between xmin and xmax."""
    return np.linspace(xmin, xmax, nbins + 1)

def logbins(xmin, xmax, nbins):
    """Return nbins+1 logarithmically spaced bin edges between xmin and xmax."""
    return np.logspace(np.log10(xmin), np.log10(xmax), nbins + 1)

def delta_R(p1, p2):
    """Calculate angular distance with pseudorapidity (rather than rapidity)."""
    return np.sqrt(p1.delta_phi_to(p2) ** 2 + (p1.eta() - p2.eta()) ** 2)

def delta_phi_limit(dphi):
    """Limit a given angle to the range [-pi, pi]."""
    while dphi > np.pi:
        dphi -= 2 * np.pi
    while dphi < -np.pi:
        dphi += 2 * np.pi
    return dphi

def is_slurm():
    """Check if we are in a Slurm job / on a compute node."""
    if "SLURM_SUBMIT_DIR" in os.environ or 'HOST' not in os.environ:
        # Slurm environment variables present, or HOST unset (typical of batch nodes)
        return True
    elif 'login' in os.environ['HOST']:
        # interactive login node
        return False
    else:
        # if unparsable, default to Slurm configuration (safer)
        return True
multiplicity - - track_data_eta - - track_data_phi - - track_data_pt - - track_data_label - - track_data_tracksel -# - cluster_data_energy -# - cluster_data_eta -# - cluster_data_phi -# - cluster_data_m02 -# - cluster_data_m20 -# - cluster_data_ncells -# - cluster_data_time -# - cluster_data_isexotic -# - cluster_data_distancebadchannel -# - cluster_data_nlm -# - cluster_data_clusterdef -# - cluster_data_matchedTrackIndex + - centrality + - occupancy + - event_sel + - trig_sel + - rct + - track_pt + - track_eta + - track_phi + - track_sel + # - cluster_energy + # - cluster_eta + # - cluster_phi + # - cluster_m02 + # - cluster_m20 + # - cluster_ncells + # - cluster_time + # - cluster_exoticity + # - cluster_dbc + # - cluster_nlm + # - cluster_defn + # - cluster_matched_track_n + # - cluster_matched_track_delta_eta + # - cluster_matched_track_delta_phi + # - cluster_matched_track_p + # - cluster_matched_track_pt + # - cluster_matched_track_sel diff --git a/alian/io/data_io.py b/alian/io/data_io.py index a943cbe..cbbdf0e 100644 --- a/alian/io/data_io.py +++ b/alian/io/data_io.py @@ -34,7 +34,7 @@ def __init__(self, file_list, yaml_file=None, tree_name=None, branches=None, **k def add_generic_ebye_info(self): self.event.multiplicity = self.event.data['multiplicity'] self.event.centrality = self.event.data['centrality'] - self.event.track_count = len(self.event.data['track_data_pt']) + self.event.track_count = len(self.event.data['track_pt']) self.event.counter = self.event_count # Efficiently iterate over the tree as a generator @@ -175,7 +175,15 @@ def next_event(self): break -def get_default_tree_structure(lhc_run=3): +def get_default_tree_structure(file_list, lhc_run=3): + if isinstance(file_list, str): + if file_list.endswith(".txt"): + with open(file_list, "r") as file: + filename = file.read().splitlines() + else: + filename = file_list + if os.path.isfile(_guess_path := os.path.join(os.path.dirname(os.path.abspath(filename)), "../tstruct.yaml")): + return 
_guess_path _alian_path_guess = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../') _default_tree_structure = os.path.join(_alian_path_guess, f'config/run{lhc_run}_tstruct.yaml') _alian_path = yasp.features('prefix', 'alian') @@ -195,7 +203,7 @@ def __init__(self, file_list, lhc_run=None, yaml_file=None, tree_name=None, bran self.tree_name = tree_name self.branches = branches if self.yaml_file is None: - self.yaml_file = get_default_tree_structure(lhc_run=self.lhc_run) + self.yaml_file = get_default_tree_structure(file_list = file_list, lhc_run=self.lhc_run) _data_input = Run2FileInput if lhc_run == 2 else Run3FileInput self.data_input = _data_input(file_list, yaml_file=self.yaml_file, tree_name=self.tree_name, branches=self.branches, **kwargs) diff --git a/alian/src/fjutil/fjutil.cxx b/alian/src/fjutil/fjutil.cxx index 5933e82..e3c8795 100644 --- a/alian/src/fjutil/fjutil.cxx +++ b/alian/src/fjutil/fjutil.cxx @@ -1,7 +1,7 @@ // fjutil.cxx #include "fjutil.hh" #include - +#include namespace alian { void process_numpy_array(PyObject *array) @@ -102,7 +102,288 @@ namespace alian particles.emplace_back(px, py, pz, E); particles.back().set_user_index(i + index_offset); } - return particles; } + + + // function to transform numpy arrays (px, py, pz, tracksel) into a vector of PseudoJets with track information + std::vector numpy_pxpypz_to_tracks(PyObject *px, PyObject *py, PyObject *pz, PyObject *tracksel, int index_offset) + { + PyArrayObject *np_px = reinterpret_cast(px); + PyArrayObject *np_py = reinterpret_cast(py); + PyArrayObject *np_pz = reinterpret_cast(pz); + PyArrayObject *np_tracksel = reinterpret_cast(tracksel); + + if (PyArray_NDIM(np_px) != 1 || PyArray_NDIM(np_py) != 1 || PyArray_NDIM(np_pz) != 1 || PyArray_NDIM(np_tracksel) != 1) + { + PyErr_SetString(PyExc_TypeError, "Arrays must be one-dimensional"); + return std::vector(); + } + + npy_intp size = PyArray_SIZE(np_px); + if (size != PyArray_SIZE(np_py) || size != PyArray_SIZE(np_pz) || size 
!= PyArray_SIZE(np_tracksel)) + { + PyErr_SetString(PyExc_ValueError, "Arrays must have the same size"); + return std::vector(); + } + + float *px_data = static_cast(PyArray_DATA(np_px)); + float *py_data = static_cast(PyArray_DATA(np_py)); + float *pz_data = static_cast(PyArray_DATA(np_pz)); + uint8_t *tracksel_data = static_cast(PyArray_DATA(np_tracksel)); + + std::vector tracks; + tracks.reserve(size); + + for (npy_intp i = 0; i < size; ++i) + { + // assume the charged pion mass + double E = sqrt(px_data[i] * px_data[i] + py_data[i] * py_data[i] + pz_data[i] * pz_data[i] + PION_MASS * PION_MASS); + // charge is stored in the 0th bit of tracksel (1 = positive, 0 = negativ) + short q = tracksel_data[i] & 0b1 ? 1 : -1; + + tracks.emplace_back(px_data[i], py_data[i], pz_data[i], E); + tracks.back().set_user_info(new alian::TrackInfo(q, tracksel_data[i])); + tracks.back().set_user_index(i + index_offset); + } + + return tracks; + } + + // function to transform numpy arrays (pT, eta, phi, tracksel) into a vector of PseudoJets with track information + std::vector numpy_ptetaphi_to_tracks(PyObject *pt, PyObject *eta, PyObject *phi, PyObject *tracksel, int index_offset) + { + PyArrayObject *np_pt = reinterpret_cast(pt); + PyArrayObject *np_eta = reinterpret_cast(eta); + PyArrayObject *np_phi = reinterpret_cast(phi); + PyArrayObject *np_tracksel = reinterpret_cast(tracksel); + + if (PyArray_NDIM(np_pt) != 1 || PyArray_NDIM(np_eta) != 1 || PyArray_NDIM(np_phi) != 1 || PyArray_NDIM(np_tracksel) != 1) + { + PyErr_SetString(PyExc_TypeError, "Arrays must be one-dimensional"); + return std::vector(); + } + + npy_intp size = PyArray_SIZE(np_pt); + if (size != PyArray_SIZE(np_eta) || size != PyArray_SIZE(np_phi) || size != PyArray_SIZE(np_tracksel)) + { + PyErr_SetString(PyExc_ValueError, "Arrays must have the same size"); + return std::vector(); + } + + float *pt_data = static_cast(PyArray_DATA(np_pt)); + float *eta_data = static_cast(PyArray_DATA(np_eta)); + float *phi_data = 
static_cast(PyArray_DATA(np_phi)); + uint8_t *tracksel_data = static_cast(PyArray_DATA(np_tracksel)); + + std::vector tracks; + tracks.reserve(size); + + for (npy_intp i = 0; i < size; ++i) + { + if (pt_data[i] <= 0.001) // this is 1 MeV (!) + { + // PyErr_SetString(PyExc_ValueError, "pt must be positive"); + return std::vector(); + continue; + } + double px = pt_data[i] * cos(phi_data[i]); + double py = pt_data[i] * sin(phi_data[i]); + double pz = pt_data[i] * sinh(eta_data[i]); + // assume the pion mass + double E = sqrt(px * px + py * py + pz * pz + PION_MASS * PION_MASS); + // charge is stored in the 0th bit of tracksel (1 = positive, 0 = negative) + short q = tracksel_data[i] & 0b1 ? 1 : -1; + + tracks.emplace_back(px, py, pz, E); + tracks.back().set_user_info(new alian::TrackInfo(q, tracksel_data[i])); + tracks.back().set_user_index(i + index_offset); + } + + return tracks; + } + + std::vector numpy_energyetaphi_to_clusters( + PyObject *energy, PyObject *eta, PyObject *phi, + PyObject *m02, + PyObject *m20, + PyObject *ncells, + PyObject *time, + PyObject *exoticity, + PyObject *dbc, + PyObject *nlm, + PyObject *defn, + PyObject *matchedTrackN, + PyObject *matchedTrackDeltaEta, + PyObject *matchedTrackDeltaPhi, + PyObject *matchedTrackP, + PyObject *matchedTrackPt, + PyObject *matchedTrackSel, + int index_offset + ) + { + PyArrayObject *np_energy = reinterpret_cast(energy); + PyArrayObject *np_eta = reinterpret_cast(eta); + PyArrayObject *np_phi = reinterpret_cast(phi); + PyArrayObject *np_m02 = reinterpret_cast(m02); + PyArrayObject *np_m20 = reinterpret_cast(m20); + PyArrayObject *np_ncells = reinterpret_cast(ncells); + PyArrayObject *np_time = reinterpret_cast(time); + PyArrayObject *np_exoticity = reinterpret_cast(exoticity); + PyArrayObject *np_dbc = reinterpret_cast(dbc); + PyArrayObject *np_nlm = reinterpret_cast(nlm); + PyArrayObject *np_defn = reinterpret_cast(defn); + PyArrayObject *np_matchedTrackN = reinterpret_cast(matchedTrackN); + PyArrayObject 
*np_matchedTrackDeltaEta = reinterpret_cast(matchedTrackDeltaEta); + PyArrayObject *np_matchedTrackDeltaPhi = reinterpret_cast(matchedTrackDeltaPhi); + PyArrayObject *np_matchedTrackP = reinterpret_cast(matchedTrackP); + PyArrayObject *np_matchedTrackPt = reinterpret_cast(matchedTrackPt); + PyArrayObject *np_matchedTrackSel = reinterpret_cast(matchedTrackSel); + + if (PyArray_NDIM(np_energy) != 1 + || PyArray_NDIM(np_eta) != 1 + || PyArray_NDIM(np_phi) != 1 + || PyArray_NDIM(np_m02) != 1 + || PyArray_NDIM(np_m02) != 1 + || PyArray_NDIM(np_m20) != 1 + || PyArray_NDIM(np_ncells) != 1 + || PyArray_NDIM(np_time) != 1 + || PyArray_NDIM(np_exoticity) != 1 + || PyArray_NDIM(np_dbc) != 1 + || PyArray_NDIM(np_nlm) != 1 + || PyArray_NDIM(np_defn) != 1 + || PyArray_NDIM(np_matchedTrackN) != 1 + || PyArray_NDIM(np_matchedTrackDeltaEta) != 1 + || PyArray_NDIM(np_matchedTrackDeltaPhi) != 1 + || PyArray_NDIM(np_matchedTrackP) != 1 + || PyArray_NDIM(np_matchedTrackPt) != 1 + || PyArray_NDIM(np_matchedTrackSel) != 1 + ) + { + PyErr_SetString(PyExc_TypeError, "Arrays must be one-dimensional"); + return std::vector(); + } + + npy_intp size = PyArray_SIZE(np_energy); + if (size != PyArray_SIZE(np_eta) + || size != PyArray_SIZE(np_phi) + || size != PyArray_SIZE(np_m02) + || size != PyArray_SIZE(np_m02) + || size != PyArray_SIZE(np_m20) + || size != PyArray_SIZE(np_ncells) + || size != PyArray_SIZE(np_time) + || size != PyArray_SIZE(np_exoticity) + || size != PyArray_SIZE(np_dbc) + || size != PyArray_SIZE(np_nlm) + || size != PyArray_SIZE(np_defn) + || size != PyArray_SIZE(np_matchedTrackN) + ) + { + PyErr_SetString(PyExc_ValueError, "Arrays must have the same size"); + return std::vector(); + } + + // if no clusters in the event, return empty vector + if (size == 0) { + return std::vector(); + } + + // Data types here have to match those from the NumPy arrays + // These are taken from the types that uproot reads from the BerkeleyTrees + float *energy_data = static_cast 
(PyArray_DATA(np_energy)); + float *eta_data = static_cast (PyArray_DATA(np_eta)); + float *phi_data = static_cast (PyArray_DATA(np_phi)); + float *m02_data = static_cast (PyArray_DATA(np_m02)); + float *m20_data = static_cast (PyArray_DATA(np_m20)); + int *ncells_data = static_cast (PyArray_DATA(np_ncells)); + float *time_data = static_cast (PyArray_DATA(np_time)); + bool *exoticity_data = static_cast (PyArray_DATA(np_exoticity)); + float *dbc_data = static_cast (PyArray_DATA(np_dbc)); + int *nlm_data = static_cast (PyArray_DATA(np_nlm)); + int *defn_data = static_cast (PyArray_DATA(np_defn)); + int *matchedTrackN_data = static_cast (PyArray_DATA(np_matchedTrackN)); + // float *matchedTrackEta_data = static_cast (PyArray_DATA(np_matchedTrackEta)); + // float *matchedTrackPhi_data = static_cast (PyArray_DATA(np_matchedTrackPhi)); + // float *matchedTrackP_data = static_cast (PyArray_DATA(np_matchedTrackP)); + + npy_intp sizeMatchedDeltaEta = PyArray_SIZE(np_matchedTrackDeltaEta); + npy_intp sizeMatchedDeltaPhi = PyArray_SIZE(np_matchedTrackDeltaPhi); + npy_intp sizeMatchedP = PyArray_SIZE(np_matchedTrackP); + npy_intp sizeMatchedPt = PyArray_SIZE(np_matchedTrackPt); + npy_intp sizeMatchedSel = PyArray_SIZE(np_matchedTrackSel); + + uint16_t nMatchedTracks; + for (npy_intp i = 0; i < size; ++i) { + nMatchedTracks += matchedTrackN_data[i]; + } + + if ( nMatchedTracks != sizeMatchedDeltaEta + || nMatchedTracks != sizeMatchedDeltaPhi + || nMatchedTracks != sizeMatchedP + || nMatchedTracks != sizeMatchedPt + || nMatchedTracks != sizeMatchedSel) + { + PyErr_SetString(PyExc_ValueError, "Number of matched tracks does not fit"); + return std::vector(); + } + + std::vector clusters; + clusters.reserve(size); + + int iMatchedTrack(0); + for (npy_intp i = 0; i < size; ++i) + { + // assume massless in this calculation + double pt = energy_data[i] / cosh(eta_data[i]); + // double pz = energy_data[i] * tanh(eta_data[i]); + // if (pt <= 0.001) // this is 1 MeV (!) 
+ // { + // // PyErr_SetString(PyExc_ValueError, "pt must be positive"); + // return std::vector(); + // continue; + // } + double px = pt * cos(phi_data[i]); + double py = pt * sin(phi_data[i]); + double pz = pt * sinh(eta_data[i]); + + npy_intp const dims[1] = {matchedTrackN_data[i]}; + PyArrayObject* matchedTrackDeltaEta = reinterpret_cast(PyArray_SimpleNew(1, dims, NPY_FLOAT)); + PyArrayObject* matchedTrackDeltaPhi = reinterpret_cast(PyArray_SimpleNew(1, dims, NPY_FLOAT)); + PyArrayObject* matchedTrackP = reinterpret_cast(PyArray_SimpleNew(1, dims, NPY_FLOAT)); + PyArrayObject* matchedTrackPt = reinterpret_cast(PyArray_SimpleNew(1, dims, NPY_FLOAT)); + PyArrayObject* matchedTrackSel = reinterpret_cast(PyArray_SimpleNew(1, dims, NPY_UINT8)); + + for (int iTrack = 0; iTrack < matchedTrackN_data[i]; ++iTrack) { + *(npy_float *)PyArray_GETPTR1(matchedTrackDeltaEta, iTrack) = *(npy_float *)PyArray_GETPTR1(np_matchedTrackDeltaEta, iMatchedTrack); + *(npy_float *)PyArray_GETPTR1(matchedTrackDeltaPhi, iTrack) = *(npy_float *)PyArray_GETPTR1(np_matchedTrackDeltaPhi, iMatchedTrack); + *(npy_float *)PyArray_GETPTR1(matchedTrackP, iTrack) = *(npy_float *)PyArray_GETPTR1(np_matchedTrackP, iMatchedTrack); + *(npy_float *)PyArray_GETPTR1(matchedTrackPt, iTrack) = *(npy_float *)PyArray_GETPTR1(np_matchedTrackPt, iMatchedTrack); + *(npy_uint8 *)PyArray_GETPTR1(matchedTrackSel, iTrack) = *(npy_uint8 *)PyArray_GETPTR1(np_matchedTrackSel, iMatchedTrack); + iMatchedTrack++; + } + clusters.emplace_back(px, py, pz, + energy_data[i], + m02_data[i], + m20_data[i], + ncells_data[i], + time_data[i], + exoticity_data[i], + dbc_data[i], + nlm_data[i], + defn_data[i], + matchedTrackN_data[i], + (PyObject*)matchedTrackDeltaEta, + (PyObject*)matchedTrackDeltaPhi, + (PyObject*)matchedTrackP, + (PyObject*)matchedTrackPt, + (PyObject*)matchedTrackSel + ); + clusters.back().set_user_index(i + index_offset); + } + if (iMatchedTrack != nMatchedTracks) { + PyErr_SetString(PyExc_ValueError, "Somehow 
did not consume all matched tracks"); + return std::vector(); + } + return clusters; + } } \ No newline at end of file diff --git a/alian/src/fjutil/fjutil.hh b/alian/src/fjutil/fjutil.hh index 1798c6c..78932f7 100644 --- a/alian/src/fjutil/fjutil.hh +++ b/alian/src/fjutil/fjutil.hh @@ -43,6 +43,159 @@ namespace alian std::vector numpy_pxpypz_to_pseudojets(PyObject *px, PyObject *py, PyObject *pz, double m, int index_offset = 0); std::vector numpy_ptetaphi_to_pseudojets(PyObject *pt, PyObject *eta, PyObject *phi, double m, int index_offset = 0); + + constexpr double PION_MASS = 0.1395704; + + std::vector numpy_pxpypz_to_tracks(PyObject *px, PyObject *py, PyObject *pz, PyObject *tracksel, int index_offset); + std::vector numpy_ptetaphi_to_tracks(PyObject *pt, PyObject *eta, PyObject *phi, PyObject *tracksel, int index_offset); + + class TrackInfo : public fastjet::PseudoJet::UserInfoBase + { + public: + TrackInfo(short q, uint16_t track_sel) : _q(q), _track_sel(track_sel) {} + + const short q() const { return _q; } + const uint16_t track_sel() const { return _track_sel; } + + private: + short _q; + uint16_t _track_sel; + }; + + class Cluster : public fastjet::PseudoJet + { + public: + Cluster( + const double px_in, + const double py_in, + const double pz_in, + const double E_in, + const float m02, + const float m20, + const int ncells, + const float time, + const bool exoticity, + const float dbc, + const int nlm, + const int defn, + const int matchedTrackN, + PyObject* matchedTrackDeltaEta, + PyObject* matchedTrackDeltaPhi, + PyObject* matchedTrackP, + PyObject* matchedTrackPt, + PyObject* matchedTrackSel + ) : fastjet::PseudoJet(px_in, py_in, pz_in, E_in), + _m02(m02), + _m20(m20), + _ncells(ncells), + _time(time), + _exoticity(exoticity), + _dbc(dbc), + _nlm(nlm), + _defn(defn), + _matchedTrackN(matchedTrackN), + _matchedTrackDeltaEta(matchedTrackDeltaEta), + _matchedTrackDeltaPhi(matchedTrackDeltaPhi), + _matchedTrackP(matchedTrackP), + 
_matchedTrackPt(matchedTrackPt), + _matchedTrackSel(matchedTrackSel) + { + Py_INCREF(_matchedTrackDeltaEta); + Py_INCREF(_matchedTrackDeltaPhi); + Py_INCREF(_matchedTrackP); + Py_INCREF(_matchedTrackPt); + Py_INCREF(_matchedTrackSel); + } + + ~Cluster() { + Py_DECREF(_matchedTrackDeltaEta); + Py_DECREF(_matchedTrackDeltaPhi); + Py_DECREF(_matchedTrackP); + Py_DECREF(_matchedTrackPt); + Py_DECREF(_matchedTrackSel); + } + template + const double delta_eta(const T &other) const { return other.eta() - eta(); }; + template + const double abs_delta_eta(const T &other) const { return fabs(other.eta() - eta()); }; + template + const double abs_delta_phi(const T &other) const { return fabs(delta_phi_to(other)); }; + template + const double dR(const T &other) const { + double dphi = delta_phi_to(other); + double deta = delta_eta(other); + return sqrt(dphi*dphi + deta*deta); + }; + inline const auto energy() const { return e(); }; + inline const auto m02() const { return _m02; }; + inline const auto m20() const { return _m20; }; + inline const auto ncells() const { return _ncells; }; + inline const auto time() const { return _time; }; + inline const auto exoticity() const { return _exoticity; }; + inline const auto dbc() const { return _dbc; }; + inline const auto nlm() const { return _nlm; }; + inline const auto defn() const { return _defn; }; + inline const auto matchedTrackN() const { return _matchedTrackN; }; + inline auto matchedTrackDeltaEta() const { Py_INCREF(_matchedTrackDeltaEta); return _matchedTrackDeltaEta; }; + inline auto matchedTrackDeltaPhi() const { Py_INCREF(_matchedTrackDeltaPhi); return _matchedTrackDeltaPhi; }; + inline auto matchedTrackP() const { Py_INCREF(_matchedTrackP); return _matchedTrackP; }; + inline auto matchedTrackPt() const { Py_INCREF(_matchedTrackPt); return _matchedTrackPt; }; + inline auto matchedTrackSel() const { Py_INCREF(_matchedTrackSel); return _matchedTrackSel; }; + // Copy constructor + // Cluster(const Cluster& other) + // : 
_m02(other.m02()), + // _m20(other.m20()), + // _ncells(other.ncells()), + // _time(other.time()), + // _exoticity(other.exoticity()), + // _dbc(other.dbc()), + // _nlm(other.nlm()), + // _defn(other.defn()), + // _matchedTrackN(other.matchedTrackN()), + // _matchedTrackEta(other.matchedTrackEta()), + // _matchedTrackPhi(other.matchedTrackPhi()), + // _matchedTrackP(other.matchedTrackP()) { + // Py_DECREF(_matchedTrackEta); + // // Py_DECREF(_matchedTrackEta); + // Py_DECREF(_matchedTrackPhi); + // Py_DECREF(_matchedTrackP); + // } + + private: + float _m02; + float _m20; + int _ncells; + float _time; + bool _exoticity; + float _dbc; + int _nlm; + int _defn; + int _matchedTrackN; + PyObject* _matchedTrackDeltaEta; + PyObject* _matchedTrackDeltaPhi; + PyObject* _matchedTrackP; + PyObject* _matchedTrackPt; + PyObject* _matchedTrackSel; + }; + + std::vector numpy_energyetaphi_to_clusters( + PyObject *energy, PyObject *eta, PyObject *phi, + PyObject *m02, + PyObject *m20, + PyObject *ncells, + PyObject *time, + PyObject *exoticity, + PyObject *dbc, + PyObject *nlm, + PyObject *defn, + PyObject *matchedTrackN, + PyObject *matchedTrackDeltaEta, + PyObject *matchedTrackDeltaPhi, + PyObject *matchedTrackP, + PyObject *matchedTrackPt, + PyObject *matchedTrackSel, + int index_offset + ); } #endif \ No newline at end of file diff --git a/alian/utils/data_fj.py b/alian/utils/data_fj.py index aba2f71..3e7d617 100644 --- a/alian/utils/data_fj.py +++ b/alian/utils/data_fj.py @@ -24,6 +24,9 @@ def psjv_from_tracks_run3(event, index_offset=0, m=0.13957): rv = alian.numpy_ptetaphi_to_pseudojets(event.data['track_data_pt'], event.data['track_data_eta'], event.data['track_data_phi'], m, index_offset) return rv +def psjv_from_tracks_run3_selected(pt, eta, phi, m, index_offset): + return alian.numpy_ptetaphi_to_pseudojets(pt, eta, phi, m, index_offset) + def psjv_from_tracks_run2(event, index_offset=0, m=0.13957): rv = std.vector[fj.PseudoJet]() for i in 
range(len(event.data['ParticlePt'])): @@ -39,11 +42,10 @@ def psjv_from_tracks_run2(event, index_offset=0, m=0.13957): return rv def data_tracks_to_pseudojets(event, index_offset=0, m=0.13957, lhc_run=3): - if lhc_run == 3: - return psjv_from_tracks_run3(event, index_offset, m) - elif lhc_run == 2: - return psjv_from_tracks_run2(event, index_offset, m) - else: - raise ValueError(f'Invalid run number: {lhc_run}') - return None - + if lhc_run == 3: + return psjv_from_tracks_run3(event, index_offset, m) + elif lhc_run == 2: + return psjv_from_tracks_run2(event, index_offset, m) + else: + raise ValueError(f'Invalid run number: {lhc_run}') + return None From 4d9b94f87b8c85ffbf16f8bf6edf70eb9b3d563a Mon Sep 17 00:00:00 2001 From: Tucker Hwang Date: Tue, 14 Apr 2026 07:35:09 -0700 Subject: [PATCH 3/3] Add example analyses --- alian/analysis/example/example_analysis.py | 80 ++++++++++++ alian/analysis/example/example_rdf.py | 142 +++++++++++++++++++++ alian/analysis/example/merge.sh | 100 +++++++++++++++ alian/config/example/example_analysis.yaml | 65 ++++++++++ alian/config/example/example_rdf.yaml | 25 ++++ 5 files changed, 412 insertions(+) create mode 100755 alian/analysis/example/example_analysis.py create mode 100755 alian/analysis/example/example_rdf.py create mode 100755 alian/analysis/example/merge.sh create mode 100644 alian/config/example/example_analysis.yaml create mode 100644 alian/config/example/example_rdf.yaml diff --git a/alian/analysis/example/example_analysis.py b/alian/analysis/example/example_analysis.py new file mode 100755 index 0000000..cc55732 --- /dev/null +++ b/alian/analysis/example/example_analysis.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 + +"""Example script on using the alian framework for Run 3 analysis. + +This script is used on the ROOT file (provided as an argument) to +perform the analysis of the events containing isolated photons and +jets. 
The analysis is configured with a config YAML file, creates +and fills histograms for the cleaned data, and stores them in the +output file. + +To be used with alian/config/example/example_analysis.yaml +""" + +import argparse +import itertools + +import numpy as np +from alian.analysis.base import AnalysisBase, add_default_args, delta_R + +import heppyy + +fj = heppyy.load_cppyy('fastjet') + + +class AnalysisExample(AnalysisBase): + _defaults = { + 'photon_jet_angle_min': 0.875 * np.pi, + 'pt_min_eec': 1.0, + } + def init_analysis(self, analysis_cfg: dict): + config = self._defaults | analysis_cfg + for setting, value in config.items(): + setattr(self, setting, value) + self.eec_trk_selector = fj.SelectorPtMin(self.pt_min_eec) + + def analyze_event(self): + # Analyzes this event that has passed the selection criteria + # self.event contains the selected event + # self.tracks contains selected tracks (i.e. after selection cuts) + # self.clusters contains selected clusters (i.e. after selection cuts) + # self.jets contains selected jets + + [self.hists['track_pT'].Fill(t.pt()) for t in self.tracks] + [self.hists['jet_pT'].Fill(j.pt()) for j in self.jets] + [self.hists['jet_pT_coarse'].Fill(j.pt()) for j in self.jets] + + for c in self.clusters: + is_iso, iso_pt, _, _ = self.selector.isolation.selects(c, self.tracks, verbose = True) + self.hists['tot_iso_pT'].Fill(iso_pt) + if is_iso: + self.hists['photon_E'].Fill(c.e()) + # find back-to-back jets + for j in self.jets: + dphi = np.abs(j.delta_phi_to(c)) + if dphi > self.photon_jet_angle_min: + self.hists["jet_photon_dphi"].Fill(dphi) + self.hists["y_jet_pT"].Fill(j.pt()) + self.hists["xjy"].Fill(j.pt() / c.e(), c.e()) + self.do_eec(j) + + def do_eec(self, jet): + tracks = self.eec_trk_selector(jet.constituents()) + for p1, p2 in itertools.permutations(tracks, 2): + ew = p1.pt() * p2.pt() / jet.pt() / jet.pt() + angle = delta_R(p1, p2) + + self.hists["eec"].Fill(jet.pt(), angle, ew) + + def finalize(self): + 
self.hists['track_pT'].Scale(1, "width") + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description="Run analysis on ROOT file using YAML configuration.") + parser = add_default_args(parser) + + args = parser.parse_args() + + ana = AnalysisExample(args.input_file, args.output_file, args.config_file, args.tree_struct, args.nev, args.lhc_run) + ana.run() diff --git a/alian/analysis/example/example_rdf.py b/alian/analysis/example/example_rdf.py new file mode 100755 index 0000000..fe90995 --- /dev/null +++ b/alian/analysis/example/example_rdf.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 + +"""Example script on using RDataFrame for analysis. + +An example script that uses RDataFrame for analysis. RDF +implementations can only handle basic selections on cluster +and track quantities; for more complicated tasks like jet +finding or cluster-track matching, use example_analysis.py. + +To be used with alian/config/example/example_rdf.yaml +""" + +import argparse +from datetime import datetime as dt +from time import perf_counter as timenow + +import numpy as np +import ROOT +from alian.analysis.base import AnalysisSelector, linbins, logbins, set_up_logger + +ROOT.TH1.SetDefaultSumw2() # applies to TH2 and TH3 as well + +class AnalysisQA: + def __init__(self, inf, outf, cfg): + self.logger = set_up_logger(__name__) + self.outf = outf + self.selector = AnalysisSelector.load(cfg) + self.df = ROOT.RDataFrame("eventTree", inf) + self.histos = {} + + def note_time(self, msg): + self.logger.info(f"{msg}: -------- {timenow() - self.start_time:.3f} sec. --------", stacklevel = 2) + def note_start(self, msg): + self.start_time = timenow() + self.logger.info(f"{msg}: {dt.now().replace(microsecond=0)}", stacklevel = 2) + + def analyze(self): + self.note_start("Starting analysis") + self.apply_cuts() + self.create_histos() + self.fill_and_save() + self.note_time("Finished analysis") + + def apply_cuts(self): + """ + Apply cuts to events, tracks, and clusters. 
The event selection + is just a Filter applied on the RDF. Cluster and track selections + are a little more complicated since they're vector columns, so + the implementation is hidden behind apply_to(). This method + creates a new set of columns for tracks and clusters (appended + with `_selected`) with the relevant cuts applied to them. Use + these column names to fill histograms from clusters or tracks + with the cuts applied, or use the default track_pt, track_phi; etc. + """ + self.df = self.selector.event.apply_to(self.df) + # cluster pT isn't a column that exists already in the BerkeleyTree so we can create it: + # self.df = self.df.Define("cluster_pt", "cluster_energy / cosh(cluster_eta)") + # Applying the cluster selection will also automatically add it if it's not already defined: + self.df = self.selector.cluster.apply_to(self.df) + self.df = self.selector.track.apply_to(self.df) + self.note_time("Cuts applied") + + def create_histos(self): + """ + Define your histograms here. For Histo*D, the first argument is + a tuple corresponding to what you would use for a normal TH*. + The second is the column that your histogram will be Filled with. + Note that the histograms themselves are not filled until + fill_and_save() is called. 
+ """ + self.histos['track_pt'] = self.df.Histo1D( + ("track_pt", 'Track #it{p}_{T};track #it{p}_{T} (GeV);Counts', 300, logbins(0.15, 200, 300)), + "track_pt_selected" + ) + self.histos['track_eta'] = self.df.Histo1D( + ("track_eta", 'Track #eta;track #eta;Counts', 200, -0.9, 0.9), + "track_eta_selected" + ) + self.histos['track_phi'] = self.df.Histo1D( + ("track_phi", 'Track #phi;track #phi;Counts', 200, 0, 2*np.pi), + "track_phi_selected" + ) + self.histos['track_eta_phi'] = self.df.Histo2D( + ("track_eta_phi", 'Track #eta-#phi distribution;#eta;#phi', 200, -0.9, 0.9, 200, 0, 2*np.pi), + "track_eta_selected", "track_phi_selected" + ) + pt_bins = np.array([0.15, 0.5, 1, 5, 10, 15, 20, 30, 40, 50, 60, 100, 200]) + self.histos['track_eta_phi_pt'] = self.df.Histo3D( + ("track_eta_phi_pT", 'Track #eta-#phi-#it{p}_{T} distribution;#eta;#phi;#it{p}_{T}', + 100, linbins(-0.9, 0.9, 100), + 100, linbins(0, 2*np.pi, 100), + len(pt_bins) - 1, pt_bins), + "track_eta_selected", "track_phi_selected", "track_pt_selected" + ) + + # these histograms will be filled with all clusters, not just those that pass selections, + # since we are not using the columns with `_selected` appended + self.histos['cluster_E'] = self.df.Histo1D( + ("cluster_E", 'Cluster energy distribution;#it{E} (GeV);Counts', 200, 0, 100), + "cluster_energy" + ) + self.histos['cluster_pT'] = self.df.Histo1D( + ("cluster_pT", 'Cluster #it{p}_{T} distribution;#it{p}_{T} (GeV/#it{c});Counts', 200, 0, 100), + "cluster_pt" + ) + self.histos['cluster_m02'] = self.df.Histo1D( + ("cluster_m02", 'Cluster shape;#it{M}_{02};Counts', 400, 0, 2), + "cluster_m02" + ) + self.histos['cluster_time'] = self.df.Histo1D( + ("cluster_time", 'Cluster time;time (ns);Counts', 120, -30, 30), + "cluster_time" + ) + self.histos['cluster_ncells'] = self.df.Histo1D( + ("cluster_ncells", 'Cluster cell multiplicity;n_{cells};Counts', 25, -0.5, 24.5), + "cluster_ncells" + ) + + self.note_time("Histograms defined") + + def fill_and_save(self): 
+ """ + Fill and save all defined histograms. Note that, for histograms + defined in RDFs, filling is triggered when Write() is called. + """ + self.note_time("Filling and saving histograms...") + with ROOT.TFile(self.outf, 'recreate'): + for h in self.histos.values(): + h.Write() + self.note_time("Histograms saved") + self.logger.info(f"Output saved to: {self.outf}") + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description = __doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-i", "--input", type = str, required = True, help = "Input file or file containing a list of input files.") + parser.add_argument("-o", "--output", type = str, required = True, help = "File to write the analysis.") + parser.add_argument("-c", "--config", type = str, required = True, help = "YAML file describing the cuts to be applied to the data.") + args = parser.parse_args() + + ROOT.EnableImplicitMT() + analysis = AnalysisQA(args.input, args.output, args.config) + analysis.analyze() \ No newline at end of file diff --git a/alian/analysis/example/merge.sh b/alian/analysis/example/merge.sh new file mode 100755 index 0000000..0284970 --- /dev/null +++ b/alian/analysis/example/merge.sh @@ -0,0 +1,100 @@ +#!/usr/bin/bash + +# A simple script to merge files within a directory. Feel free to make a copy +# and customize to your needs. + +show_help() { + cat << EOF +Usage: + $0 -i|--input INPUT -o|--output OUTPUT [-n|--name FILENAME] [-f|--force] [-h|--help] + +Searches for files in a given directory INPUT and a given filename FILENAME, +and merges them with ROOT hadd into a specified output file OUTPUT. +The list of files that were merged will be placed in a text file in the +same directory as the output file. 
+ +Options: + -i, --input INPUT Input directory to search for files + -o, --output OUTPUT Filename for merged output + -n, --name NAME Search for NAME in INPUT (default: analysis.root) + -j, --jobs [N] Parallelize merging with N jobs (default: system max $(nproc)) + -f, --force Overwrite existing output file (default: false) + -h, --help Show this help message + +EOF +} + +if ! command -v hadd >/dev/null 2>&1; then echo "hadd not available, crashing." && exit 127; fi + +if [ $# -eq 0 ]; then + show_help && exit 0 +fi + +PARSEDARGS=$(getopt -o i:o:n:j:fh \ + --long input:,output:,name:,jobs:,force,help \ + -n 'merge' -- "$@") +PARSE_EXIT=$? +if [ $PARSE_EXIT -ne 0 ] ; then exit $PARSE_EXIT ; fi + +# NOTE: quotes around PARSEDARGS essential to parse spaces between parsed options +eval set -- "$PARSEDARGS" + +input= +output= +filename=analysis.root +force_opt= +njobs=$(nproc) +echo "Maximum cores: $njobs" + +while true; do + case "$1" in + -i | --input ) input="$2"; shift 2;; + -o | --output ) output="$2"; shift 2;; + -n | --name ) filename="$2"; shift 2;; + -j | --jobs ) if (( "$2" < njobs )); then + njobs="$2"; + else + echo "Not enough cores for $2 jobs, setting to maximum $njobs."; fi + shift 2;; + -f | --force ) force_opt="-f"; shift ;; + -h | --help ) show_help; exit 0 ;; + -- ) shift; break ;; + * ) break ;; + esac +done + +if [[ -z "$input" ]]; then + echo "Input directory must be passed:" + show_help + exit 1 +fi +if [[ -z "$output" ]]; then + echo "Output filename must be passed:" + show_help + exit 1 +fi + +filelist="$(dirname -- "$(realpath "$output")")/list.txt" + +if [[ -z "$force_opt" ]]; then + if [[ -f $filelist ]]; then + echo "Filelist $filelist already exists; pass -f or --force to overwrite." + exit 1 + fi + if [[ -f $output ]]; then + echo "Output file $output already exists; pass -f or --force to overwrite." + exit 1 + fi +fi + +echo "Searching in $input for files named $filename..." 
+find "$input" -name "$filename" -type f > "$filelist" +echo "Found $(wc -l < "$filelist") files to merge. Merging..." + +hadd -v 99 $force_opt -j "$njobs" "$output" @"$filelist" +exitcode=$? +if [[ $exitcode -ne 0 ]]; then + echo "Merging failed." + exit $exitcode +fi +echo "Merging completed." diff --git a/alian/config/example/example_analysis.yaml b/alian/config/example/example_analysis.yaml new file mode 100644 index 0000000..138649a --- /dev/null +++ b/alian/config/example/example_analysis.yaml @@ -0,0 +1,65 @@ +# to be used with alian/analysis/example/example_analysis.py +constants: # a YAML hack to define constants + pi34: &pi34 2.35619449019 + halfpi: &pihalf 1.57079632679 + pi: &pi 3.14159265359 + +# Event cuts +selections: + event: + event_sel: sel8 + trig_sel: GammaHighPtEMCAL, GammaHighPtDCAL + rct: CBT_calo + track: + pt_min: 0.150 + track_sel: globalTrack + # remove the cluster section below if clusters are not needed in analysis + # if clusters are needed but no selections should be applied, keep only `cluster:` + cluster: + energy_min: 0.7 + m02_min: none + m02_max: none + ncells_min: 2 + time_min: -30 + time_max: 35 + exoticity: True + nlm: 2 + defn: 0 + edge: True + delta_phi: 0.05 + delta_eta: 0.05 + ep_max: 1.75 + isolation: + iso_pt_max: 1.5 + +# remove the jet_finder section below if jets are not needed in analysis +jet_finder: + R: 0.4 + alg: antikt + pT_min: 10 + +analysis: + photon_jet_angle_min: *pi34 + pt_min_eec: 1.0 + +output: + bins: + track_pT: ['log', 0.1, 20, 300] + iso_pT: ['lin', 0, 10, 100] + jet_pT: ['lin', 0, 200, 200] + jet_pT_coarse: ['custom', [5, 20, 40, 60, 80, 100, 150, 200]] + RL: ['log', 1.0e-3, 1, 30] + photon_E: ['lin', 0, 80, 200] + dphi: ['lin', *pihalf, *pi, 30] + xjy: ['lin', 0.5, 1.5, 30] + histograms: + # define histograms with bins described in `bins` + track_pT: [1, "track_pT", "Track #it{p}_{T} spectrum;#it{p}_{T} (GeV);dN/d#it{p}_{T}", track_pT] + tot_iso_pT: [1, "iso_pT", "Isolation;isolation #it{p}_{T} 
(GeV);Counts", iso_pT] + jet_pT: [1, "jet_pT", "Jet #it{p}_{T} spectrum;jet #it{p}_{T} (GeV);Counts", jet_pT] + jet_pT_coarse: [1, "jet_pT_coarse", "Jet #it{p}_{T} spectrum (coarse binning);jet #it{p}_{T} (GeV);Counts", jet_pT_coarse] + photon_E: [1, "photon_E", "Photon energy spectrum;photon energy (GeV);Counts", photon_E] + jet_photon_dphi: [1, "dphi", "Photon-jet #Delta#phi;#Delta#phi (radians);Counts", dphi] + y_jet_pT: [1, "y_jet_pT", "Photon-tagged jet #it{p}_{T} spectrum;jet #it{p}_{T} (GeV);Counts", jet_pT] + eec: [2, "eec", "Photon-tagged EEC;jet #it{p}_{T} (GeV);#it{R}_{L}", jet_pT, RL] + xjy: [2, "xjy", "Photon-jet imbalance;#it{x}_{J#gamma};photon energy (Gev)", xjy, photon_E] diff --git a/alian/config/example/example_rdf.yaml b/alian/config/example/example_rdf.yaml new file mode 100644 index 0000000..f694da0 --- /dev/null +++ b/alian/config/example/example_rdf.yaml @@ -0,0 +1,25 @@ +# to be used with alian/analysis/example/example_rdf.py +selections: + event: + event_sel: sel8 + trig_sel: GammaHighPtEMCAL, GammaHighPtDCAL + rct: CBT_calo + track: + pt_min: 0.150 + track_sel: globalTrack + cluster: + energy_min: 0.7 + m02_min: none + m02_max: none + ncells_min: 2 + time_min: -30 + time_max: 35 + exoticity: True + nlm: 2 + defn: 0 + edge: True + delta_phi: 0.05 + delta_eta: 0.05 + ep_max: 1.75 + isolation: + iso_pt_max: 1.5