From 513eaa6cd692de62562641259dff7674dde70082 Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Tue, 7 Apr 2026 18:35:18 -0500 Subject: [PATCH 1/9] Analyzer state to produce and plot plus_minus comparisons. --- analyzer/postprocessing/plots/plots_1d.py | 5 +- analyzer/postprocessing/plots/plots_2d.py | 2 +- .../single_stop/singlestop_base.yaml | 8 +- .../studies/signal_plus_minus_comp.yaml | 157 ++++++++++++++++++ 4 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 configurations/single_stop/studies/signal_plus_minus_comp.yaml diff --git a/analyzer/postprocessing/plots/plots_1d.py b/analyzer/postprocessing/plots/plots_1d.py index 822c1256..e61f6d71 100644 --- a/analyzer/postprocessing/plots/plots_1d.py +++ b/analyzer/postprocessing/plots/plots_1d.py @@ -213,7 +213,7 @@ def plotStackedDenominators(ax, denominators, styler, normalize=False): for item, meta in den_to_plot: hists.append(item.histogram) - titles.append(meta.get("title") or meta["dataset_title"]) + titles.append(meta.get("sample_name") or meta.get("title") or meta["dataset_title"]) style = styler.getStyle(meta) for key, value in style.get().items(): style_kwargs[key].append(value) @@ -233,6 +233,7 @@ def plotStackedDenominators(ax, denominators, styler, normalize=False): mplhep.histplot( den_total, ax=ax, + density=normalize, label="Den. Stat. Unc.", histtype="band", ) @@ -288,7 +289,7 @@ def plotMultiNumerators( hist.plot1d( ax=ax, - label=meta.get("title") or meta["dataset_title"], + label=meta.get("sample_name") or meta.get("title") or meta["dataset_title"], density=normalize, yerr=True, **style.get(), diff --git a/analyzer/postprocessing/plots/plots_2d.py b/analyzer/postprocessing/plots/plots_2d.py index 1fee8ec4..7d6b6002 100644 --- a/analyzer/postprocessing/plots/plots_2d.py +++ b/analyzer/postprocessing/plots/plots_2d.py @@ -48,7 +48,7 @@ def plot2D( [meta], plot_configuration=pc, metadata=common_meta, - extra_text=f"{common_meta['pipeline']}", + extra_text=f"{common_meta['sample_name']}\n{common_meta['pipeline']}", text_color="white", ) plt.close(fig) diff --git a/configurations/single_stop/singlestop_base.yaml b/configurations/single_stop/singlestop_base.yaml index cbc4fdcc..3824fdbb 100644 --- a/configurations/single_stop/singlestop_base.yaml +++ b/configurations/single_stop/singlestop_base.yaml @@ -144,7 +144,7 @@ model_path: "nn_models/nominalBinaryCompressed0p67_24-09-18-20-06/jetMatcherNNTraced.pt" scaler_path: "nn_models/nominalBinaryCompressed0p67_24-09-18-20-06/scaler.pkl" -- module_name: JetCombos +- module_name: JetComboHistograms input_col: GoodJet prefix: "composite" jet_combos: @@ -182,7 +182,6 @@ ratio_range: [0.0, 0.8] bins: 50 y_bins: 60 - {%- endmacro %} {% macro singlestop_extra_histograms() -%} @@ -191,6 +190,11 @@ input_col: GoodFatJet max_idx: 1 +- module_name: TopVecHistograms + prefix: "goodjet" + input_col: GoodJet + max_idx: 4 + - module_name: SimpleHistogram hist_name: HT input_cols: [HT] diff --git a/configurations/single_stop/studies/signal_plus_minus_comp.yaml b/configurations/single_stop/studies/signal_plus_minus_comp.yaml new file mode 100644 index 00000000..ec9d1911 --- /dev/null +++ b/configurations/single_stop/studies/signal_plus_minus_comp.yaml @@ -0,0 +1,157 @@ +{% import "singlestop_base.yaml" as base %} + +analyzer: + default_run_builder: + strategy_name: NoSystematics + + Signal312: + - module_name: SimpleHLT + triggers: [HT,AK8SingleJetPtNoTrim, AK8SingleJetPt] + should_run: + sample_type: Data + {{ base.event_level_cleanup() | indent(4) }} + - module_name: SelectOnColumns + sel_name: pre_selection + {{ base.jec_jer_corrections() | indent(4) }} + {{ base.singlestop_objects() | indent(4) }} + {{ base.weights() | indent(4) }} + - module_name: TriggerBNNCorrection + base_path: postprocessing/newtriggereffs/ + correction_pattern: "{era}.json.gz" + - module_name: NObjFilter + input_col: MedB + selection_name: 2bjet + min_count: 2 + - module_name: NObjFilter + input_col: TightB + selection_name: 1tightbjet + min_count: 1 + - module_name: VecDRSelection + input_col: MedB + selection_name: b_dr + min_dr: 1.0 + {{ base.singlestop_common_selection() | indent(4) }} + - module_name: SelectOnColumns + sel_name: selection + {{ base.singlestop_common_histograms() | indent(4) }} + {{ base.singlestop_extra_histograms() | indent(4) }} + + Signal313: + - module_name: SimpleHLT + triggers: [HT,AK8SingleJetPtNoTrim, AK8SingleJetPt] + should_run: + sample_type: Data + {{ base.event_level_cleanup() | indent(4) }} + - module_name: SelectOnColumns + sel_name: pre_selection + {{ base.jec_jer_corrections() | indent(4) }} + {{ base.singlestop_objects() | indent(4) }} + {{ base.weights() | indent(4) }} + - module_name: TriggerBNNCorrection + base_path: postprocessing/newtriggereffs/ + correction_pattern: "{era}.json.gz" + - module_name: NObjFilter + input_col: TightB + selection_name: 3tightbjet + min_count: 3 + - module_name: VecDRSelection + input_col: MedB + selection_name: b_dr + min_dr: 1.0 + {{ base.singlestop_common_selection() | indent(4) }} + - module_name: SelectOnColumns + sel_name: selection + {{ base.singlestop_common_histograms() | indent(4) }} + {{ base.singlestop_extra_histograms() | indent(4) }} + +location_priorities: [".*FNAL.*", ".*US.*", ".*(DE|IT|CH|FR).*", ".*(T0|T1|T2).*","eos"] + +event_collections: + - dataset: 'signal_2018_312_1500_600_official' + pipelines: [Signal312] + #- dataset: 'signal_2018_312_1500_400_official' + # pipelines: [Signal312] + #- dataset: 'signal_2018_312_1500_500_official' + # pipelines: [Signal312] + + +extra_executors: + test: + executor_name: ImmediateExecutor + chunk_size: 10000 + + test2: + executor_name: LocalDaskExecutor + chunk_size: 100000 + min_workers: 1 + max_workers: 3 + reduction_factor: 2 + + testcondor: + executor_name: LPCCondorDask + container: /cvmfs/unpacked.cern.ch/registry.hub.docker.com/coffeateam/coffea-dask-almalinux9:2025.10.2-py3.12 + venv_path: .venv + chunk_size: 80000 + min_workers: 40 + max_workers: 200 + worker_memory: 4GB + reduction_factor: 5 + +Postprocessing: + do_merge_and_scale: False + processors: + - name: RatioPlot + inputs: + - "*/*/*/*/composite_13_m" + - "*/*/*/*/composite_14_m" + - "*/*/*/*/composite_24_m" + - "*/*/*/*/comp_mChi" + - "*/*/*/*/comp_mStop" + - "*/*/*/*/uncomp_mChi" + - "*/*/*/*/uncomp_mStop" + - "*/*/*/*/HT" + - "*/*/*/*/fatjet_pt_1" + - "*/*/*/*/fatjet_eta_1" + - "*/*/*/*/fatjet_phi_1" + - "*/*/*/*/goodjet_pt_1" + - "*/*/*/*/goodjet_eta_1" + - "*/*/*/*/goodjet_phi_1" + - "*/*/*/*/goodjet_pt_2" + - "*/*/*/*/goodjet_eta_2" + - "*/*/*/*/goodjet_phi_2" + - "*/*/*/*/goodjet_pt_3" + - "*/*/*/*/goodjet_eta_3" + - "*/*/*/*/goodjet_phi_3" + - "*/*/*/*/goodjet_pt_4" + - "*/*/*/*/goodjet_eta_4" + - "*/*/*/*/goodjet_phi_4" + normalize: True + structure: + select: {dataset_name: "*", pipeline: Signal312} + group: {era.name: "*"} + subgroups: + denominator: + select: {sample_name: "*_plus"} + transforms: + - name: SelectAxesValues + select_axes_values: {"variation": "central"} + title: plus + numerator: + select: {sample_name: "*_minus"} + transforms: + - name: SelectAxesValues + select_axes_values: {"variation": "central"} + title: minus + output_name: "{prefix}/{era.name}/ratio_{dataset_name}_{name}.pdf" + - name: Histogram2D + inputs: + - "*/*/*/*/comp_mStop_vs_mChiRatio" + - "*/*/*/*/uncomp_mStop_vs_mChiRatio" + normalize: False + structure: + select: {dataset_name: "*", pipeline: "*"} + group: {era.name: "*", sample_name: "*"} + transforms: + - name: SelectAxesValues + select_axes_values: {variation: "central"} + output_name: "{prefix}/{era.name}/{sample_name}_{name}.pdf" From 028e205556eccb8204772dcb7bd02493f883171b Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Fri, 10 Apr 2026 10:58:41 -0500 Subject: [PATCH 2/9] Fix unc not being scaled correctly when ratio plot is normalized. --- analyzer/postprocessing/plots/plots_1d.py | 1 + 1 file changed, 1 insertion(+) diff --git a/analyzer/postprocessing/plots/plots_1d.py b/analyzer/postprocessing/plots/plots_1d.py index e61f6d71..e0a1e80b 100644 --- a/analyzer/postprocessing/plots/plots_1d.py +++ b/analyzer/postprocessing/plots/plots_1d.py @@ -183,6 +183,7 @@ def computeRatio(n, d, normalize=False, ratio_type="poisson"): if normalize: with np.errstate(divide="ignore", invalid="ignore"): ratio = (n / np.sum(n)) / (d / np.sum(d)) + unc = unc*(np.sum(d)/np.sum(n)) ratio[ratio == 0] = np.nan ratio[np.isinf(ratio)] = np.nan From cb91ecf52d3bf2d2ec81822e980b9091856441c7 Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Tue, 14 Apr 2026 23:56:08 -0500 Subject: [PATCH 3/9] WIP: Added pull plot and more bug fixes. --- analyzer/postprocessing/basic_histograms.py | 29 ++++- analyzer/postprocessing/plots/plots_1d.py | 11 +- analyzer/postprocessing/plots/plots_2d.py | 50 +++++++- .../studies/signal_plus_minus_comp.yaml | 116 ++++++++++++++++-- 4 files changed, 194 insertions(+), 12 deletions(-) diff --git a/analyzer/postprocessing/basic_histograms.py b/analyzer/postprocessing/basic_histograms.py index 158d967e..e511bedb 100644 --- a/analyzer/postprocessing/basic_histograms.py +++ b/analyzer/postprocessing/basic_histograms.py @@ -12,7 +12,7 @@ ) from .processors import BasePostprocessor from .plots.plots_1d import plotOne, plotRatio, plotRatioOfRatios -from .plots.plots_2d import plot2D +from .plots.plots_2d import plot2D, plot2DPulls from attrs import define, field ResultSet = list[list[ItemWithMeta]] @@ -192,3 +192,30 @@ def getRunFuncs(self, group, prefix=None): plot_configuration=self.plot_configuration, color_scale=self.scale, ) + + +@define +class HistogramPulls2D(BasePostprocessor): + output_name: str + scale: Literal["log", "linear"] = "linear" + normalize: bool = False + + def getRunFuncs(self, group, prefix=None): + hist1 = group["hist1"] + hist2 = group["hist2"] + common_meta = commonDict(it.chain(hist1, hist2)) + output_path = dotFormat( + self.output_name, prefix=prefix, **dict(dictToDot(common_meta)) + ) + pc = self.plot_configuration.makeFormatted(common_meta) + yield ft.partial( + plot2DPulls, + hist1[0], + hist2[0], + output_path, + self.style_set, + normalize=self.normalize, + plot_configuration=pc, + color_scale=self.scale, + override_axis_labels = None, + ) \ No newline at end of file diff --git a/analyzer/postprocessing/plots/plots_1d.py b/analyzer/postprocessing/plots/plots_1d.py index e0a1e80b..42a9e51c 100644 --- a/analyzer/postprocessing/plots/plots_1d.py +++ b/analyzer/postprocessing/plots/plots_1d.py @@ -142,15 +142,22 @@ def plotDictAsBars( fig, ax = plt.subplots(layout="constrained") for item, meta in items: title = meta.get("title") or meta["dataset_title"] + + if 'minus' in meta.get('sample_name'): + minus_plus = 'Minus' + else: + minus_plus = 'Plus' flow = getter(item) style = styler.getStyle(meta) h = makeStrHist([(x, y) for x, y in flow.items()], ax_name=ax_name) + if normalize: + h = h/h.values()[0] h.plot1d( ax=ax, - label=title, - density=normalize, + label=f'{minus_plus} '+title, **style.get(), ) + ax.legend() labelAxis(ax, "y", h.axes) labelAxis(ax, "x", h.axes) diff --git a/analyzer/postprocessing/plots/plots_2d.py b/analyzer/postprocessing/plots/plots_2d.py index 7d6b6002..4b612e89 100644 --- a/analyzer/postprocessing/plots/plots_2d.py +++ b/analyzer/postprocessing/plots/plots_2d.py @@ -1,7 +1,7 @@ import numpy as np import matplotlib import matplotlib.pyplot as plt - +import hist from analyzer.postprocessing.style import Styler @@ -139,3 +139,51 @@ def plot2DSigBkg( text_color="white", ) plt.close(fig) + +def plot2DPulls( + hist1, + hist2, + output_path, + style_set, + normalize=False, + plot_configuration=None, + color_scale="linear", + override_axis_labels=None + ): + + + override_axis_labels = override_axis_labels or {} + pc = plot_configuration or PlotConfiguration() + fig, ax = plt.subplots(layout="constrained") + item1, meta1 = hist1 + item2, meta2 = hist2 + h1 = item1.histogram + h2 = item2.histogram + + if normalize: + h1 = h1 / np.sum(h1.values()) + h2 = h2 / np.sum(h2.values()) + + pulls = (h2.values()-h1.values())/np.sqrt(h2.variances()) + pulls_hist = hist.Hist(*h1.axes) + pulls_hist[...] = pulls + + if color_scale == "log": + pulls_hist.plot2d(norm=matplotlib.colors.LogNorm(), ax=ax) + else: + pulls_hist.plot2d(ax=ax, norm=matplotlib.colors.TwoSlopeNorm(vmin=-10,vmax=10,vcenter=0), cmap='RdYlGn') + + common_meta = commonDict([meta1, meta2], key=lambda x: x) + #breakpoint() + addCMSBits( + ax, + [common_meta], + extra_text=f"{common_meta["pipeline"]}\n{common_meta["dataset_name"]}\nNormalized Pulls\n(Norm Plus-Norm Minus)/Norm Plus_Unc", + text_color="black", + plot_configuration=pc, + ) + + common_meta = commonDict([meta1, meta2], key=lambda x: x) + saveFig(fig, output_path, metadata=common_meta, extension=pc.image_type) + plt.close(fig) + diff --git a/configurations/single_stop/studies/signal_plus_minus_comp.yaml b/configurations/single_stop/studies/signal_plus_minus_comp.yaml index ec9d1911..7a61873b 100644 --- a/configurations/single_stop/studies/signal_plus_minus_comp.yaml +++ b/configurations/single_stop/studies/signal_plus_minus_comp.yaml @@ -67,12 +67,79 @@ analyzer: location_priorities: [".*FNAL.*", ".*US.*", ".*(DE|IT|CH|FR).*", ".*(T0|T1|T2).*","eos"] event_collections: + - dataset: 'signal_2018_312_1000_400_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1250_400_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1750_400_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1000_600_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1250_600_official' + pipelines: [Signal312] - dataset: 'signal_2018_312_1500_600_official' pipelines: [Signal312] - #- dataset: 'signal_2018_312_1500_400_official' - # pipelines: [Signal312] - #- dataset: 'signal_2018_312_1500_500_official' - # pipelines: [Signal312] + - dataset: 'signal_2018_312_1750_600_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_2000_600_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1000_900_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1250_900_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1250_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1500_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1750_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_2000_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1500_1400_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_1750_1400_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_2000_1400_official' + pipelines: [Signal312] + - dataset: 'signal_2018_312_2000_1700_official' + pipelines: [Signal312] + + - dataset: 'signal_2023_preBPix_312_1000_400_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1250_400_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1750_400_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1000_600_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1250_600_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1500_600_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1750_600_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_2000_600_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1000_900_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1250_900_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1250_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1500_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1750_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_2000_1100_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1500_1400_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_1750_1400_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_2000_1400_official' + pipelines: [Signal312] + - dataset: 'signal_2023_preBPix_312_2000_1700_official' + pipelines: [Signal312] extra_executors: @@ -128,7 +195,7 @@ Postprocessing: normalize: True structure: select: {dataset_name: "*", pipeline: Signal312} - group: {era.name: "*"} + group: {dataset_name: "*", era.name: "*"} subgroups: denominator: select: {sample_name: "*_plus"} @@ -142,7 +209,7 @@ Postprocessing: - name: SelectAxesValues select_axes_values: {"variation": "central"} title: minus - output_name: "{prefix}/{era.name}/ratio_{dataset_name}_{name}.pdf" + output_name: "{prefix}/{era.name}/{dataset_name}/ratio_{name}.pdf" - name: Histogram2D inputs: - "*/*/*/*/comp_mStop_vs_mChiRatio" @@ -150,8 +217,41 @@ Postprocessing: normalize: False structure: select: {dataset_name: "*", pipeline: "*"} - group: {era.name: "*", sample_name: "*"} + group: {dataset_name: "*", era.name: "*", sample_name: "*"} transforms: - name: SelectAxesValues select_axes_values: {variation: "central"} - output_name: "{prefix}/{era.name}/{sample_name}_{name}.pdf" + output_name: "{prefix}/{era.name}/{dataset_name}/{sample_name}_{name}.pdf" + - name: HistogramPulls2D + inputs: + - "*/*/*/*/comp_mStop_vs_mChiRatio" + - "*/*/*/*/uncomp_mStop_vs_mChiRatio" + normalize: True + scale: linear + structure: + select: {dataset_name: "*", pipeline: "*"} + group: {dataset_name: "*", era.name: "*"} + subgroups: + hist1: + select: {sample_name: "*_minus"} + transforms: + - name: SelectAxesValues + select_axes_values: {"variation": "central"} + title: minus + hist2: + select: {sample_name: "*_plus"} + transforms: + - name: SelectAxesValues + select_axes_values: {"variation": "central"} + title: plus + output_name: "{prefix}/{era.name}/{dataset_name}/{name}_pulls.pdf" + - name: PlotSelectionFlow + inputs: + - "*/*/*/*/pre_selection" + - "*/*/*/*/selection" + normalize: True + structure: + select: {dataset_name: "*", pipeline: "*"} + group: {dataset_name: "*", era.name: "*"} + output_name: "{prefix}/{era.name}/{dataset_name}/{name}.pdf" + From f114f9d9c13b4907992a8c555e26b10a0c52de45 Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Wed, 15 Apr 2026 13:47:59 -0500 Subject: [PATCH 4/9] Removal of a 2023 sample from the config that doesn't have a minus sample. --- configurations/single_stop/studies/signal_plus_minus_comp.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/configurations/single_stop/studies/signal_plus_minus_comp.yaml b/configurations/single_stop/studies/signal_plus_minus_comp.yaml index 7a61873b..8fb365a9 100644 --- a/configurations/single_stop/studies/signal_plus_minus_comp.yaml +++ b/configurations/single_stop/studies/signal_plus_minus_comp.yaml @@ -134,8 +134,6 @@ event_collections: pipelines: [Signal312] - dataset: 'signal_2023_preBPix_312_1500_1400_official' pipelines: [Signal312] - - dataset: 'signal_2023_preBPix_312_1750_1400_official' - pipelines: [Signal312] - dataset: 'signal_2023_preBPix_312_2000_1400_official' pipelines: [Signal312] - dataset: 'signal_2023_preBPix_312_2000_1700_official' From 9cc42e8a3ae26f53bb30d6d6639549588e02dc47 Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Wed, 22 Apr 2026 03:58:17 -0500 Subject: [PATCH 5/9] New plots and fix colors on pull plots, and fix uncertainties on cutflow. --- analyzer/postprocessing/basic_histograms.py | 27 ++++- analyzer/postprocessing/plots/plots_1d.py | 11 ++- analyzer/postprocessing/plots/plots_2d.py | 99 +++++++++++++++++-- .../transforms/hist_transforms.py | 15 +++ 4 files changed, 141 insertions(+), 11 deletions(-) diff --git a/analyzer/postprocessing/basic_histograms.py b/analyzer/postprocessing/basic_histograms.py index e511bedb..3a288575 100644 --- a/analyzer/postprocessing/basic_histograms.py +++ b/analyzer/postprocessing/basic_histograms.py @@ -12,7 +12,7 @@ ) from .processors import BasePostprocessor from .plots.plots_1d import plotOne, plotRatio, plotRatioOfRatios -from .plots.plots_2d import plot2D, plot2DPulls +from .plots.plots_2d import plot2D, plot2DPulls, plotEffRatio from attrs import define, field ResultSet = list[list[ItemWithMeta]] @@ -218,4 +218,29 @@ def getRunFuncs(self, group, prefix=None): plot_configuration=pc, color_scale=self.scale, override_axis_labels = None, + ) + +@define +class Histogram2DEffRatio(BasePostprocessor): + output_name: str + scale: Literal["log", "linear"] = "linear" + normalize: bool = False + + def getRunFuncs(self, group, prefix=None): + num = group["num"] + den = group["den"] + common_meta = commonDict(it.chain(num, den)) + output_path = dotFormat( + self.output_name, prefix=prefix, **dict(dictToDot(common_meta)) + ) + pc = self.plot_configuration.makeFormatted(common_meta) + yield ft.partial( + plotEffRatio, + num, + den, + output_path, + self.style_set, + plot_configuration=pc, + color_scale=self.scale, + override_axis_labels = None, ) \ No newline at end of file diff --git a/analyzer/postprocessing/plots/plots_1d.py b/analyzer/postprocessing/plots/plots_1d.py index 42a9e51c..e6b8eb16 100644 --- a/analyzer/postprocessing/plots/plots_1d.py +++ b/analyzer/postprocessing/plots/plots_1d.py @@ -118,9 +118,11 @@ def plotOne( def makeStrHist(data, ax_name): import hist - ax = hist.axis.StrCategory([x[0] for x in data], name=ax_name) - h = hist.Hist(ax, storage="double") - h[:] = np.array([x[1] for x in data]) + cat = np.array([x[0] for x in data]) + ax = hist.axis.StrCategory(cat, name=ax_name, growth=True) + h = hist.Hist(ax, storage="weight") + data_vals = np.array([x[1] for x in data]) + h[...] = np.stack([data_vals, data_vals], axis=-1) return h @@ -139,6 +141,7 @@ def plotDictAsBars( styler = Styler(style_set) mpl.use("Agg") + import hist fig, ax = plt.subplots(layout="constrained") for item, meta in items: title = meta.get("title") or meta["dataset_title"] @@ -151,7 +154,7 @@ def plotDictAsBars( style = styler.getStyle(meta) h = makeStrHist([(x, y) for x, y in flow.items()], ax_name=ax_name) if normalize: - h = h/h.values()[0] + h = h/(h.values()[0]) h.plot1d( ax=ax, label=f'{minus_plus} '+title, diff --git a/analyzer/postprocessing/plots/plots_2d.py b/analyzer/postprocessing/plots/plots_2d.py index 4b612e89..896e40fb 100644 --- a/analyzer/postprocessing/plots/plots_2d.py +++ b/analyzer/postprocessing/plots/plots_2d.py @@ -164,26 +164,113 @@ def plot2DPulls( h1 = h1 / np.sum(h1.values()) h2 = h2 / np.sum(h2.values()) - pulls = (h2.values()-h1.values())/np.sqrt(h2.variances()) - pulls_hist = hist.Hist(*h1.axes) - pulls_hist[...] = pulls + with np.errstate(divide='ignore', invalid='ignore'): + pulls = (h2.values()-h1.values())/np.sqrt(h2.variances()+h1.variances()) + pulls_hist = hist.Hist(*h1.axes) + pulls_hist[...] = pulls if color_scale == "log": pulls_hist.plot2d(norm=matplotlib.colors.LogNorm(), ax=ax) else: - pulls_hist.plot2d(ax=ax, norm=matplotlib.colors.TwoSlopeNorm(vmin=-10,vmax=10,vcenter=0), cmap='RdYlGn') + pulls_hist.plot2d(ax=ax, norm=matplotlib.colors.TwoSlopeNorm(vmin=-5,vmax=5,vcenter=0), cmap='bwr') common_meta = commonDict([meta1, meta2], key=lambda x: x) + import re + dataset_name_numbers = re.findall(r'\d+', common_meta["dataset_name"]) #breakpoint() addCMSBits( ax, [common_meta], - extra_text=f"{common_meta["pipeline"]}\n{common_meta["dataset_name"]}\nNormalized Pulls\n(Norm Plus-Norm Minus)/Norm Plus_Unc", + extra_text=f"{common_meta["pipeline"]}\n{dataset_name_numbers[-2]}_{dataset_name_numbers[-1]}\nNormalized Pulls\n(Norm Plus-Norm Minus)\n/Sqrt(Var_Sum)", text_color="black", plot_configuration=pc, ) common_meta = commonDict([meta1, meta2], key=lambda x: x) saveFig(fig, output_path, metadata=common_meta, extension=pc.image_type) - plt.close(fig) + plt.close(fig) + +def plotEffRatio( + num_hists, + den_hists, + output_path, + style_set, + plot_configuration=None, + color_scale="linear", + override_axis_labels=None, +): + import re + import mplhep as hep + plt.style.use(hep.style.CMS) + override_axis_labels = override_axis_labels or {} + pc = plot_configuration or PlotConfiguration() + fig, ax = plt.subplots(layout="constrained") + ratio_eff = [] + x_values = [] + y_values = [] + metas = [] + labels = [] + for num_hist in num_hists: + for den_hist in den_hists: + num_item, num_meta = num_hist + den_item, den_meta = den_hist + den_name_numbers = re.findall(r'\d+', den_meta["dataset_name"]) + num_name_numbers = re.findall(r'\d+', num_meta["dataset_name"]) + if num_name_numbers != den_name_numbers: + continue + else: + metas.append(num_meta) + x_values.append(int(num_name_numbers[-2])) + y_values.append(int(num_name_numbers[-1])) + + num = num_item.cutflow + den = den_item.cutflow + with np.errstate(divide='ignore', invalid='ignore'): + n_init = num["initial"] + d_init = den["initial"] + n_count = list(num.values())[-1] + d_count = list(den.values())[-1] + num_eff = n_count/n_init + den_eff = d_count/d_init + ratio = num_eff/den_eff + + #Ratio of two binomials is approx log normal, calculate two sided error in 'log space' and then go back. + log_val = np.log(ratio) + log_sf_var = (1/n_count) + (1/d_count) - (1/n_init) - (1/d_init) + log_sf_sigma = np.sqrt(max(0, log_sf_var)) + upper = np.exp(log_val + log_sf_sigma) + lower = np.exp(log_val - log_sf_sigma) + err_up = upper - ratio + err_down = ratio - lower + + ratio_eff.append(num_eff/den_eff) + label_str = f"${ratio:.3g}^{{+{err_up:.3g}}}_{{-{err_down:.3g}}}$" + labels.append(label_str) + + viridis_clipped = matplotlib.colors.LinearSegmentedColormap.from_list( + 'viridis_clipped', matplotlib.cm.viridis(np.linspace(0.2, 1.0, 256)) + ) + sc = ax.scatter( + x_values, + y_values, + c=ratio_eff, + cmap=viridis_clipped, + marker='s', + s=2500, + ) + for x, y, txt in zip(x_values, y_values, labels): + ax.text(x, y, txt, ha='center', va='center', fontsize=10, color='black') + + ax.set_xlabel(override_axis_labels.get("x", "$m_{{\\mathit{{\\tilde t_1}}}}$")) + ax.set_ylabel(override_axis_labels.get("y", "$m_{{\\mathit{{\\tilde \\chi^{{\\pm}}_1}}}}$")) + fig.colorbar(sc, ax=ax, label="PlusEff/MinusEff") + addCMSBits( + ax, + [num_meta], + extra_text=f"{num_meta["pipeline"]}", + text_color="black", + plot_configuration=pc, + ) + saveFig(fig, output_path, metadata=num_meta, extension=pc.image_type) + plt.close(fig) \ No newline at end of file diff --git a/analyzer/postprocessing/transforms/hist_transforms.py b/analyzer/postprocessing/transforms/hist_transforms.py index dd77a582..b2ef16c6 100644 --- a/analyzer/postprocessing/transforms/hist_transforms.py +++ b/analyzer/postprocessing/transforms/hist_transforms.py @@ -473,3 +473,18 @@ def makeSlice(x_slice, y_slice): ) return ret + +@define +class NormalizeByXSec(TransformHistogram): + def __call__(self, items: list[ItemWithMeta]): + ret = [] + for item, meta in items: + h = item.histogram + nh = h*(meta["x_sec"]*meta["era"]["lumi"])/meta["n_events"] + # new_axes = [x for x in item.axes if x.name not in select_axes_values] + ret.append( + ItemWithMeta( + Histogram(name=h.name, axes=None, histogram=nh), metadata=meta + ) + ) + return ret From 7558c268b7b54af139cd301e422b2ce59eec9312 Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Wed, 22 Apr 2026 04:39:36 -0500 Subject: [PATCH 6/9] Small updates and fix config. --- .../transforms/hist_transforms.py | 4 +- .../studies/signal_plus_minus_comp.yaml | 117 +++++++++++++++--- 2 files changed, 105 insertions(+), 16 deletions(-) diff --git a/analyzer/postprocessing/transforms/hist_transforms.py b/analyzer/postprocessing/transforms/hist_transforms.py index b2ef16c6..566764b4 100644 --- a/analyzer/postprocessing/transforms/hist_transforms.py +++ b/analyzer/postprocessing/transforms/hist_transforms.py @@ -477,10 +477,12 @@ def makeSlice(x_slice, y_slice): @define class NormalizeByXSec(TransformHistogram): def __call__(self, items: list[ItemWithMeta]): + unique_processes = {(meta["dataset_name"], meta["x_sec"]) for _, meta in items} + total_xsec = sum(xsec for _, xsec in unique_processes) ret = [] for item, meta in items: h = item.histogram - nh = h*(meta["x_sec"]*meta["era"]["lumi"])/meta["n_events"] + nh = h*(meta["x_sec"]*meta["era"]["lumi"])/(meta["n_events"]*total_xsec) # new_axes = [x for x in item.axes if x.name not in select_axes_values] ret.append( ItemWithMeta( diff --git a/configurations/single_stop/studies/signal_plus_minus_comp.yaml b/configurations/single_stop/studies/signal_plus_minus_comp.yaml index 8fb365a9..80f6a84e 100644 --- a/configurations/single_stop/studies/signal_plus_minus_comp.yaml +++ b/configurations/single_stop/studies/signal_plus_minus_comp.yaml @@ -139,6 +139,75 @@ event_collections: - dataset: 'signal_2023_preBPix_312_2000_1700_official' pipelines: [Signal312] + - dataset: 'signal_2018_313_1000_400_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1250_400_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1750_400_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1000_600_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1250_600_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1500_600_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1750_600_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_2000_600_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1000_900_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1250_900_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1250_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1500_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1750_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_2000_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1500_1400_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_1750_1400_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_2000_1400_official' + pipelines: [Signal313] + - dataset: 'signal_2018_313_2000_1700_official' + pipelines: [Signal313] + + - dataset: 'signal_2023_preBPix_313_1000_400_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1250_400_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1000_600_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1250_600_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1500_600_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1750_600_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_2000_600_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1000_900_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1250_900_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1250_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1500_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1750_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_2000_1100_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_1500_1400_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_2000_1400_official' + pipelines: [Signal313] + - dataset: 'signal_2023_preBPix_313_2000_1700_official' + pipelines: [Signal313] extra_executors: test: @@ -190,36 +259,39 @@ Postprocessing: - "*/*/*/*/goodjet_pt_4" - "*/*/*/*/goodjet_eta_4" - "*/*/*/*/goodjet_phi_4" - normalize: True + normalize: False structure: - select: {dataset_name: "*", pipeline: Signal312} - group: {dataset_name: "*", era.name: "*"} + select: {dataset_name: "*"} + group: {dataset_name: "*", era.name: "*", pipeline: "*"} subgroups: denominator: - select: {sample_name: "*_plus"} + select: {sample_name: "*_minus"} transforms: - name: SelectAxesValues select_axes_values: {"variation": "central"} + - name: NormalizeByXSec title: plus numerator: - select: {sample_name: "*_minus"} + select: {sample_name: "*_plus"} transforms: - name: SelectAxesValues select_axes_values: {"variation": "central"} + - name: NormalizeByXSec title: minus - output_name: "{prefix}/{era.name}/{dataset_name}/ratio_{name}.pdf" + output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/ratio_{name}.pdf" - name: Histogram2D inputs: - "*/*/*/*/comp_mStop_vs_mChiRatio" - "*/*/*/*/uncomp_mStop_vs_mChiRatio" normalize: False structure: - select: {dataset_name: "*", pipeline: "*"} - group: {dataset_name: "*", era.name: "*", sample_name: "*"} + select: {dataset_name: "*"} + group: {dataset_name: "*", era.name: "*", sample_name: "*", pipeline: "*"} transforms: - name: SelectAxesValues select_axes_values: {variation: "central"} - output_name: "{prefix}/{era.name}/{dataset_name}/{sample_name}_{name}.pdf" + - name: NormalizeByXSec + output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{sample_name}_{name}.pdf" - name: HistogramPulls2D inputs: - "*/*/*/*/comp_mStop_vs_mChiRatio" @@ -227,22 +299,24 @@ Postprocessing: normalize: True scale: linear structure: - select: {dataset_name: "*", pipeline: "*"} - group: {dataset_name: "*", era.name: "*"} + select: {dataset_name: "*"} + group: {dataset_name: "*", era.name: "*", pipeline: "*"} subgroups: hist1: select: {sample_name: "*_minus"} transforms: - name: SelectAxesValues select_axes_values: {"variation": "central"} + - name: NormalizeByXSec title: minus hist2: select: {sample_name: "*_plus"} transforms: - name: SelectAxesValues select_axes_values: {"variation": "central"} + - name: NormalizeByXSec title: plus - output_name: "{prefix}/{era.name}/{dataset_name}/{name}_pulls.pdf" + output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{name}_pulls.pdf" - name: PlotSelectionFlow inputs: - "*/*/*/*/pre_selection" @@ -250,6 +324,19 @@ Postprocessing: normalize: True structure: select: {dataset_name: "*", pipeline: "*"} - group: {dataset_name: "*", era.name: "*"} - output_name: "{prefix}/{era.name}/{dataset_name}/{name}.pdf" - + group: {dataset_name: "*", era.name: "*", pipeline: "*"} + output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{name}.pdf" + - name: Histogram2DEffRatio + inputs: + - "*/*/*/*/pre_selection" + - "*/*/*/*/selection" + normalize: False + structure: + select: {dataset_name: "*"} + group: {era.name: "*", "pipeline": "*"} + subgroups: + num: + select: {sample_name: "*_plus"} + den: + select: {sample_name: "*_minus"} + output_name: "{prefix}/{era.name}/{pipeline}/signal_eff_plane.pdf" From 962139aaf906e96063dcdbd176a26b76ddf79f7c Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Wed, 22 Apr 2026 06:27:10 -0500 Subject: [PATCH 7/9] Fix bug with ratio errors. --- analyzer/postprocessing/basic_histograms.py | 2 + analyzer/postprocessing/plots/plots_1d.py | 41 ++++++++++++++----- analyzer/postprocessing/plots/plots_2d.py | 2 +- .../transforms/hist_transforms.py | 2 +- .../studies/signal_plus_minus_comp.yaml | 14 ++++--- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/analyzer/postprocessing/basic_histograms.py b/analyzer/postprocessing/basic_histograms.py index 3a288575..69722fd5 100644 --- a/analyzer/postprocessing/basic_histograms.py +++ b/analyzer/postprocessing/basic_histograms.py @@ -64,6 +64,7 @@ class RatioPlot(BasePostprocessor): "poisson" ) no_stack: bool = False + xsec_normalize: bool = False def getRunFuncs(self, group, prefix=None): numerator = group["numerator"] @@ -87,6 +88,7 @@ def getRunFuncs(self, group, prefix=None): ratio_height=self.ratio_height, no_stack=self.no_stack, plot_configuration=pc, + xsec_normalize=self.xsec_normalize, ) diff --git a/analyzer/postprocessing/plots/plots_1d.py b/analyzer/postprocessing/plots/plots_1d.py index e6b8eb16..6c9b2497 100644 --- a/analyzer/postprocessing/plots/plots_1d.py +++ b/analyzer/postprocessing/plots/plots_1d.py @@ -248,7 +248,7 @@ def plotStackedDenominators(ax, denominators, styler, normalize=False): label="Den. Stat. Unc.", histtype="band", ) - return den_total + return den_total, denominators def plotUnstackedDenominators(ax, denominators, styler, *, normalize): @@ -275,13 +275,14 @@ def plotMultiNumerators( ax, ratio_ax, numerators, - den_total, + den_stacked, styler, normalize, ratio_type, x_values, ratio_func=computeRatio, show_den_unc=True, + xsec_normalize=False, ): for item, meta in numerators: @@ -289,15 +290,31 @@ def plotMultiNumerators( style = styler.getStyle(meta) n_vals = hist.values() + den_total = den_stacked[0] d_vals = den_total.values() + dmeta = den_stacked[1][0].metadata + + if xsec_normalize: + total_xsec = dmeta['x_sec']+meta['x_sec'] + inv_den_weight=(dmeta['n_events']*total_xsec)/dmeta['x_sec'] + inv_num_weight=(meta['n_events']*total_xsec)/meta['x_sec'] + + ratio, unc = ratio_func( + n_vals*inv_num_weight, + d_vals*inv_den_weight, + normalize=normalize, + ratio_type=ratio_type, + ) - ratio, unc = ratio_func( - n_vals, - d_vals, - normalize=normalize, - ratio_type=ratio_type, - ) - + ratio *= inv_den_weight/inv_num_weight + unc *= inv_den_weight/inv_num_weight + else: + ratio, unc = ratio_func( + n_vals, + d_vals, + normalize=normalize, + ratio_type=ratio_type, + ) hist.plot1d( ax=ax, label=meta.get("sample_name") or meta.get("title") or meta["dataset_title"], @@ -377,6 +394,7 @@ def plotRatio( no_stack=False, ratio_hlines=(1.0,), ratio_height=0.3, + xsec_normalize=False, ): pc = plot_configuration or PlotConfiguration() styler = Styler(style_set) @@ -409,7 +427,7 @@ def plotRatio( ratio_func=ratio_func, ) else: - den_total = plotStackedDenominators( + den_stacked = plotStackedDenominators( ax, denominator, styler, @@ -419,12 +437,13 @@ def plotRatio( ax, ratio_ax, numerators, - den_total, + den_stacked, styler, normalize=normalize, ratio_type=ratio_type, x_values=x_values, ratio_func=ratio_func, + xsec_normalize=xsec_normalize, ) for y in ratio_hlines: diff --git a/analyzer/postprocessing/plots/plots_2d.py b/analyzer/postprocessing/plots/plots_2d.py index 896e40fb..99487e85 100644 --- a/analyzer/postprocessing/plots/plots_2d.py +++ b/analyzer/postprocessing/plots/plots_2d.py @@ -263,7 +263,7 @@ def plotEffRatio( ax.set_xlabel(override_axis_labels.get("x", "$m_{{\\mathit{{\\tilde t_1}}}}$")) ax.set_ylabel(override_axis_labels.get("y", "$m_{{\\mathit{{\\tilde \\chi^{{\\pm}}_1}}}}$")) - fig.colorbar(sc, ax=ax, label="PlusEff/MinusEff") + fig.colorbar(sc, ax=ax, label="MinusEff/PlusEff") addCMSBits( ax, [num_meta], diff --git a/analyzer/postprocessing/transforms/hist_transforms.py b/analyzer/postprocessing/transforms/hist_transforms.py index 566764b4..8682cafd 100644 --- a/analyzer/postprocessing/transforms/hist_transforms.py +++ b/analyzer/postprocessing/transforms/hist_transforms.py @@ -482,7 +482,7 @@ def __call__(self, items: list[ItemWithMeta]): ret = [] for item, meta in items: h = item.histogram - nh = h*(meta["x_sec"]*meta["era"]["lumi"])/(meta["n_events"]*total_xsec) + nh = h*(meta["x_sec"])/(meta["n_events"]*total_xsec) # new_axes = [x for x in item.axes if x.name not in select_axes_values] ret.append( ItemWithMeta( diff --git a/configurations/single_stop/studies/signal_plus_minus_comp.yaml b/configurations/single_stop/studies/signal_plus_minus_comp.yaml index 80f6a84e..bb85448b 100644 --- a/configurations/single_stop/studies/signal_plus_minus_comp.yaml +++ b/configurations/single_stop/studies/signal_plus_minus_comp.yaml @@ -259,20 +259,22 @@ Postprocessing: - "*/*/*/*/goodjet_pt_4" - "*/*/*/*/goodjet_eta_4" - "*/*/*/*/goodjet_phi_4" - normalize: False + normalize: False + xsec_normalize: True + ratio_type: poisson-ratio structure: select: {dataset_name: "*"} group: {dataset_name: "*", era.name: "*", pipeline: "*"} subgroups: denominator: - select: {sample_name: "*_minus"} + select: {sample_name: "*_plus"} transforms: - name: SelectAxesValues select_axes_values: {"variation": "central"} - name: NormalizeByXSec title: plus numerator: - select: {sample_name: "*_plus"} + select: {sample_name: "*_minus"} transforms: - name: SelectAxesValues select_axes_values: {"variation": "central"} @@ -296,7 +298,7 @@ Postprocessing: inputs: - "*/*/*/*/comp_mStop_vs_mChiRatio" - "*/*/*/*/uncomp_mStop_vs_mChiRatio" - normalize: True + normalize: False scale: linear structure: select: {dataset_name: "*"} @@ -336,7 +338,7 @@ Postprocessing: group: {era.name: "*", "pipeline": "*"} subgroups: num: - select: {sample_name: "*_plus"} - den: select: {sample_name: "*_minus"} + den: + select: {sample_name: "*_plus"} output_name: "{prefix}/{era.name}/{pipeline}/signal_eff_plane.pdf" From e261c0297c074216aa2e6f48c4bde6ad18b1aad5 Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Thu, 23 Apr 2026 14:02:22 -0500 Subject: [PATCH 8/9] WIP saving work --- analyzer/postprocessing/plots/plots_1d.py | 6 +++--- .../postprocessing/transforms/hist_transforms.py | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/analyzer/postprocessing/plots/plots_1d.py b/analyzer/postprocessing/plots/plots_1d.py index 6c9b2497..098cb641 100644 --- a/analyzer/postprocessing/plots/plots_1d.py +++ b/analyzer/postprocessing/plots/plots_1d.py @@ -295,9 +295,9 @@ def plotMultiNumerators( dmeta = den_stacked[1][0].metadata if xsec_normalize: - total_xsec = dmeta['x_sec']+meta['x_sec'] - inv_den_weight=(dmeta['n_events']*total_xsec)/dmeta['x_sec'] - inv_num_weight=(meta['n_events']*total_xsec)/meta['x_sec'] + plus_xsec = dmeta['x_sec'] + inv_den_weight=1 + inv_num_weight=meta['x_sec']/plus_xsec ratio, unc = ratio_func( n_vals*inv_num_weight, diff --git a/analyzer/postprocessing/transforms/hist_transforms.py b/analyzer/postprocessing/transforms/hist_transforms.py index 8682cafd..39bb6a27 100644 --- a/analyzer/postprocessing/transforms/hist_transforms.py +++ b/analyzer/postprocessing/transforms/hist_transforms.py @@ -477,12 +477,21 @@ def makeSlice(x_slice, y_slice): @define class NormalizeByXSec(TransformHistogram): def __call__(self, items: list[ItemWithMeta]): - unique_processes = {(meta["dataset_name"], meta["x_sec"]) for _, meta in items} - total_xsec = sum(xsec for _, xsec in unique_processes) + unique_processes = {meta["dataset_name"]: meta["x_sec"] for _, meta in items} + + plus_xsec = next(v for k, v in unique_processes.items() if 'plus' in k) + minus_xsec = next(v for k, v in unique_processes.items() if 'minus' in k) + + scale = plus_xsec / minus_xsec + + scaled_xsecs = { + k: (v * scale if 'minus' in k else v) + for k, v in unique_processes.items() + } ret = [] for item, meta in items: h = item.histogram - nh = h*(meta["x_sec"])/(meta["n_events"]*total_xsec) + nh = h*(plus_xsec/meta["x_sec"]) # new_axes = [x for x in item.axes if x.name not in select_axes_values] ret.append( ItemWithMeta( From 69babc4b1edb7e1e916ef4d47290205dd4574f3e Mon Sep 17 00:00:00 2001 From: Seth Bendigo Date: Tue, 12 May 2026 13:25:22 -0500 Subject: [PATCH 9/9] More plot edits and fix to xsec --- analyzer/postprocessing/basic_histograms.py | 27 ++- analyzer/postprocessing/plots/plots_1d.py | 49 +++-- analyzer/postprocessing/plots/plots_2d.py | 126 +++++++++-- .../transforms/hist_transforms.py | 27 ++- .../studies/signal_plus_minus_comp.yaml | 206 ++++++++++-------- 5 files changed, 290 insertions(+), 145 deletions(-) diff --git a/analyzer/postprocessing/basic_histograms.py b/analyzer/postprocessing/basic_histograms.py index 69722fd5..ee7155c8 100644 --- a/analyzer/postprocessing/basic_histograms.py +++ b/analyzer/postprocessing/basic_histograms.py @@ -12,7 +12,7 @@ ) from .processors import BasePostprocessor from .plots.plots_1d import plotOne, plotRatio, plotRatioOfRatios -from .plots.plots_2d import plot2D, plot2DPulls, plotEffRatio +from .plots.plots_2d import plot2D, plot2DPulls, plotEffRatio, plotMinusToPlusNEventsRatio from attrs import define, field ResultSet = list[list[ItemWithMeta]] @@ -245,4 +245,29 @@ def getRunFuncs(self, group, prefix=None): plot_configuration=pc, color_scale=self.scale, override_axis_labels = None, + ) + +@define +class Histogram2DNRatio(BasePostprocessor): + output_name: str + scale: Literal["log", "linear"] = "linear" + normalize: bool = False + + def getRunFuncs(self, group, prefix=None): + num = group["num"] + den = group["den"] + common_meta = commonDict(it.chain(num, den)) + output_path = dotFormat( + self.output_name, prefix=prefix, **dict(dictToDot(common_meta)) + ) + pc = self.plot_configuration.makeFormatted(common_meta) + yield ft.partial( + plotMinusToPlusNEventsRatio, + num, + den, + output_path, + self.style_set, + plot_configuration=pc, + color_scale=self.scale, + override_axis_labels = None, ) \ No newline at end of file diff --git a/analyzer/postprocessing/plots/plots_1d.py b/analyzer/postprocessing/plots/plots_1d.py index 098cb641..562092f3 100644 --- a/analyzer/postprocessing/plots/plots_1d.py +++ b/analyzer/postprocessing/plots/plots_1d.py @@ -142,25 +142,26 @@ def plotDictAsBars( mpl.use("Agg") import hist + fig, ax = plt.subplots(layout="constrained") for item, meta in items: title = meta.get("title") or meta["dataset_title"] - if 'minus' in meta.get('sample_name'): - minus_plus = 'Minus' + if "minus" in meta.get("sample_name"): + minus_plus = "Minus" else: - minus_plus = 'Plus' + minus_plus = "Plus" flow = getter(item) style = styler.getStyle(meta) h = makeStrHist([(x, y) for x, y in flow.items()], ax_name=ax_name) if normalize: - h = h/(h.values()[0]) + h = h / (h.values()[0]) h.plot1d( ax=ax, - label=f'{minus_plus} '+title, + label=f"{minus_plus} " + title, **style.get(), ) - + ax.legend() labelAxis(ax, "y", h.axes) labelAxis(ax, "x", h.axes) @@ -193,7 +194,7 @@ def computeRatio(n, d, normalize=False, ratio_type="poisson"): if normalize: with np.errstate(divide="ignore", invalid="ignore"): ratio = (n / np.sum(n)) / (d / np.sum(d)) - unc = unc*(np.sum(d)/np.sum(n)) + unc = unc * (np.sum(d) / np.sum(n)) ratio[ratio == 0] = np.nan ratio[np.isinf(ratio)] = np.nan @@ -224,7 +225,9 @@ def plotStackedDenominators(ax, denominators, styler, normalize=False): for item, meta in den_to_plot: hists.append(item.histogram) - titles.append(meta.get("sample_name") or meta.get("title") or meta["dataset_title"]) + titles.append( + meta.get("sample_name") or meta.get("title") or meta["dataset_title"] + ) style = styler.getStyle(meta) for key, value in style.get().items(): style_kwargs[key].append(value) @@ -295,19 +298,31 @@ def plotMultiNumerators( dmeta = den_stacked[1][0].metadata if xsec_normalize: - plus_xsec = dmeta['x_sec'] - inv_den_weight=1 - inv_num_weight=meta['x_sec']/plus_xsec - + # plus_xsec = dmeta['x_sec'] + # minus_xsec = meta['x_sec'] + # total_xsec = plus_xsec+minus_xsec + # lumi = meta['era']['lumi'] + ##n_n_events = meta['n_events'] + # n_n_events = hist.sum().value + # d_n_events = den_total.sum().value + ##d_n_events = dmeta['n_events'] + # den_weight = (lumi*total_xsec)/d_n_events + # num_weight = (lumi*total_xsec)/n_n_events + # ratio, unc = ratio_func( + # n_vals/num_weight, + # d_vals/den_weight, + # normalize=normalize, + # ratio_type=ratio_type, + # ) + + # ratio *= num_weight/den_weight + # unc *= num_weight/den_weight ratio, unc = ratio_func( - n_vals*inv_num_weight, - d_vals*inv_den_weight, + n_vals, + d_vals, normalize=normalize, ratio_type=ratio_type, ) - - ratio *= inv_den_weight/inv_num_weight - unc *= inv_den_weight/inv_num_weight else: ratio, unc = ratio_func( n_vals, diff --git a/analyzer/postprocessing/plots/plots_2d.py b/analyzer/postprocessing/plots/plots_2d.py index 99487e85..76ca834c 100644 --- a/analyzer/postprocessing/plots/plots_2d.py +++ b/analyzer/postprocessing/plots/plots_2d.py @@ -12,6 +12,17 @@ import mplhep +def getRatioAndUnc(num, den, uncertainty_type="poisson-ratio"): + import hist.intervals as hinter + + with np.errstate(divide="ignore", invalid="ignore"): + ratios = num / den + unc = hinter.ratio_uncertainty( + num=num, denom=den, uncertainty_type=uncertainty_type + ) + return ratios, unc + + def plot2D( histogram, common_meta, @@ -141,7 +152,7 @@ def plot2DSigBkg( plt.close(fig) def plot2DPulls( - hist1, + hist1, hist2, output_path, style_set, @@ -163,7 +174,6 @@ def plot2DPulls( if normalize: h1 = h1 / np.sum(h1.values()) h2 = h2 / np.sum(h2.values()) - with np.errstate(divide='ignore', invalid='ignore'): pulls = (h2.values()-h1.values())/np.sqrt(h2.variances()+h1.variances()) pulls_hist = hist.Hist(*h1.axes) @@ -175,13 +185,13 @@ def plot2DPulls( pulls_hist.plot2d(ax=ax, norm=matplotlib.colors.TwoSlopeNorm(vmin=-5,vmax=5,vcenter=0), cmap='bwr') common_meta = commonDict([meta1, meta2], key=lambda x: x) - import re + import re dataset_name_numbers = re.findall(r'\d+', common_meta["dataset_name"]) - #breakpoint() + #breakpoint() addCMSBits( ax, [common_meta], - extra_text=f"{common_meta["pipeline"]}\n{dataset_name_numbers[-2]}_{dataset_name_numbers[-1]}\nNormalized Pulls\n(Norm Plus-Norm Minus)\n/Sqrt(Var_Sum)", + extra_text=f"{common_meta["pipeline"]}\n{dataset_name_numbers[-2]}_{dataset_name_numbers[-1]}\nPulls\n(Plus-Minus)\n/Sqrt(Var_Sum)", text_color="black", plot_configuration=pc, ) @@ -191,28 +201,28 @@ def plot2DPulls( plt.close(fig) def plotEffRatio( - num_hists, + num_hists, den_hists, output_path, style_set, plot_configuration=None, color_scale="linear", override_axis_labels=None, -): +): import re import mplhep as hep plt.style.use(hep.style.CMS) override_axis_labels = override_axis_labels or {} pc = plot_configuration or PlotConfiguration() fig, ax = plt.subplots(layout="constrained") - ratio_eff = [] - x_values = [] + ratio_eff = [] + x_values = [] y_values = [] metas = [] labels = [] for num_hist in num_hists: for den_hist in den_hists: - num_item, num_meta = num_hist + num_item, num_meta = num_hist den_item, den_meta = den_hist den_name_numbers = re.findall(r'\d+', den_meta["dataset_name"]) num_name_numbers = re.findall(r'\d+', num_meta["dataset_name"]) @@ -223,14 +233,14 @@ def plotEffRatio( x_values.append(int(num_name_numbers[-2])) y_values.append(int(num_name_numbers[-1])) - num = num_item.cutflow + num = num_item.cutflow den = den_item.cutflow with np.errstate(divide='ignore', invalid='ignore'): n_init = num["initial"] d_init = den["initial"] n_count = list(num.values())[-1] d_count = list(den.values())[-1] - num_eff = n_count/n_init + num_eff = n_count/n_init den_eff = d_count/d_init ratio = num_eff/den_eff @@ -249,13 +259,13 @@ def plotEffRatio( viridis_clipped = matplotlib.colors.LinearSegmentedColormap.from_list( 'viridis_clipped', matplotlib.cm.viridis(np.linspace(0.2, 1.0, 256)) - ) + ) sc = ax.scatter( - x_values, - y_values, - c=ratio_eff, - cmap=viridis_clipped, - marker='s', + x_values, + y_values, + c=ratio_eff, + cmap=viridis_clipped, + marker='s', s=2500, ) for x, y, txt in zip(x_values, y_values, labels): @@ -273,4 +283,82 @@ def plotEffRatio( ) saveFig(fig, output_path, metadata=num_meta, extension=pc.image_type) - plt.close(fig) \ No newline at end of file + plt.close(fig) + + +def plotMinusToPlusNEventsRatio( + num_hists, + den_hists, + output_path, + style_set, + plot_configuration=None, + color_scale="linear", + override_axis_labels=None, +): + import re + import mplhep as hep + plt.style.use(hep.style.CMS) + override_axis_labels = override_axis_labels or {} + pc = plot_configuration or PlotConfiguration() + fig, ax = plt.subplots(layout="constrained") + ratio_eff = [] + x_values = [] + y_values = [] + metas = [] + labels = [] + for num_hist in num_hists: + for den_hist in den_hists: + num_item, num_meta = num_hist + den_item, den_meta = den_hist + den_name_numbers = re.findall(r'\d+', den_meta["dataset_name"]) + num_name_numbers = re.findall(r'\d+', num_meta["dataset_name"]) + if num_name_numbers != den_name_numbers: + continue + else: + metas.append(num_meta) + x_values.append(int(num_name_numbers[-2])) + y_values.append(int(num_name_numbers[-1])) + + num = num_item.cutflow + den = den_item.cutflow + with np.errstate(divide='ignore', invalid='ignore'): + n_init = num["initial"] + #d_init = den["initial"] + #n_count = list(num.values())[-1] + d_count = list(den.values())[-1] + + #ratio, unc = getRatioAndUnc(n_count, d_count) + + #Ratio of two binomials is approx log normal, calculate two sided error in 'log space' and then go back. + ratio_eff.append(d_count) + #label_str = f"${ratio:.3g}^{{+{unc[1]:.3g}}}_{{-{unc[0]:.3g}}}$" + label_str = f"${d_count:.3g}$" + labels.append(label_str) + + viridis_clipped = matplotlib.colors.LinearSegmentedColormap.from_list( + 'viridis_clipped', matplotlib.cm.viridis(np.linspace(0.2, 1.0, 256)) + ) + sc = ax.scatter( + x_values, + y_values, + c=ratio_eff, + cmap=viridis_clipped, + marker='s', + s=2500, + ) + for x, y, txt in zip(x_values, y_values, labels): + ax.text(x, y, txt, ha='center', va='center', fontsize=10, color='black') + + ax.set_xlabel(override_axis_labels.get("x", "$m_{{\\mathit{{\\tilde t_1}}}}$")) + ax.set_ylabel(override_axis_labels.get("y", "$m_{{\\mathit{{\\tilde \\chi^{{\\pm}}_1}}}}$")) + fig.colorbar(sc, ax=ax, label="PlusFinal") + addCMSBits( + ax, + [num_meta], + extra_text=f"{num_meta["pipeline"]}", + text_color="black", + plot_configuration=pc, + ) + + saveFig(fig, output_path, metadata=num_meta, extension=pc.image_type) + plt.close(fig) diff --git a/analyzer/postprocessing/transforms/hist_transforms.py b/analyzer/postprocessing/transforms/hist_transforms.py index 39bb6a27..e8e3b42d 100644 --- a/analyzer/postprocessing/transforms/hist_transforms.py +++ b/analyzer/postprocessing/transforms/hist_transforms.py @@ -477,25 +477,28 @@ def makeSlice(x_slice, y_slice): @define class NormalizeByXSec(TransformHistogram): def __call__(self, items: list[ItemWithMeta]): - unique_processes = {meta["dataset_name"]: meta["x_sec"] for _, meta in items} + ret = [] + scale = dict() - plus_xsec = next(v for k, v in unique_processes.items() if 'plus' in k) - minus_xsec = next(v for k, v in unique_processes.items() if 'minus' in k) - - scale = plus_xsec / minus_xsec + #make xsec map between plus and minus datasets + for item, meta in items: + dname = meta["dataset_name"] + if dname not in scale.keys(): + scale[dname] = dict() + scale[dname][meta["sample_name"]] = {"x_sec": meta["x_sec"], 'n_events': meta["n_events"]} - scaled_xsecs = { - k: (v * scale if 'minus' in k else v) - for k, v in unique_processes.items() - } - ret = [] for item, meta in items: h = item.histogram - nh = h*(plus_xsec/meta["x_sec"]) - # new_axes = [x for x in item.axes if x.name not in select_axes_values] + dname = meta["dataset_name"] + plus = dname+"_plus" + minus = dname+"_minus" + sf = (scale[dname][plus]["x_sec"]+scale[dname][minus]["x_sec"]) + lumi_scale = meta['era']['lumi'] * sf / meta["n_events"] + nh = h*lumi_scale ret.append( ItemWithMeta( Histogram(name=h.name, axes=None, histogram=nh), metadata=meta ) ) return ret + diff --git a/configurations/single_stop/studies/signal_plus_minus_comp.yaml b/configurations/single_stop/studies/signal_plus_minus_comp.yaml index bb85448b..d9baae6b 100644 --- a/configurations/single_stop/studies/signal_plus_minus_comp.yaml +++ b/configurations/single_stop/studies/signal_plus_minus_comp.yaml @@ -234,101 +234,115 @@ extra_executors: Postprocessing: do_merge_and_scale: False processors: - - name: RatioPlot - inputs: - - "*/*/*/*/composite_13_m" - - "*/*/*/*/composite_14_m" - - "*/*/*/*/composite_24_m" - - "*/*/*/*/comp_mChi" - - "*/*/*/*/comp_mStop" - - "*/*/*/*/uncomp_mChi" - - "*/*/*/*/uncomp_mStop" - - "*/*/*/*/HT" - - "*/*/*/*/fatjet_pt_1" - - "*/*/*/*/fatjet_eta_1" - - "*/*/*/*/fatjet_phi_1" - - "*/*/*/*/goodjet_pt_1" - - "*/*/*/*/goodjet_eta_1" - - "*/*/*/*/goodjet_phi_1" - - "*/*/*/*/goodjet_pt_2" - - "*/*/*/*/goodjet_eta_2" - - "*/*/*/*/goodjet_phi_2" - - "*/*/*/*/goodjet_pt_3" - - "*/*/*/*/goodjet_eta_3" - - "*/*/*/*/goodjet_phi_3" - - "*/*/*/*/goodjet_pt_4" - - "*/*/*/*/goodjet_eta_4" - - "*/*/*/*/goodjet_phi_4" - normalize: False - xsec_normalize: True - ratio_type: poisson-ratio - structure: - select: {dataset_name: "*"} - group: {dataset_name: "*", era.name: "*", pipeline: "*"} - subgroups: - denominator: - select: {sample_name: "*_plus"} - transforms: - - name: SelectAxesValues - select_axes_values: {"variation": "central"} - - name: NormalizeByXSec - title: plus - numerator: - select: {sample_name: "*_minus"} - transforms: - - name: SelectAxesValues - select_axes_values: {"variation": "central"} - - name: NormalizeByXSec - title: minus - output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/ratio_{name}.pdf" - - name: Histogram2D - inputs: - - "*/*/*/*/comp_mStop_vs_mChiRatio" - - "*/*/*/*/uncomp_mStop_vs_mChiRatio" - normalize: False - structure: - select: {dataset_name: "*"} - group: {dataset_name: "*", era.name: "*", sample_name: "*", pipeline: "*"} - transforms: - - name: SelectAxesValues - select_axes_values: {variation: "central"} - - name: NormalizeByXSec - output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{sample_name}_{name}.pdf" - - name: HistogramPulls2D - inputs: - - "*/*/*/*/comp_mStop_vs_mChiRatio" - - "*/*/*/*/uncomp_mStop_vs_mChiRatio" - normalize: False - scale: linear - structure: - select: {dataset_name: "*"} - group: {dataset_name: "*", era.name: "*", pipeline: "*"} - subgroups: - hist1: - select: {sample_name: "*_minus"} - transforms: - - name: SelectAxesValues - select_axes_values: {"variation": "central"} - - name: NormalizeByXSec - title: minus - hist2: - select: {sample_name: "*_plus"} - transforms: - - name: SelectAxesValues - select_axes_values: {"variation": "central"} - - name: NormalizeByXSec - title: plus - output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{name}_pulls.pdf" - - name: PlotSelectionFlow - inputs: - - "*/*/*/*/pre_selection" - - "*/*/*/*/selection" - normalize: True - structure: - select: {dataset_name: "*", pipeline: "*"} - group: {dataset_name: "*", era.name: "*", pipeline: "*"} - output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{name}.pdf" - - name: Histogram2DEffRatio + #- name: RatioPlot + # inputs: + # - "*/*/*/*/composite_13_m" + # - "*/*/*/*/composite_14_m" + # - "*/*/*/*/composite_24_m" + # - "*/*/*/*/comp_mChi" + # - "*/*/*/*/comp_mStop" + # - "*/*/*/*/uncomp_mChi" + # - "*/*/*/*/uncomp_mStop" + # - "*/*/*/*/HT" + # - "*/*/*/*/fatjet_pt_1" + # - "*/*/*/*/fatjet_eta_1" + # - "*/*/*/*/fatjet_phi_1" + # - "*/*/*/*/goodjet_pt_1" + # - "*/*/*/*/goodjet_eta_1" + # - "*/*/*/*/goodjet_phi_1" + # - "*/*/*/*/goodjet_pt_2" + # - "*/*/*/*/goodjet_eta_2" + # - "*/*/*/*/goodjet_phi_2" + # - "*/*/*/*/goodjet_pt_3" + # - "*/*/*/*/goodjet_eta_3" + # - "*/*/*/*/goodjet_phi_3" + # - "*/*/*/*/goodjet_pt_4" + # - "*/*/*/*/goodjet_eta_4" + # - "*/*/*/*/goodjet_phi_4" + # normalize: False + # xsec_normalize: True + # ratio_type: poisson-ratio + # structure: + # select: {dataset_name: "*"} + # group: {dataset_name: "*", era.name: "*", pipeline: "*"} + # subgroups: + # denominator: + # select: {sample_name: "*_plus"} + # transforms: + # - name: SelectAxesValues + # select_axes_values: {"variation": "central"} + # title: plus + # numerator: + # select: {sample_name: "*_minus"} + # transforms: + # - name: SelectAxesValues + # select_axes_values: {"variation": "central"} + # title: minus + # transforms: + # - name: NormalizeByXSec + # output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/ratio_{name}.pdf" + #- name: Histogram2D + # inputs: + # - "*/*/*/*/comp_mStop_vs_mChiRatio" + # - "*/*/*/*/uncomp_mStop_vs_mChiRatio" + # normalize: False + # structure: + # select: {dataset_name: "*"} + # group: {dataset_name: "*", era.name: "*", sample_name: "*", pipeline: "*"} + # transforms: + # - name: SelectAxesValues + # select_axes_values: {variation: "central"} + # - name: NormalizeByXSec + # output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{sample_name}_{name}.pdf" + #- name: HistogramPulls2D + # inputs: + # - "*/*/*/*/comp_mStop_vs_mChiRatio" + # - "*/*/*/*/uncomp_mStop_vs_mChiRatio" + # normalize: False + # scale: linear + # structure: + # select: {dataset_name: "*"} + # group: {dataset_name: "*", era.name: "*", pipeline: "*"} + # subgroups: + # hist1: + # select: {sample_name: "*_minus"} + # transforms: + # - name: SelectAxesValues + # select_axes_values: {"variation": "central"} + # title: minus + # hist2: + # select: {sample_name: "*_plus"} + # transforms: + # - name: SelectAxesValues + # select_axes_values: {"variation": "central"} + # title: plus + # transforms: + # - name: NormalizeByXSec + # output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{name}_pulls.pdf" + #- name: PlotSelectionFlow + # inputs: + # - "*/*/*/*/pre_selection" + # - "*/*/*/*/selection" + # normalize: True + # structure: + # select: {dataset_name: "*", pipeline: "*"} + # group: {dataset_name: "*", era.name: "*", pipeline: "*"} + # output_name: "{prefix}/{era.name}/{pipeline}/{dataset_name}/{name}.pdf" + #- name: Histogram2DEffRatio + # inputs: + # - "*/*/*/*/pre_selection" + # - "*/*/*/*/selection" + # normalize: False + # structure: + # select: {dataset_name: "*"} + # group: {era.name: "*", "pipeline": "*"} + # subgroups: + # num: + # select: {sample_name: "*_minus"} + # den: + # select: {sample_name: "*_plus"} + # output_name: "{prefix}/{era.name}/{pipeline}/signal_eff_plane.pdf" + - name: Histogram2DNRatio inputs: - "*/*/*/*/pre_selection" - "*/*/*/*/selection" @@ -341,4 +355,4 @@ Postprocessing: select: {sample_name: "*_minus"} den: select: {sample_name: "*_plus"} - output_name: "{prefix}/{era.name}/{pipeline}/signal_eff_plane.pdf" + output_name: "{prefix}/{era.name}/{pipeline}/n_passed_plane_plus.pdf"