From cd4375cdca8a61cd205d884dbac4d5fd95d36fc4 Mon Sep 17 00:00:00 2001
From: tobiaskleiner <tobias.kleiner@desy.de>
Date: Mon, 19 May 2025 16:22:46 +0200
Subject: [PATCH 1/4] adding skeleton light curve validation script

---
 .../gammapy/crab_lightcurve_validation.py     | 135 ++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 release_tests/gammapy/crab_lightcurve_validation.py

diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py
new file mode 100644
index 0000000..7d2443f
--- /dev/null
+++ b/release_tests/gammapy/crab_lightcurve_validation.py
@@ -0,0 +1,135 @@
+
+"""
+This script performs light curve estimation for Crab data using Gammapy v1.3.
+
+Functions:
+- read_obs_ids(runlist_path): Reads obs IDs from a runlist file.
+- make_exclusion_mask(): Creates an exclusion mask for regions to be excluded from analysis.
+- make_datasets(data_store, obs_ids, exclusion_mask): Generates datasets for the given observations and exclusion mask.
+- estimate_lightcurve(datasets): Estimates the light curve for the given datasets using a predefined spectral model.
+- plot_lightcurve(lightcurve): Plots the estimated light curve.
+- main(): Main function to parse arguments, process data, and generate the light curve.
+
+Usage:
+Run the script with the `--runlist` argument pointing to a file containing observation IDs.
+"""
+import argparse
+import logging
+from pathlib import Path
+
+import astropy.units as u
+import matplotlib.pyplot as plt
+import numpy as np
+from astropy.coordinates import SkyCoord
+from gammapy.data import DataStore
+from gammapy.datasets import Datasets, SpectrumDataset
+from gammapy.estimators import LightCurveEstimator
+from gammapy.makers import (
+    ReflectedRegionsBackgroundMaker,
+    SafeMaskMaker,
+    SpectrumDatasetMaker,
+)
+from gammapy.maps import MapAxis, RegionGeom, WcsGeom
+from gammapy.modeling.models import Models, PowerLawSpectralModel, SkyModel
+from regions import CircleSkyRegion
+
+
+def read_obs_ids(runlist_path):
+    with open(runlist_path) as f:
+        obs_ids = [int(line.strip()) for line in f if line.strip()]
+    return obs_ids
+
+def make_exclusion_mask():
+    axis = MapAxis.from_edges(
+        np.logspace(-1.0, 1.0, 10), unit="TeV", name="energy", interp="log"
+    )
+    geom = WcsGeom.create(
+        skydir=(83.6333, 22.0145), width=(4, 4), binsz=0.01, frame="icrs", axes=[axis]
+    )
+
+    target_position = SkyCoord(ra=83.6333, dec=22.0145, unit="deg", frame="icrs")
+    on_region_radius = 0.3 * u.deg
+    on_region = CircleSkyRegion(center=target_position, radius=on_region_radius)
+
+    exclusions = [
+        CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=0.3 * u.deg)
+        for ra, dec in [
+            (81.9087, 21.937), (82.6806, 22.4623), (83.4118, 20.4742),
+            (84.1099, 21.9931), (84.4112, 21.1425), (84.8629, 21.7629),
+            (85.4782, 23.3262), (85.5166, 22.6603)
+        ]
+    ]
+
+    regions = [on_region] + exclusions
+    exclusion_mask = ~geom.to_image().region_mask(regions)
+
+    plt.figure()
+    exclusion_mask.plot()
+    return exclusion_mask
+
+def make_datasets(data_store, obs_ids, exclusion_mask):
+    observations = data_store.get_observations(obs_ids, required_irf="point-like")
+
+    energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "10 TeV", nbin=15)
+    energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "20 TeV", nbin=40, name="energy_true")
+
+    geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.11)", axes=[energy_axis])
+    dataset_empty = SpectrumDataset.create(geom=geom, energy_axis_true=energy_axis_true, name="crab")
+
+    wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02)
+    exclusion_mask = wcsgeom.region_mask(geom.region, inside=False)
+
+    maker = SpectrumDatasetMaker()
+    safe_mask_maker = SafeMaskMaker(methods=["aeff-max"], aeff_percent=10)
+    bkg_maker = ReflectedRegionsBackgroundMaker(exclusion_mask=exclusion_mask)
+
+    datasets = Datasets()
+    for obs in observations:
+        dataset = maker.run(dataset_empty.copy(), obs)
+        dataset = safe_mask_maker.run(dataset, obs)
+        dataset_on_off = bkg_maker.run(dataset, obs)
+        datasets.append(dataset)
+
+    return datasets
+
+def estimate_lightcurve(datasets):
+    target_position = SkyCoord(ra=83.63308, dec=22.01450, unit="deg")
+    spectral_model = PowerLawSpectralModel(
+        index=2.5,
+        amplitude=3.5e-11 * u.Unit("1 / (cm2 s TeV)"),
+        reference=1 * u.TeV,
+    )
+    sky_model = SkyModel(spectral_model=spectral_model, name="crab")
+    datasets.models = Models([sky_model])
+
+    lc_maker = LightCurveEstimator(energy_edges=[1, 10] * u.TeV, reoptimize=False)
+    return lc_maker.run(datasets)
+
+def plot_lightcurve(args, lightcurve):
+    fig, ax = plt.subplots(
+        figsize=(8, 6),
+        gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98},
+    )
+    lightcurve.plot(ax=ax, marker="o", label="1D")
+    plt.legend()
+    runlist_name = Path(args.runlist).stem
+    output_path = Path(f"./validation_plots/lightcurve_plot_{runlist_name}.png")
+    fig.savefig(output_path)
+    logging.info(f"Lightcurve plot saved to {output_path}")
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--runlist", required=True, help="Path to runlist file")
+    args = parser.parse_args()
+
+    obs_ids = read_obs_ids(args.runlist)
+    data_store = DataStore.from_file(filename="./FITS/hdu-index.fits.gz")
+
+    exclusion_mask = make_exclusion_mask()
+    datasets = make_datasets(data_store, obs_ids, exclusion_mask)
+    lightcurve = estimate_lightcurve(datasets)
+    plot_lightcurve(args, lightcurve)
+
+if __name__ == "__main__":
+    main()
+

From 4c9dbe776ed098d8b05bab3e5555c7d3aa48fcd7 Mon Sep 17 00:00:00 2001
From: Eshita Joshi <eshita.joshi@desy.de>
Date: Wed, 21 May 2025 09:54:50 +0200
Subject: [PATCH 2/4] autoplotting lightcurves for all runlists

---
 .../gammapy/crab_lightcurve_validation.py     | 13 ++---
 .../gammapy/runlist_lightcurve_runner.py      | 50 +++++++++++++++++++
 2 files changed, 57 insertions(+), 6 deletions(-)
 create mode 100644 release_tests/gammapy/runlist_lightcurve_runner.py

diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py
index 7d2443f..d748ae1 100644
--- a/release_tests/gammapy/crab_lightcurve_validation.py
+++ b/release_tests/gammapy/crab_lightcurve_validation.py
@@ -73,27 +73,28 @@ def make_datasets(data_store, obs_ids, exclusion_mask):
     energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "10 TeV", nbin=15)
     energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "20 TeV", nbin=40, name="energy_true")
 
-    geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.11)", axes=[energy_axis])
+    geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.08944272)", axes=[energy_axis])
     dataset_empty = SpectrumDataset.create(geom=geom, energy_axis_true=energy_axis_true, name="crab")
 
     wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02)
     exclusion_mask = wcsgeom.region_mask(geom.region, inside=False)
 
-    maker = SpectrumDatasetMaker()
+    maker = SpectrumDatasetMaker(
+    containment_correction=False, selection=["counts", "exposure", "edisp"]
+    )
     safe_mask_maker = SafeMaskMaker(methods=["aeff-max"], aeff_percent=10)
     bkg_maker = ReflectedRegionsBackgroundMaker(exclusion_mask=exclusion_mask)
 
     datasets = Datasets()
     for obs in observations:
-        dataset = maker.run(dataset_empty.copy(), obs)
-        dataset = safe_mask_maker.run(dataset, obs)
+        dataset = maker.run(dataset_empty.copy(name=f'{obs.obs_id}'), obs)
         dataset_on_off = bkg_maker.run(dataset, obs)
-        datasets.append(dataset)
+        dataset_on_off = safe_mask_maker.run(dataset, obs)
+        datasets.append(dataset_on_off)
 
     return datasets
 
 def estimate_lightcurve(datasets):
-    target_position = SkyCoord(ra=83.63308, dec=22.01450, unit="deg")
     spectral_model = PowerLawSpectralModel(
         index=2.5,
         amplitude=3.5e-11 * u.Unit("1 / (cm2 s TeV)"),
diff --git a/release_tests/gammapy/runlist_lightcurve_runner.py b/release_tests/gammapy/runlist_lightcurve_runner.py
new file mode 100644
index 0000000..ed22a02
--- /dev/null
+++ b/release_tests/gammapy/runlist_lightcurve_runner.py
@@ -0,0 +1,50 @@
+import os
+import re
+import sys
+from pathlib import Path
+from gammapy.data import DataStore
+import crab_lightcurve_validation
+
+
+# Function to read all run IDs from a runlist file
+def read_runs_from_file(file_path):
+    with open(file_path) as f:
+        return [int(line.strip()) for line in f if line.strip()]
+
+# Directory containing runlists
+download_dir = Path("/EventDisplay_Release_v491/Crab/runlists")
+all_files = os.listdir(download_dir)
+runlist_files = [f for f in all_files if re.match(r'runlist_releaseTesting.*\.dat', f)]
+
+print(f"Found {len(runlist_files)} runlist files.")
+
+
+# Collect all unique run IDs
+all_runs = set()
+for runlist in runlist_files:
+    local_path = download_dir / runlist
+    runs = read_runs_from_file(local_path)
+    all_runs.update(runs)
+
+print(f"Found {len(all_runs)} unique runs across all runlists.")
+
+# Create a combined runlist file
+combined_runlist_path = Path("./combined_runlist.dat")
+with open(combined_runlist_path, "w") as f:
+    for run in sorted(all_runs):
+        f.write(f"{run}\n")
+
+print(f"Created combined runlist with {len(all_runs)} runs at {combined_runlist_path}")
+
+
+combined_runlist_path = Path("./combined_runlist.dat")
+
+for runlist in runlist_files:
+    local_path = download_dir / runlist
+    print(f"Processing {runlist}")
+    sys.argv = [
+        "crab_lightcurve_validation.py",
+        "--runlist",
+        str(local_path)
+    ]
+    crab_lightcurve_validation.main()
\ No newline at end of file

From 559189476944d8b8458412780835215c1ec65b13 Mon Sep 17 00:00:00 2001
From: Eshita Joshi <eshita.joshi@desy.de>
Date: Tue, 27 May 2025 11:38:28 +0200
Subject: [PATCH 3/4] added comparative plots of relative differences

---
 .../gammapy/crab_lightcurve_validation.py     | 612 +++++++++++++++++-
 .../gammapy/runlist_lightcurve_runner.py      | 127 +++-
 2 files changed, 683 insertions(+), 56 deletions(-)

diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py
index d748ae1..2cc8f49 100644
--- a/release_tests/gammapy/crab_lightcurve_validation.py
+++ b/release_tests/gammapy/crab_lightcurve_validation.py
@@ -1,26 +1,79 @@
-
 """
 This script performs light curve estimation for Crab data using Gammapy v1.3.
 
 Functions:
 - read_obs_ids(runlist_path): Reads obs IDs from a runlist file.
 - make_exclusion_mask(): Creates an exclusion mask for regions to be excluded from analysis.
-- make_datasets(data_store, obs_ids, exclusion_mask): Generates datasets for the given observations and exclusion mask.
-- estimate_lightcurve(datasets): Estimates the light curve for the given datasets using a predefined spectral model.
+- make_datasets(data_store, obs_ids, exclusion_mask): Generates datasets for the
+  analysis.
+- main(args=None): Main function to parse arguments and run the workflow.
+    gc.collect()
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--runlist", required=True, help="Path to runlist file")
+    parser.add_argument(
+        "--datastore",
+        def        stats_text.append(
+            f"{knn_val}:\n"
+            f"  Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, σ: {stats['std']:.2f}%"
+        )="./hdu-index.fits.gz",
+        help="Path to the datastore index file",
+    )
+    parser.add_argument(
+        "--only-relative",
+        action="store_true",
+        help="Only create relative difference plot",
+    )
+    parser.add_argument(
+        "--no-relative",
+        action="store_true",
+        help="Skip creating relative difference plot",
+    )
+
+    # Parse either command line arguments or the passed arguments
+    if args is None:
+        parsed_args = parser.parse_args()
+    else:
+        parsed_args = parser.parse_args(args)
+
+    obs_ids = read_obs_ids(parsed_args.runlist)
+    data_store = DataStore.from_file(filename=parsed_args.datastore)
+
+    exclusion_mask = make_exclusion_mask()
+    datasets = make_datasets(data_store, obs_ids, exclusion_mask)
+    lightcurve = estimate_lightcurve(datasets)
+    plot_lightcurve(parsed_args, lightcurve)
+
+    # Generate relative difference plot unless explicitly disabled
+    if not hasattr(parsed_args, "no_relative") or not parsed_args.no_relative:
+        plot_relative_difference(parsed_args, lightcurve)
+
+    gc.collect() and exclusion mask.
+- estimate_lightcurve(datasets): Estimates the light curve for the given datasets
+                                 using a predefined spectral model.
 - plot_lightcurve(lightcurve): Plots the estimated light curve.
+- plot_relative_difference(args, lightcurve): Plots the relative difference between
+                                              Gammapy and Anasum values.
 - main(): Main function to parse arguments, process data, and generate the light curve.
 
 Usage:
 Run the script with the `--runlist` argument pointing to a file containing observation IDs.
 """
+
 import argparse
+import datetime
+import gc
 import logging
+import re
+import traceback
 from pathlib import Path
 
 import astropy.units as u
+import matplotlib.dates as mdates
 import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
 from astropy.coordinates import SkyCoord
+from astropy.time import Time
 from gammapy.data import DataStore
 from gammapy.datasets import Datasets, SpectrumDataset
 from gammapy.estimators import LightCurveEstimator
@@ -33,16 +86,19 @@
 from gammapy.modeling.models import Models, PowerLawSpectralModel, SkyModel
 from regions import CircleSkyRegion
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+DATA_DIR = "/home/eshita/Documents/VERITAS/"
+
 
 def read_obs_ids(runlist_path):
     with open(runlist_path) as f:
         obs_ids = [int(line.strip()) for line in f if line.strip()]
     return obs_ids
 
+
 def make_exclusion_mask():
-    axis = MapAxis.from_edges(
-        np.logspace(-1.0, 1.0, 10), unit="TeV", name="energy", interp="log"
-    )
+    axis = MapAxis.from_edges(np.logspace(-1.0, 1.0, 10), unit="TeV", name="energy", interp="log")
     geom = WcsGeom.create(
         skydir=(83.6333, 22.0145), width=(4, 4), binsz=0.01, frame="icrs", axes=[axis]
     )
@@ -54,46 +110,56 @@ def make_exclusion_mask():
     exclusions = [
         CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=0.3 * u.deg)
         for ra, dec in [
-            (81.9087, 21.937), (82.6806, 22.4623), (83.4118, 20.4742),
-            (84.1099, 21.9931), (84.4112, 21.1425), (84.8629, 21.7629),
-            (85.4782, 23.3262), (85.5166, 22.6603)
+            (81.9087, 21.937),
+            (82.6806, 22.4623),
+            (83.4118, 20.4742),
+            (84.1099, 21.9931),
+            (84.4112, 21.1425),
+            (84.8629, 21.7629),
+            (85.4782, 23.3262),
+            (85.5166, 22.6603),
         ]
     ]
 
     regions = [on_region] + exclusions
     exclusion_mask = ~geom.to_image().region_mask(regions)
 
-    plt.figure()
+    fig = plt.figure()
     exclusion_mask.plot()
+    plt.close(fig)
     return exclusion_mask
 
+
 def make_datasets(data_store, obs_ids, exclusion_mask):
     observations = data_store.get_observations(obs_ids, required_irf="point-like")
 
-    energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "10 TeV", nbin=15)
-    energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "20 TeV", nbin=40, name="energy_true")
+    energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "50 TeV", nbin=25)
+    energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "70 TeV", nbin=50, name="energy_true")
 
     geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.08944272)", axes=[energy_axis])
-    dataset_empty = SpectrumDataset.create(geom=geom, energy_axis_true=energy_axis_true, name="crab")
+    dataset_empty = SpectrumDataset.create(
+        geom=geom, energy_axis_true=energy_axis_true, name="crab"
+    )
 
     wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02)
     exclusion_mask = wcsgeom.region_mask(geom.region, inside=False)
 
     maker = SpectrumDatasetMaker(
-    containment_correction=False, selection=["counts", "exposure", "edisp"]
+        containment_correction=False, selection=["counts", "exposure", "edisp"]
     )
     safe_mask_maker = SafeMaskMaker(methods=["aeff-max"], aeff_percent=10)
     bkg_maker = ReflectedRegionsBackgroundMaker(exclusion_mask=exclusion_mask)
 
     datasets = Datasets()
     for obs in observations:
-        dataset = maker.run(dataset_empty.copy(name=f'{obs.obs_id}'), obs)
+        dataset = maker.run(dataset_empty.copy(name=f"{obs.obs_id}"), obs)
         dataset_on_off = bkg_maker.run(dataset, obs)
         dataset_on_off = safe_mask_maker.run(dataset, obs)
         datasets.append(dataset_on_off)
 
     return datasets
 
+
 def estimate_lightcurve(datasets):
     spectral_model = PowerLawSpectralModel(
         index=2.5,
@@ -103,34 +169,528 @@ def estimate_lightcurve(datasets):
     sky_model = SkyModel(spectral_model=spectral_model, name="crab")
     datasets.models = Models([sky_model])
 
-    lc_maker = LightCurveEstimator(energy_edges=[1, 10] * u.TeV, reoptimize=False)
-    return lc_maker.run(datasets)
+    lc_maker = LightCurveEstimator(energy_edges=[0.5, 50] * u.TeV, reoptimize=False)
+    lightcurve = lc_maker.run(datasets)
+
+    return lightcurve
+
 
 def plot_lightcurve(args, lightcurve):
+    # Create output path
+    runlist_name = Path(args.runlist).stem
+    output_dir = Path("./validation_plots/knn05_MinMaxScaler")
+    output_dir.mkdir(exist_ok=True, parents=True)
+    output_path = output_dir / f"lightcurve_plot_{runlist_name}.png"
+
+    # Check if the plot already exists
+    if output_path.exists():
+        logging.info(f"Lightcurve plot already exists at {output_path}, skipping generation")
+        return
+
+    plt.figure(figsize=(10, 6))
+
     fig, ax = plt.subplots(
         figsize=(8, 6),
         gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98},
     )
-    lightcurve.plot(ax=ax, marker="o", label="1D")
-    plt.legend()
+    ax.set_ylim(1e-12, 1e-9)
+    lightcurve.plot(ax=ax, sed_type="flux", marker="o", label="Gammapy")
+
+    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
+    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
+    fig.autofmt_xdate()
+
     runlist_name = Path(args.runlist).stem
-    output_path = Path(f"./validation_plots/lightcurve_plot_{runlist_name}.png")
+    folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name)
+
+    if folder_match:
+        folder_name = folder_match.group(1)
+        # Try to find and load the reference CSV file
+        base_path = f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel"
+        csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv")
+
+        if csv_path.exists():
+            try:
+                ref_data = pd.read_csv(csv_path)
+
+                if "MDJ" in ref_data.columns and "Flux" in ref_data.columns:
+                    times = Time(ref_data["MDJ"], format="mjd")
+                    plot_times = [t.plot_date for t in times]
+
+                    yerr = None
+                    if "FluxError" in ref_data.columns:
+                        yerr = ref_data["FluxError"]
+
+                    ax.errorbar(
+                        plot_times,
+                        ref_data["Flux"],
+                        yerr=yerr,
+                        fmt="s",
+                        color="red",
+                        label="Anasum",
+                    )
+
+                    if "MJD_width" in ref_data.columns:
+                        for i, (mjd, flux, width) in enumerate(
+                            zip(ref_data["MDJ"], ref_data["Flux"], ref_data["MJD_width"])
+                        ):
+                            left = Time(mjd - width / 2, format="mjd").plot_date
+                            right = Time(mjd + width / 2, format="mjd").plot_date
+                            ax.hlines(flux, left, right, colors="red", lw=1.5)
+
+                    logging.info(f"Added reference data from {csv_path}")
+                else:
+                    logging.warning("CSV file missing required columns (MDJ, Flux)")
+            except Exception as e:
+                logging.warning(f"Failed to load or plot reference data: {e}")
+
+    plt.legend()
+
+    output_dir = Path("./validation_plots/knn05_MinMaxScaler")
+    output_dir.mkdir(exist_ok=True, parents=True)
+
+    output_path = output_dir / f"lightcurve_plot_{runlist_name}.png"
     fig.savefig(output_path)
     logging.info(f"Lightcurve plot saved to {output_path}")
+    plt.close()
+    plt.clf()
+    gc.collect()
+
+
+def plot_relative_difference(args, lightcurve):
+    """Create a plot showing the relative difference between Gammapy and Anasum values."""
+    runlist_name = Path(args.runlist).stem
+    output_dir = Path("./validation_plots/knn05_MinMaxScaler")
+    output_dir.mkdir(exist_ok=True, parents=True)
+    output_path = output_dir / f"relative_diff_{runlist_name}.png"
+
+    # Check if the plot already exists
+    if output_path.exists():
+        logger.info(
+            f"Relative difference plot already exists at {output_path}, skipping generation"
+        )
+        return
+
+    # Continue with existing code...
+    folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name)
+
+    if not folder_match:
+        logger.warning(f"Could not extract folder name from {runlist_name}, skipping relative plot")
+        return
+
+    folder_name = folder_match.group(1)
+    csv_path = Path(
+        f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6_{folder_name}/LightCurve.csv"
+    )
+
+    if not csv_path.exists():
+        logger.warning(f"No reference CSV found at {csv_path}, skipping relative plot")
+        return
+
+    try:
+        try:
+            ref_data = pd.read_csv(csv_path)
+            if len(ref_data) == 0:
+                logger.warning("Anasum CSV file is empty, skipping relative plot")
+                return
+        except Exception as e:
+            logger.warning(f"Failed to load Anasum data: {e}")
+            return
+
+        temp_fig, temp_ax = plt.subplots()
+        lightcurve.plot(ax=temp_ax, sed_type="flux")
+        lines = [child for child in temp_ax.get_children() if isinstance(child, plt.Line2D)]
+        if not lines or len(lines[0].get_xdata()) == 0:
+            logger.warning("No Gammapy flux data found, skipping relative plot")
+            plt.close(temp_fig)
+            return
+
+        gammapy_times_plot = lines[0].get_xdata()
+        gammapy_flux = lines[0].get_ydata()
+
+        if len(gammapy_times_plot) == 0:
+            logger.warning("No Gammapy time points found, skipping relative plot")
+            plt.close(temp_fig)
+            return
+
+        gammapy_flux_err = lightcurve.to_table(format="lightcurve", sed_type="flux")[
+            "flux_err"
+        ].quantity.value
+
+        # Convert plot x-values (dates) to MJD format for comparison
+        if isinstance(gammapy_times_plot[0], float):
+            gammapy_times = np.array([mdates.num2date(t).timestamp() for t in gammapy_times_plot])
+            gammapy_times = Time(gammapy_times, format="unix").mjd
+        elif isinstance(gammapy_times_plot[0], (np.datetime64, datetime.datetime)):
+            gammapy_times = Time(
+                [t.isoformat() if hasattr(t, "isoformat") else t for t in gammapy_times_plot]
+            ).mjd
+        plt.close(temp_fig)
+
+        ref_data = pd.read_csv(csv_path)
+
+        anasum_times = Time(ref_data["MDJ"], format="mjd")
+        anasum_flux = ref_data["Flux"]
+        anasum_flux_err = ref_data.get("FluxError", np.zeros_like(anasum_flux))
+
+        rel_diffs = []
+        rel_diff_errors = []
+        matched_times = []
+
+        total_gammapy_points = len(gammapy_times)
+        logger.info(f"Total Gammapy points: {total_gammapy_points}")
+
+        for g_time, g_flux, g_err in zip(gammapy_times, gammapy_flux, gammapy_flux_err):
+            # Find closest Anasum time point within tolerance
+            g_time_value = g_time.value if hasattr(g_time, "value") else g_time
+            time_diffs = np.abs(anasum_times.mjd - g_time_value)
+            closest_idx = np.argmin(time_diffs)
+
+            if float(time_diffs[closest_idx]) < (30 / 1440):  # 30 minutes in days
+                a_flux = anasum_flux.iloc[closest_idx]
+
+                # Skip extreme outliers where flux is too low
+                if hasattr(g_flux, "to_value"):
+                    g_flux_val = g_flux.to_value(u.Unit("1 / (cm2 s)"))
+                else:
+                    g_flux_val = float(g_flux)
+                if g_flux_val < 1e-15 or float(a_flux) < 1e-15:
+                    continue
+
+                a_flux_val = float(a_flux)
+                rel_diff = (g_flux_val - a_flux_val) / a_flux_val * 100
+
+                if hasattr(g_err, "to_value"):
+                    g_rel_err = g_err.to_value("") / a_flux_val * 100
+                else:
+                    g_rel_err = float(g_err) / a_flux_val * 100
+
+                # Calculate error on relative difference
+                g_rel_err = float(g_err) / a_flux_val * 100
+                a_rel_err = 0
+                if anasum_flux_err is not None and not pd.isna(anasum_flux_err.iloc[closest_idx]):
+                    a_err = float(anasum_flux_err.iloc[closest_idx])
+                    a_rel_err = a_err / a_flux_val * 100
+
+                # Combine errors in quadrature
+                rel_diff_err = np.sqrt(g_rel_err**2 + a_rel_err**2)
+
+                # Only append if finite
+                if np.isfinite(rel_diff) and np.isfinite(rel_diff_err):
+                    rel_diffs.append(rel_diff)
+                    rel_diff_errors.append(rel_diff_err)
+                    matched_times.append(g_time)
 
-def main():
+        if not rel_diffs:
+            logger.warning("No valid matching time points found for relative difference plot")
+            return
+
+        fig, ax = plt.subplots(
+            figsize=(8, 6),
+            gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98},
+        )
+
+        matched_times = Time(matched_times, format="mjd")
+
+        ax.errorbar(
+            matched_times.plot_date,
+            rel_diffs,
+            yerr=rel_diff_errors,
+            fmt="o",
+            color="purple",
+            label="Relative Difference",
+        )
+
+        ax.axhline(y=0, color="k", linestyle="-", alpha=0.3)
+
+        ax.set_ylabel(
+            r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$"
+        )
+        ax.set_title(f"Relative Flux Difference\n{runlist_name}")
+
+        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
+        ax.xaxis.set_major_locator(mdates.AutoDateLocator())
+        fig.autofmt_xdate()
+
+        rel_diffs_array = np.array(rel_diffs)
+        mean_diff = np.mean(rel_diffs_array)
+        median_diff = np.median(rel_diffs_array)
+        std_diff = np.std(rel_diffs_array)
+
+        stats_text = (
+            f"Mean: {mean_diff:.2f}%\n" f"Median: {median_diff:.2f}%\n" f"Std Dev: {std_diff:.2f}%"
+        )
+
+        ax.text(
+            0.05,
+            0.95,
+            stats_text,
+            transform=ax.transAxes,
+            fontsize=10,
+            verticalalignment="top",
+            bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
+        )
+
+        output_dir = Path("./validation_plots/knn05_MinMaxScaler")
+        output_dir.mkdir(exist_ok=True, parents=True)
+
+        output_path = output_dir / f"relative_diff_{runlist_name}.png"
+        fig.savefig(output_path, bbox_inches="tight")
+        plt.close(fig)
+        plt.clf()
+        gc.collect()
+        logger.info(f"Relative difference plot saved to {output_path}")
+
+    except Exception as e:
+        logger.error(f"Error creating relative difference plot: {e}", exc_info=True)
+
+        logger.error(traceback.format_exc())
+
+
+def plot_combined_relative_differences(runlist_path):
+    """Create a combined plot showing relative differences compared to Anasum."""
+
+    # Get runlist name and subfolder
+    runlist_name = Path(runlist_path).stem
+    folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name)
+
+    if not folder_match:
+        logger.warning(f"Could not extract folder name from {runlist_name}, skipping combined plot")
+        return
+
+    folder_name = folder_match.group(1)
+    base_path = f"{DATA_DIR}EventDisplay_Release_v491/Crab/V6_moderate2tel"
+    anasum_csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv")
+
+    if not anasum_csv_path.exists():
+        logger.warning(f"No reference CSV found at {anasum_csv_path}, skipping combined plot")
+        return
+
+    # Load Anasum reference data
+    try:
+        ref_data = pd.read_csv(anasum_csv_path)
+        anasum_times = Time(ref_data["MDJ"], format="mjd")
+        anasum_flux = ref_data["Flux"]
+    except Exception as e:
+        logger.warning(f"Failed to load Anasum data: {e}")
+        return
+
+    fig, ax = plt.subplots(
+        figsize=(10, 6),
+        gridspec_kw={"left": 0.14, "bottom": 0.14, "top": 0.92, "right": 0.92},
+    )
+
+    # List of kNN values to process
+    knn_values = ["knn05_sec", "knn05", "knn05_MinMaxScaler"]
+    markers = ["o", "s", "^"]
+    colors = ["purple", "green", "orange"]
+
+    all_stats = {}
+
+    for idx, knn_val in enumerate(knn_values):
+        try:
+            datastore_path = Path(f"./FITS/{knn_val}/{folder_name}/hdu-index.fits.gz")
+            if not datastore_path.exists():
+                logger.warning(f"No datastore found at {datastore_path}, skipping {knn_val}")
+                continue
+
+            obs_ids = read_obs_ids(runlist_path)
+            data_store = DataStore.from_file(filename=str(datastore_path))
+            exclusion_mask = make_exclusion_mask()
+            datasets = make_datasets(data_store, obs_ids, exclusion_mask)
+            lightcurve = estimate_lightcurve(datasets)
+
+            flux_points_table = lightcurve.to_table(format="lightcurve", sed_type="flux")
+            if (
+                "flux" not in flux_points_table.colnames
+                or "flux_err" not in flux_points_table.colnames
+            ):
+                logger.error(f"Missing flux or flux_err columns in lightcurve data for {knn_val}")
+                continue
+
+            gammapy_flux = flux_points_table["flux"].quantity.value
+            gammapy_flux_err = flux_points_table["flux_err"].quantity.value
+
+            time_col = flux_points_table["time_min"]
+            if hasattr(time_col, "mjd"):
+                # Already a Time object or Quantity with .mjd
+                gammapy_times = time_col.mjd
+            elif isinstance(time_col[0], (float, np.floating, int)):
+                # Already MJD floats
+                gammapy_times = time_col
+            else:
+                # Try to convert to Time, then get mjd
+                gammapy_times = Time(time_col).mjd
+
+            logger.info(
+                f"Retrieved {len(gammapy_flux)} flux points with proper errors for {knn_val}"
+            )
+
+            if len(gammapy_times) == 0:
+                logger.warning(f"No valid time points found for {knn_val}, skipping")
+                continue
+
+            rel_diffs = []
+            rel_diff_errors = []
+            matched_times = []
+
+            total_gammapy_points = len(gammapy_times)
+            logger.info(f"Total Gammapy points for {knn_val}: {total_gammapy_points}")
+
+            for g_time, g_flux, g_err in zip(gammapy_times, gammapy_flux, gammapy_flux_err):
+                g_time_value = g_time.value if hasattr(g_time, "value") else g_time
+                time_diffs = np.abs(anasum_times.mjd - g_time_value)
+                closest_idx = np.argmin(time_diffs)
+
+                if float(time_diffs[closest_idx]) < (30 / 1440):  # 30 minutes in days
+                    a_flux = anasum_flux.iloc[closest_idx]
+
+                    # Skip points with very low flux
+                    if hasattr(g_flux, "to_value"):
+                        g_flux_val = g_flux.to_value(u.Unit("1 / (cm2 s)"))
+                    else:
+                        g_flux_val = float(g_flux)
+                    if g_flux_val < 1e-15 or float(a_flux) < 1e-15:
+                        continue
+
+                    try:
+                        g_flux_val = float(g_flux)
+                        a_flux_val = float(a_flux)
+
+                        rel_diff = (g_flux_val - a_flux_val) / a_flux_val * 100
+
+                        if not np.isfinite(rel_diff):
+                            continue
+
+                        g_rel_err = float(g_err) / a_flux_val * 100
+
+                        if np.isfinite(rel_diff) and np.isfinite(g_rel_err):
+                            rel_diffs.append(rel_diff)
+                            rel_diff_errors.append(g_rel_err)
+                            matched_times.append(g_time)
+
+                    except (ValueError, ZeroDivisionError):
+                        continue
+
+            if not rel_diffs:
+                logger.warning(f"No valid matching time points found for {knn_val}, skipping")
+                continue
+
+            # Plot this kNN's relative difference
+            matched_times = Time(matched_times, format="mjd")
+            ax.errorbar(
+                matched_times.plot_date,
+                rel_diffs,
+                yerr=rel_diff_errors,
+                fmt=markers[idx],
+                color=colors[idx],
+                label=f"{knn_val}",
+                alpha=0.7,
+            )
+
+            # Calculate statistics for this kNN value
+            rel_diffs_array = np.array(rel_diffs)
+            all_stats[knn_val] = {
+                "mean": np.mean(rel_diffs_array),
+                "median": np.median(rel_diffs_array),
+                "std": np.std(rel_diffs_array),
+            }
+
+        except Exception:
+            logger.error(f"Error processing {knn_val}")
+            continue
+
+    # If we didn't plot anything, return
+    if not all_stats:
+        logger.warning("No data to plot for any kNN value, skipping combined plot")
+        plt.close(fig)
+        return
+
+    ax.axhline(y=0, color="k", linestyle="-", alpha=0.3)
+
+    ax.set_ylabel(r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$")
+
+    ax.set_title(f"Relative Flux Difference\n{runlist_name}")
+    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
+    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
+    fig.autofmt_xdate()
+    handles, labels = ax.get_legend_handles_labels()
+
+    stats_text = []
+    for knn_val in all_stats:
+        stats = all_stats[knn_val]
+        stats_text.append(
+            f"{knn_val}:\n"
+            f"  Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, c3: {stats['std']:.2f}%"
+        )
+
+    ax.legend(handles, labels)
+
+    stat_box = "\n".join(stats_text)
+    ax.text(
+        0.02,
+        0.02,
+        stat_box,
+        transform=ax.transAxes,
+        fontsize=9,
+        verticalalignment="bottom",
+        horizontalalignment="left",
+        bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
+    )
+
+    # Save the combined plot
+    output_dir = Path("./validation_plots/combined")
+    output_dir.mkdir(exist_ok=True, parents=True)
+    output_path = output_dir / f"combined_relative_diff_{runlist_name}.png"
+    fig.savefig(output_path, bbox_inches="tight")
+    plt.close(fig)
+
+    logger.info(f"Combined relative difference plot saved to {output_path}")
+
+
+def main(args=None):
+    gc.collect()
     parser = argparse.ArgumentParser()
     parser.add_argument("--runlist", required=True, help="Path to runlist file")
-    args = parser.parse_args()
+    parser.add_argument(
+        "--datastore",
+        default="./hdu-index.fits.gz",
+        help="Path to the datastore index file",
+    )
+    parser.add_argument(
+        "--only-relative",
+        action="store_true",
+        help="Only create relative difference plot",
+    )
+    parser.add_argument(
+        "--no-relative",
+        action="store_true",
+        help="Skip creating relative difference plot",
+    )
 
-    obs_ids = read_obs_ids(args.runlist)
-    data_store = DataStore.from_file(filename="./FITS/hdu-index.fits.gz")
+    if args is None:
+        parsed_args = parser.parse_args()
+    else:
+        parsed_args = parser.parse_args(args)
+
+    obs_ids = read_obs_ids(parsed_args.runlist)
+    data_store = DataStore.from_file(filename=parsed_args.datastore)
 
     exclusion_mask = make_exclusion_mask()
     datasets = make_datasets(data_store, obs_ids, exclusion_mask)
     lightcurve = estimate_lightcurve(datasets)
-    plot_lightcurve(args, lightcurve)
+
+    # Only create the plots specified by the arguments
+    if not hasattr(parsed_args, "only_relative") or not parsed_args.only_relative:
+        plot_lightcurve(parsed_args, lightcurve)
+
+    if (not hasattr(parsed_args, "no_relative") or not parsed_args.no_relative) and (
+        not hasattr(parsed_args, "only_relative") or parsed_args.only_relative
+    ):
+        plot_relative_difference(parsed_args, lightcurve)
+
+    gc.collect()
+
 
 if __name__ == "__main__":
     main()
-
diff --git a/release_tests/gammapy/runlist_lightcurve_runner.py b/release_tests/gammapy/runlist_lightcurve_runner.py
index ed22a02..35ef624 100644
--- a/release_tests/gammapy/runlist_lightcurve_runner.py
+++ b/release_tests/gammapy/runlist_lightcurve_runner.py
@@ -1,9 +1,56 @@
+import gc
+import logging
+import multiprocessing as mp
 import os
 import re
-import sys
+from functools import partial
 from pathlib import Path
-from gammapy.data import DataStore
+
 import crab_lightcurve_validation
+from gammapy.data import DataStore
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(processName)s - %(message)s")
+logger = logging.getLogger(__name__)
+
+DATA_DIR = "/home/eshita/Documents/VERITAS/"
+
+
+def generate_index_files(runlist_path, runlist_name):
+    path = Path(DATA_DIR + "Data/kNN/v2dl3_moderate2tel_point-like_knn05_MinMaxScaler")
+
+    # Extract the runlist subfolder name from the runlist filename
+    folder_match = re.search(r"runlist_releaseTestingV6_(.*)", Path(runlist_name).stem)
+    if folder_match:
+        subfolder = folder_match.group(1)
+    else:
+        subfolder = Path(runlist_name).stem  # Use full name if pattern doesn't match
+
+    # Read the run numbers from a file
+    with open(runlist_path, "r") as f:
+        selected_runs = {
+            int(line.strip()) for line in f if line.strip().isdigit()
+        }  # Convert to a set of integers
+    # Find all FITS files and filter by run number
+    paths = [
+        p
+        for p in path.rglob("*.fits.gz")
+        if any(f"{run}." in p.name for run in selected_runs)  # Match run number in filename
+    ]
+
+    if not paths:
+        raise ValueError(f"No matching FITS files found for {runlist_name}")
+
+    data_store = DataStore.from_events_files(paths)
+
+    # Create output directory
+    output_dir = Path(f"./FITS/knn05_MinMaxScaler/{subfolder}")
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Write the index files
+    data_store.hdu_table.write(output_dir / "hdu-index.fits.gz", overwrite=True)
+    data_store.obs_table.write(output_dir / "obs-index.fits.gz", overwrite=True)
+
+    return output_dir
 
 
 # Function to read all run IDs from a runlist file
@@ -11,40 +58,60 @@ def read_runs_from_file(file_path):
     with open(file_path) as f:
         return [int(line.strip()) for line in f if line.strip()]
 
-# Directory containing runlists
-download_dir = Path("/EventDisplay_Release_v491/Crab/runlists")
-all_files = os.listdir(download_dir)
-runlist_files = [f for f in all_files if re.match(r'runlist_releaseTesting.*\.dat', f)]
 
-print(f"Found {len(runlist_files)} runlist files.")
+def process_runlist(download_dir, runlist):
+    try:
+        local_path = download_dir / runlist
+        logger.info(f"Starting to process {runlist}")
 
+        # Check if combined plot already exists
+        runlist_name = Path(runlist).stem
+        combined_plot_path = Path(
+            f"./validation_plots/combined/combined_relative_diff_{runlist_name}.png"
+        )
 
-# Collect all unique run IDs
-all_runs = set()
-for runlist in runlist_files:
-    local_path = download_dir / runlist
-    runs = read_runs_from_file(local_path)
-    all_runs.update(runs)
+        if combined_plot_path.exists():
+            logger.info(f"Combined plot already exists for {runlist}, skipping processing")
+            return True
 
-print(f"Found {len(all_runs)} unique runs across all runlists.")
+        # Generate index files first if they don't exist
+        logger.info(f"Generating index files for {runlist} if needed...")
+        output_dir = generate_index_files(local_path, runlist)
+        datastore_path = output_dir / "hdu-index.fits.gz"
 
-# Create a combined runlist file
-combined_runlist_path = Path("./combined_runlist.dat")
-with open(combined_runlist_path, "w") as f:
-    for run in sorted(all_runs):
-        f.write(f"{run}\n")
+        logger.info(f"Creating plots for {runlist}")
+        crab_lightcurve_validation.main(
+            ["--runlist", str(local_path), "--datastore", str(datastore_path), "--no-relative"]
+        )
+        crab_lightcurve_validation.plot_combined_relative_differences(str(local_path))
 
-print(f"Created combined runlist with {len(all_runs)} runs at {combined_runlist_path}")
+        logger.info(f"Successfully completed processing {runlist}")
+        return True
+    except Exception as e:
+        logger.error(f"Error processing {runlist}: {e}")
+        return False
 
 
-combined_runlist_path = Path("./combined_runlist.dat")
+if __name__ == "__main__":
+    # Directory containing runlists
+    download_dir = Path(DATA_DIR + "EventDisplay_Release_v491/Crab/runlists")
+    all_files = os.listdir(download_dir)
+    runlist_files = [f for f in all_files if re.match(r"runlist_releaseTestingV6_..*\.dat", f)]
 
-for runlist in runlist_files:
-    local_path = download_dir / runlist
-    print(f"Processing {runlist}")
-    sys.argv = [
-        "crab_lightcurve_validation.py",
-        "--runlist",
-        str(local_path)
-    ]
-    crab_lightcurve_validation.main()
\ No newline at end of file
+    print(f"Found {len(runlist_files)} runlist files")
+
+    # Number of parallel processes to use
+    num_processes = max(1, mp.cpu_count() - 1)
+    logger.info(f"Using {num_processes} parallel processes")
+
+    # Create a partial function with the download_dir argument fixed
+    process_func = partial(process_runlist, download_dir)
+
+    # Process runlists in parallel
+    with mp.Pool(processes=num_processes) as pool:
+        results = pool.map(process_func, runlist_files)
+
+    # Summary of results
+    successful = sum(results)
+    logger.info(f"Processing complete: {successful} successful.")
+    gc.collect()

From 0e7abd3a29ee0cac31c2533f56795e7ebea41a33 Mon Sep 17 00:00:00 2001
From: Eshita Joshi <eshita.joshi@desy.de>
Date: Fri, 13 Jun 2025 11:49:35 +0200
Subject: [PATCH 4/4] plotting high diff runs

---
 .../gammapy/crab_lightcurve_validation.py     | 566 +++++++++++-------
 1 file changed, 361 insertions(+), 205 deletions(-)

diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py
index 2cc8f49..51cf5e4 100644
--- a/release_tests/gammapy/crab_lightcurve_validation.py
+++ b/release_tests/gammapy/crab_lightcurve_validation.py
@@ -14,7 +14,7 @@
         "--datastore",
         def        stats_text.append(
             f"{knn_val}:\n"
-            f"  Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, σ: {stats['std']:.2f}%"
+            f"  Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, \sigma: {stats['std']:.2f}%"
         )="./hdu-index.fits.gz",
         help="Path to the datastore index file",
     )
@@ -66,7 +66,7 @@ def        stats_text.append(
 import re
 import traceback
 from pathlib import Path
-
+from gammapy.estimators import FluxPointsEstimator, FluxPoints
 import astropy.units as u
 import matplotlib.dates as mdates
 import matplotlib.pyplot as plt
@@ -75,7 +75,7 @@ def        stats_text.append(
 from astropy.coordinates import SkyCoord
 from astropy.time import Time
 from gammapy.data import DataStore
-from gammapy.datasets import Datasets, SpectrumDataset
+from gammapy.datasets import Datasets, SpectrumDataset, FluxPointsDataset
 from gammapy.estimators import LightCurveEstimator
 from gammapy.makers import (
     ReflectedRegionsBackgroundMaker,
@@ -104,20 +104,18 @@ def make_exclusion_mask():
     )
 
     target_position = SkyCoord(ra=83.6333, dec=22.0145, unit="deg", frame="icrs")
-    on_region_radius = 0.3 * u.deg
+    on_region_radius = 0.4 * u.deg
     on_region = CircleSkyRegion(center=target_position, radius=on_region_radius)
 
     exclusions = [
-        CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=0.3 * u.deg)
-        for ra, dec in [
-            (81.9087, 21.937),
-            (82.6806, 22.4623),
-            (83.4118, 20.4742),
-            (84.1099, 21.9931),
-            (84.4112, 21.1425),
-            (84.8629, 21.7629),
-            (85.4782, 23.3262),
-            (85.5166, 22.6603),
+        CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=radius * u.deg)
+        for ra, dec, radius in [
+            (81.9087, 21.937, 0.3),
+            (82.6806, 22.4623, 0.3),
+            (83.4118, 20.4742, 0.3),
+            (84.4112, 21.1425, 0.3),
+            (84.8629, 21.7629, 0.3),
+            (85.4782, 23.3262, 0.3),
         ]
     ]
 
@@ -126,7 +124,11 @@ def make_exclusion_mask():
 
     fig = plt.figure()
     exclusion_mask.plot()
+    plt.savefig("/home/eshita/Documents/VERITAS/EventDisplay_ReleaseTests_code/release_tests/validation_plots/exclusion_mask.png")
     plt.close(fig)
+    plt.cla()
+    plt.clf()
+    gc.collect()
     return exclusion_mask
 
 
@@ -141,9 +143,6 @@ def make_datasets(data_store, obs_ids, exclusion_mask):
         geom=geom, energy_axis_true=energy_axis_true, name="crab"
     )
 
-    wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02)
-    exclusion_mask = wcsgeom.region_mask(geom.region, inside=False)
-
     maker = SpectrumDatasetMaker(
         containment_correction=False, selection=["counts", "exposure", "edisp"]
     )
@@ -193,28 +192,47 @@ def plot_lightcurve(args, lightcurve):
         figsize=(8, 6),
         gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98},
     )
-    ax.set_ylim(1e-12, 1e-9)
+    ax.set_ylim(1e-11, 0.5e-9)
     lightcurve.plot(ax=ax, sed_type="flux", marker="o", label="Gammapy")
+    
+    # Calculate statistics for Gammapy data
+    flux_table = lightcurve.to_table(format="lightcurve", sed_type="flux")
+    gammapy_flux = flux_table['flux'].quantity.value
+    
+    gammapy_mean = np.nanmean(gammapy_flux)
+    gammapy_median = np.nanmedian(gammapy_flux)
+    gammapy_std = np.nanstd(gammapy_flux)
+
+    # Format for scientific notation
+    gammapy_stats_text = (
+        f"Gammapy Stats:\n"
+        f"Mean: {gammapy_mean:.2e} cm$^{{-2}}$ s$^{{-1}}$\n"
+        f"Median: {gammapy_median:.2e} cm$^{{-2}}$ s$^{{-1}}$\n"
+        f"$\sigma$: {gammapy_std:.2e} cm$^{{-2}}$ s$^{{-1}}$"
+    )
 
     ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
     ax.xaxis.set_major_locator(mdates.AutoDateLocator())
     fig.autofmt_xdate()
 
     runlist_name = Path(args.runlist).stem
-    folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name)
+    folder_match = re.search(r"runlist_releaseTestingV6(.*)", runlist_name)
 
-    if folder_match:
+    if True: #folder_match:
         folder_name = folder_match.group(1)
         # Try to find and load the reference CSV file
-        base_path = f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel"
-        csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv")
+        #base_path = f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel"
+        csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv') #Path(f"{base_path}/V6_{folder_name}/LightCurve.csv")
 
         if csv_path.exists():
+            print()
+            print(f"Loading reference data from {csv_path}")
+            print()
             try:
                 ref_data = pd.read_csv(csv_path)
 
-                if "MDJ" in ref_data.columns and "Flux" in ref_data.columns:
-                    times = Time(ref_data["MDJ"], format="mjd")
+                if "MJD" in ref_data.columns and "Flux" in ref_data.columns:
+                    times = Time(ref_data["MJD"], format="mjd")
                     plot_times = [t.plot_date for t in times]
 
                     yerr = None
@@ -230,9 +248,26 @@ def plot_lightcurve(args, lightcurve):
                         label="Anasum",
                     )
 
+                    # Calculate statistics for Anasum data
+                    anasum_flux = ref_data["Flux"].values
+                    anasum_mean = np.mean(anasum_flux)
+                    anasum_median = np.median(anasum_flux)
+                    anasum_std = np.std(anasum_flux)
+
+                    # Add Anasum stats to the text
+                    anasum_stats_text = (
+                        f"Anasum Stats:\n"
+                        f"Mean: {anasum_mean:.2e} cm$^{{-2}}$ s$^{{-1}}$\n"
+                        f"Median: {anasum_median:.2e} cm$^{{-2}}$ s$^{{-1}}$\n"
+                        f"$\sigma$: {anasum_std:.2e} cm$^{{-2}}$ s$^{{-1}}$"
+                    )
+                    
+                    # Combined stats text
+                    stats_text = f"{gammapy_stats_text}\n\n{anasum_stats_text}"
+                    
                     if "MJD_width" in ref_data.columns:
                         for i, (mjd, flux, width) in enumerate(
-                            zip(ref_data["MDJ"], ref_data["Flux"], ref_data["MJD_width"])
+                            zip(ref_data["MJD"], ref_data["Flux"], ref_data["MJD_width"])
                         ):
                             left = Time(mjd - width / 2, format="mjd").plot_date
                             right = Time(mjd + width / 2, format="mjd").plot_date
@@ -240,11 +275,26 @@ def plot_lightcurve(args, lightcurve):
 
                     logging.info(f"Added reference data from {csv_path}")
                 else:
-                    logging.warning("CSV file missing required columns (MDJ, Flux)")
+                    logging.warning("CSV file missing required columns (MJD, Flux)")
+                    stats_text = gammapy_stats_text
             except Exception as e:
                 logging.warning(f"Failed to load or plot reference data: {e}")
+                stats_text = gammapy_stats_text
+        else:
+            stats_text = gammapy_stats_text
+        
+    # Add stats text box
+    ax.text(
+        0.05,
+        0.05,
+        stats_text,
+        transform=ax.transAxes,
+        fontsize=9,
+        verticalalignment="bottom",
+        bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
+    )
 
-    plt.legend()
+    plt.legend(loc='upper left')
 
     output_dir = Path("./validation_plots/knn05_MinMaxScaler")
     output_dir.mkdir(exist_ok=True, parents=True)
@@ -254,9 +304,28 @@ def plot_lightcurve(args, lightcurve):
     logging.info(f"Lightcurve plot saved to {output_path}")
     plt.close()
     plt.clf()
+    plt.cla()
     gc.collect()
 
 
+def get_anasum_runs_with_zenith(log_path):
+    """
+    Parse the anasum log file and return a set of run numbers with specified zenith
+    """
+    runs = set()
+    pattern = re.compile(r"RUN\s+(\d+).*?at\s+(\d+),")
+    with open(log_path) as f:
+        for line in f:
+            match = pattern.search(line)
+            if match:
+                run = int(match.group(1))
+                elevation = float(match.group(2))
+                zenith = 90 - elevation
+                if zenith < 55:
+                    runs.add(run)
+    return runs
+
+
 def plot_relative_difference(args, lightcurve):
     """Create a plot showing the relative difference between Gammapy and Anasum values."""
     runlist_name = Path(args.runlist).stem
@@ -271,180 +340,122 @@ def plot_relative_difference(args, lightcurve):
         )
         return
 
-    # Continue with existing code...
-    folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name)
-
-    if not folder_match:
-        logger.warning(f"Could not extract folder name from {runlist_name}, skipping relative plot")
+    log_path = "/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/anasum_releaseTestingV6.combined.log"
+    zenith_runs = get_anasum_runs_with_zenith(log_path)
+
+    # Read runs from runlist
+    with open(args.runlist) as f:
+        all_run_ids = [int(line.strip()) for line in f if line.strip()]
+    # Filter to only those with zenith < 55
+    filtered_run_ids = [run for run in all_run_ids if run in zenith_runs]
+
+    # Get Gammapy data
+    flux_table = lightcurve.to_table(format="lightcurve", sed_type="flux")
+    gammapy_flux = flux_table['flux'].quantity.value
+    gammapy_flux_err = flux_table['flux_err'].quantity.value
+
+    # Filter Gammapy data by run id
+    gammapy_run_ids = all_run_ids[:len(gammapy_flux)] if len(gammapy_flux) < len(all_run_ids) else all_run_ids
+    gammapy_flux_by_run = dict(zip(gammapy_run_ids, gammapy_flux))
+    gammapy_flux_err_by_run = dict(zip(gammapy_run_ids, gammapy_flux_err))
+
+    # Get Anasum data
+    csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv')
+    ref_data = pd.read_csv(csv_path)
+    anasum_flux = ref_data["Flux"].values
+    anasum_flux_err = ref_data.get("FluxError", np.zeros_like(anasum_flux))
+    for col in ref_data.columns:
+        if col.lower() in ["run", "runid", "obs_id", "obsid"]:
+            anasum_run_col = col
+            break
+    if anasum_run_col is not None:
+        print('anasum run col not none')
+        anasum_run_ids = ref_data[anasum_run_col].astype(int).values
+        anasum_flux_by_run = dict(zip(anasum_run_ids, anasum_flux))
+        anasum_flux_err_by_run = dict(zip(anasum_run_ids, anasum_flux_err))
+    else:
+        logger.error("No run number column found in Anasum CSV")
         return
 
-    folder_name = folder_match.group(1)
-    csv_path = Path(
-        f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6_{folder_name}/LightCurve.csv"
-    )
-
-    if not csv_path.exists():
-        logger.warning(f"No reference CSV found at {csv_path}, skipping relative plot")
+    # Now match only filtered runs
+    matched_gammapy_flux = []
+    matched_gammapy_flux_err = []
+    matched_anasum_flux = []
+    matched_anasum_flux_err = []
+    valid_run_ids = []
+    for run_id in filtered_run_ids:
+        if run_id in gammapy_flux_by_run and run_id in anasum_flux_by_run:
+            matched_gammapy_flux.append(gammapy_flux_by_run[run_id])
+            matched_gammapy_flux_err.append(gammapy_flux_err_by_run[run_id])
+            matched_anasum_flux.append(anasum_flux_by_run[run_id])
+            matched_anasum_flux_err.append(anasum_flux_err_by_run[run_id])
+            valid_run_ids.append(run_id)
+    if len(matched_gammapy_flux) == 0:
+        logger.error("No matching runs found between Gammapy and Anasum")
         return
+    logger.info(f"Comparing {len(matched_gammapy_flux)} runs between Gammapy and Anasum")
+    gc.collect()
+    # Calculate relative differences
+    rel_diffs = []
+    rel_diff_errors = []
+    for g_flux, g_err, a_flux, a_err in zip(matched_gammapy_flux, matched_gammapy_flux_err, matched_anasum_flux, matched_anasum_flux_err):
+        if not np.isfinite(g_flux) or not np.isfinite(a_flux):
+            continue
+        rel_diff = (g_flux - a_flux) / a_flux * 100
+        if a_err > 0:
+            rel_diff_err = np.sqrt((g_err / a_flux) ** 2 + (a_err * g_flux / (a_flux ** 2)) ** 2) * 100
+        else:
+            rel_diff_err = g_err / a_flux * 100
+        if np.isfinite(rel_diff) and np.isfinite(rel_diff_err):
+            rel_diffs.append(rel_diff)
+            rel_diff_errors.append(rel_diff_err)
+    if not rel_diffs:
+        logger.warning("No valid matching points found for relative difference plot")
+        return
+    fig, ax = plt.subplots(
+        figsize=(8, 6),
+        gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98},
+    )
+    rel_diffs = np.array(rel_diffs).flatten()
+    rel_diff_errors = np.array(rel_diff_errors).flatten()
+    ax.errorbar(
+        np.arange(len(rel_diffs)),
+        rel_diffs,
+        yerr=rel_diff_errors,
+        fmt="o",
+        color="purple",
+        label="Relative Difference",
+    )
+    ax.axhline(y=0, color="k", linestyle="-", alpha=0.3)
+    ax.set_ylabel(
+        r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$"
+    )
+    ax.set_title(f"Relative Flux Difference\n{runlist_name}")
+    ax.set_xlabel("Index")
+    fig.autofmt_xdate()
+    rel_diffs_array = np.array(rel_diffs)
+    mean_diff = np.mean(rel_diffs_array)
+    median_diff = np.median(rel_diffs_array)
+    std_diff = np.std(rel_diffs_array)
+    stats_text = (
+        f"Mean: {mean_diff:.2f}%\n" f"Median: {median_diff:.2f}%\n" f"Std Dev: {std_diff:.2f}%"
+    )
+    ax.text(
+        0.05,
+        0.95,
+        stats_text,
+        transform=ax.transAxes,
+        fontsize=10,
+        verticalalignment="top",
+        bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
+    )
+    fig.savefig(output_path, bbox_inches="tight")
+    plt.close(fig)
+    plt.clf()
+    gc.collect()
+    logger.info(f"Relative difference plot saved to {output_path}")
+    
 
-    try:
-        try:
-            ref_data = pd.read_csv(csv_path)
-            if len(ref_data) == 0:
-                logger.warning("Anasum CSV file is empty, skipping relative plot")
-                return
-        except Exception as e:
-            logger.warning(f"Failed to load Anasum data: {e}")
-            return
-
-        temp_fig, temp_ax = plt.subplots()
-        lightcurve.plot(ax=temp_ax, sed_type="flux")
-        lines = [child for child in temp_ax.get_children() if isinstance(child, plt.Line2D)]
-        if not lines or len(lines[0].get_xdata()) == 0:
-            logger.warning("No Gammapy flux data found, skipping relative plot")
-            plt.close(temp_fig)
-            return
-
-        gammapy_times_plot = lines[0].get_xdata()
-        gammapy_flux = lines[0].get_ydata()
-
-        if len(gammapy_times_plot) == 0:
-            logger.warning("No Gammapy time points found, skipping relative plot")
-            plt.close(temp_fig)
-            return
-
-        gammapy_flux_err = lightcurve.to_table(format="lightcurve", sed_type="flux")[
-            "flux_err"
-        ].quantity.value
-
-        # Convert plot x-values (dates) to MJD format for comparison
-        if isinstance(gammapy_times_plot[0], float):
-            gammapy_times = np.array([mdates.num2date(t).timestamp() for t in gammapy_times_plot])
-            gammapy_times = Time(gammapy_times, format="unix").mjd
-        elif isinstance(gammapy_times_plot[0], (np.datetime64, datetime.datetime)):
-            gammapy_times = Time(
-                [t.isoformat() if hasattr(t, "isoformat") else t for t in gammapy_times_plot]
-            ).mjd
-        plt.close(temp_fig)
-
-        ref_data = pd.read_csv(csv_path)
-
-        anasum_times = Time(ref_data["MDJ"], format="mjd")
-        anasum_flux = ref_data["Flux"]
-        anasum_flux_err = ref_data.get("FluxError", np.zeros_like(anasum_flux))
-
-        rel_diffs = []
-        rel_diff_errors = []
-        matched_times = []
-
-        total_gammapy_points = len(gammapy_times)
-        logger.info(f"Total Gammapy points: {total_gammapy_points}")
-
-        for g_time, g_flux, g_err in zip(gammapy_times, gammapy_flux, gammapy_flux_err):
-            # Find closest Anasum time point within tolerance
-            g_time_value = g_time.value if hasattr(g_time, "value") else g_time
-            time_diffs = np.abs(anasum_times.mjd - g_time_value)
-            closest_idx = np.argmin(time_diffs)
-
-            if float(time_diffs[closest_idx]) < (30 / 1440):  # 30 minutes in days
-                a_flux = anasum_flux.iloc[closest_idx]
-
-                # Skip extreme outliers where flux is too low
-                if hasattr(g_flux, "to_value"):
-                    g_flux_val = g_flux.to_value(u.Unit("1 / (cm2 s)"))
-                else:
-                    g_flux_val = float(g_flux)
-                if g_flux_val < 1e-15 or float(a_flux) < 1e-15:
-                    continue
-
-                a_flux_val = float(a_flux)
-                rel_diff = (g_flux_val - a_flux_val) / a_flux_val * 100
-
-                if hasattr(g_err, "to_value"):
-                    g_rel_err = g_err.to_value("") / a_flux_val * 100
-                else:
-                    g_rel_err = float(g_err) / a_flux_val * 100
-
-                # Calculate error on relative difference
-                g_rel_err = float(g_err) / a_flux_val * 100
-                a_rel_err = 0
-                if anasum_flux_err is not None and not pd.isna(anasum_flux_err.iloc[closest_idx]):
-                    a_err = float(anasum_flux_err.iloc[closest_idx])
-                    a_rel_err = a_err / a_flux_val * 100
-
-                # Combine errors in quadrature
-                rel_diff_err = np.sqrt(g_rel_err**2 + a_rel_err**2)
-
-                # Only append if finite
-                if np.isfinite(rel_diff) and np.isfinite(rel_diff_err):
-                    rel_diffs.append(rel_diff)
-                    rel_diff_errors.append(rel_diff_err)
-                    matched_times.append(g_time)
-
-        if not rel_diffs:
-            logger.warning("No valid matching time points found for relative difference plot")
-            return
-
-        fig, ax = plt.subplots(
-            figsize=(8, 6),
-            gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98},
-        )
-
-        matched_times = Time(matched_times, format="mjd")
-
-        ax.errorbar(
-            matched_times.plot_date,
-            rel_diffs,
-            yerr=rel_diff_errors,
-            fmt="o",
-            color="purple",
-            label="Relative Difference",
-        )
-
-        ax.axhline(y=0, color="k", linestyle="-", alpha=0.3)
-
-        ax.set_ylabel(
-            r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$"
-        )
-        ax.set_title(f"Relative Flux Difference\n{runlist_name}")
-
-        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
-        ax.xaxis.set_major_locator(mdates.AutoDateLocator())
-        fig.autofmt_xdate()
-
-        rel_diffs_array = np.array(rel_diffs)
-        mean_diff = np.mean(rel_diffs_array)
-        median_diff = np.median(rel_diffs_array)
-        std_diff = np.std(rel_diffs_array)
-
-        stats_text = (
-            f"Mean: {mean_diff:.2f}%\n" f"Median: {median_diff:.2f}%\n" f"Std Dev: {std_diff:.2f}%"
-        )
-
-        ax.text(
-            0.05,
-            0.95,
-            stats_text,
-            transform=ax.transAxes,
-            fontsize=10,
-            verticalalignment="top",
-            bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
-        )
-
-        output_dir = Path("./validation_plots/knn05_MinMaxScaler")
-        output_dir.mkdir(exist_ok=True, parents=True)
-
-        output_path = output_dir / f"relative_diff_{runlist_name}.png"
-        fig.savefig(output_path, bbox_inches="tight")
-        plt.close(fig)
-        plt.clf()
-        gc.collect()
-        logger.info(f"Relative difference plot saved to {output_path}")
-
-    except Exception as e:
-        logger.error(f"Error creating relative difference plot: {e}", exc_info=True)
-
-        logger.error(traceback.format_exc())
 
 
 def plot_combined_relative_differences(runlist_path):
@@ -452,7 +463,7 @@ def plot_combined_relative_differences(runlist_path):
 
     # Get runlist name and subfolder
     runlist_name = Path(runlist_path).stem
-    folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name)
+    folder_match = re.search(r"runlist_releaseTestingV6(.*)", runlist_name)
 
     if not folder_match:
         logger.warning(f"Could not extract folder name from {runlist_name}, skipping combined plot")
@@ -460,7 +471,7 @@ def plot_combined_relative_differences(runlist_path):
 
     folder_name = folder_match.group(1)
     base_path = f"{DATA_DIR}EventDisplay_Release_v491/Crab/V6_moderate2tel"
-    anasum_csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv")
+    anasum_csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv') #Path(f"{base_path}/V6_{folder_name}/LightCurve.csv")
 
     if not anasum_csv_path.exists():
         logger.warning(f"No reference CSV found at {anasum_csv_path}, skipping combined plot")
@@ -469,7 +480,7 @@ def plot_combined_relative_differences(runlist_path):
     # Load Anasum reference data
     try:
         ref_data = pd.read_csv(anasum_csv_path)
-        anasum_times = Time(ref_data["MDJ"], format="mjd")
+        anasum_times = Time(ref_data["MJD"], format="mjd")
         anasum_flux = ref_data["Flux"]
     except Exception as e:
         logger.warning(f"Failed to load Anasum data: {e}")
@@ -489,13 +500,13 @@ def plot_combined_relative_differences(runlist_path):
 
     for idx, knn_val in enumerate(knn_values):
         try:
-            datastore_path = Path(f"./FITS/{knn_val}/{folder_name}/hdu-index.fits.gz")
+            datastore_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_ReleaseTests_code/FITS/knn05_MinMaxScaler/V6/') #Path(f"./FITS/{knn_val}/{folder_name}/hdu-index.fits.gz")
             if not datastore_path.exists():
                 logger.warning(f"No datastore found at {datastore_path}, skipping {knn_val}")
                 continue
 
             obs_ids = read_obs_ids(runlist_path)
-            data_store = DataStore.from_file(filename=str(datastore_path))
+            data_store = DataStore.from_dir(filename=str(datastore_path))
             exclusion_mask = make_exclusion_mask()
             datasets = make_datasets(data_store, obs_ids, exclusion_mask)
             lightcurve = estimate_lightcurve(datasets)
@@ -542,7 +553,7 @@ def plot_combined_relative_differences(runlist_path):
                 time_diffs = np.abs(anasum_times.mjd - g_time_value)
                 closest_idx = np.argmin(time_diffs)
 
-                if float(time_diffs[closest_idx]) < (30 / 1440):  # 30 minutes in days
+                if float(time_diffs[closest_idx]) <= 0.001:
                     a_flux = anasum_flux.iloc[closest_idx]
 
                     # Skip points with very low flux
@@ -553,6 +564,7 @@ def plot_combined_relative_differences(runlist_path):
                     if g_flux_val < 1e-15 or float(a_flux) < 1e-15:
                         continue
 
+
                     try:
                         g_flux_val = float(g_flux)
                         a_flux_val = float(a_flux)
@@ -621,7 +633,7 @@ def plot_combined_relative_differences(runlist_path):
         stats = all_stats[knn_val]
         stats_text.append(
             f"{knn_val}:\n"
-            f"  Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, c3: {stats['std']:.2f}%"
+            f"  Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, $\sigma$: {stats['std']:.2f}%"
         )
 
     ax.legend(handles, labels)
@@ -674,11 +686,15 @@ def main(args=None):
         parsed_args = parser.parse_args(args)
 
     obs_ids = read_obs_ids(parsed_args.runlist)
-    data_store = DataStore.from_file(filename=parsed_args.datastore)
+    data_store = DataStore.from_dir(parsed_args.datastore)
+
 
     exclusion_mask = make_exclusion_mask()
     datasets = make_datasets(data_store, obs_ids, exclusion_mask)
     lightcurve = estimate_lightcurve(datasets)
+    
+    # Use the correct, index-based function
+    identify_and_plot_high_diff_runs(parsed_args, lightcurve, threshold=50)
 
     # Only create the plots specified by the arguments
     if not hasattr(parsed_args, "only_relative") or not parsed_args.only_relative:
@@ -689,8 +705,148 @@ def main(args=None):
     ):
         plot_relative_difference(parsed_args, lightcurve)
 
+    #plot_energy_spectra(parsed_args, lightcurve)
+
     gc.collect()
 
 
 if __name__ == "__main__":
     main()
+
+def identify_and_plot_high_diff_runs(args, lightcurve, threshold=50):
+    """
+    Identifies indices with relative flux difference > threshold percent (or < -threshold percent)
+    and creates a plot showing just these indices for both Gammapy and Anasum.
+    Applies zenith cuts using the anasum log file.
+    """
+    runlist_name = Path(args.runlist).stem
+    csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv')
+    log_path = "/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/anasum_releaseTestingV6.combined.log"
+    if not csv_path.exists():
+        logger.warning(f"No reference CSV found at {csv_path}, skipping high diff analysis")
+        return
+    try:
+        zenith_runs = get_anasum_runs_with_zenith(log_path)
+
+        # Read runs from runlist and filter by zenith cut
+        with open(args.runlist) as f:
+            all_run_ids = [int(line.strip()) for line in f if line.strip()]
+        filtered_run_ids = [run for run in all_run_ids if run in zenith_runs]
+
+        # Get Gammapy data
+        flux_table = lightcurve.to_table(format="lightcurve", sed_type="flux")
+        gammapy_flux = flux_table['flux'].quantity.value
+        gammapy_flux_err = flux_table['flux_err'].quantity.value
+
+        # Filter Gammapy data by run id
+        gammapy_run_ids = all_run_ids[:len(gammapy_flux)] if len(gammapy_flux) < len(all_run_ids) else all_run_ids
+        gammapy_flux_by_run = dict(zip(gammapy_run_ids, gammapy_flux))
+        gammapy_flux_err_by_run = dict(zip(gammapy_run_ids, gammapy_flux_err))
+
+        # Get Anasum data
+        anasum_data = pd.read_csv(csv_path)
+        anasum_flux = anasum_data["Flux"].values
+        anasum_flux_err = anasum_data.get("FluxError", np.zeros_like(anasum_flux))
+        anasum_run_col = None
+        for col in anasum_data.columns:
+            if col.lower() in ["run", "runid", "obs_id", "obsid"]:
+                anasum_run_col = col
+                break
+        if anasum_run_col is not None:
+            anasum_run_ids = anasum_data[anasum_run_col].astype(int).values
+            anasum_flux_by_run = dict(zip(anasum_run_ids, anasum_flux))
+            anasum_flux_err_by_run = dict(zip(anasum_run_ids, anasum_flux_err))
+        else:
+            logger.error("No run number column found in Anasum CSV")
+            return
+
+        # Now match only filtered runs (zenith < 55 deg)
+        matched_gammapy_flux = []
+        matched_gammapy_flux_err = []
+        matched_anasum_flux = []
+        matched_anasum_flux_err = []
+        valid_run_ids = []
+        for run_id in filtered_run_ids:
+            if run_id in gammapy_flux_by_run and run_id in anasum_flux_by_run:
+                matched_gammapy_flux.append(gammapy_flux_by_run[run_id])
+                matched_gammapy_flux_err.append(gammapy_flux_err_by_run[run_id])
+                matched_anasum_flux.append(anasum_flux_by_run[run_id])
+                matched_anasum_flux_err.append(anasum_flux_err_by_run[run_id])
+                valid_run_ids.append(run_id)
+        if len(matched_gammapy_flux) == 0:
+            logger.error("No matching runs with zenith < 55found between Gammapy and Anasum")
+            return
+        logger.info(f"Comparing {len(matched_gammapy_flux)} runs with zenith < 55 between Gammapy and Anasum")
+
+        # Calculate relative differences
+        rel_diffs = np.array([((g - a) / a) * 100 for g, a in zip(matched_gammapy_flux, matched_anasum_flux)])
+        high_diff_indices = np.where((rel_diffs > threshold) | (rel_diffs < -threshold))[0]
+        if len(high_diff_indices) == 0:
+            logger.info(f"No indices found with >{threshold}% absolute difference")
+            return
+        # Save high difference runs to file
+        output_dir = Path("./validation_plots/high_diff_runs")
+        output_dir.mkdir(exist_ok=True, parents=True)
+        high_diff_runlist = output_dir / f"high_diff_runs_{runlist_name}.dat"
+        with open(high_diff_runlist, 'w') as f:
+            for idx in high_diff_indices:
+                f.write(f"{valid_run_ids[idx]}\n")
+        high_diff_details = output_dir / f"high_diff_details_{runlist_name}.csv"
+        with open(high_diff_details, 'w') as f:
+            f.write("Run,RelativeDifferencePercent\n")
+            for idx in high_diff_indices:
+                f.write(f"{valid_run_ids[idx]},{float(rel_diffs[idx]):.2f}\n")
+        logger.info(f"Found {len(high_diff_indices)} indices with >{threshold}% absolute difference")
+        logger.info(f"High difference runs saved to {high_diff_runlist}")
+        logger.info(f"Detailed information saved to {high_diff_details}")
+        # Extract data just for these indices
+        high_diff_gammapy_flux = [float(matched_gammapy_flux[i]) for i in high_diff_indices]
+        high_diff_anasum_flux = [float(matched_anasum_flux[i]) for i in high_diff_indices]
+        high_diff_times = [anasum_data["MJD"].values[anasum_data[anasum_run_col] == valid_run_ids[i]][0] if "MJD" in anasum_data.columns else i for i in high_diff_indices]
+        # Create the plot for high difference indices
+        output_path = output_dir / f"high_diff_{threshold}pct_{runlist_name}.png"
+        fig, ax = plt.subplots(figsize=(10, 6))
+        ax.set_yscale('log')
+        plot_times = Time(high_diff_times, format="mjd").plot_date if "MJD" in anasum_data.columns else high_diff_times
+        ax.errorbar(
+            plot_times,
+            high_diff_gammapy_flux,
+            fmt='o',
+            label="Gammapy",
+            color='blue'
+        )
+        ax.errorbar(
+            plot_times,
+            high_diff_anasum_flux,
+            fmt='s',
+            label="Anasum",
+            color='red'
+        )
+        for i, (x, y, run_id, rel_diff) in enumerate(zip(
+            plot_times, high_diff_gammapy_flux, [valid_run_ids[i] for i in range(len(high_diff_indices))],
+            [rel_diffs[idx] for idx in high_diff_indices]
+        )):
+            ax.annotate(
+                f"Run {run_id}\n({float(rel_diff):.1f}%)",
+                (x, y),
+                xytext=(0, 10),
+                textcoords='offset points',
+                ha='center',
+                fontsize=8
+            )
+        ax.set_title(f"Indices with >{threshold}% for zenith < 55deg, Flux Difference \n{runlist_name}")
+        ax.set_xlabel("Date" if "MJD" in anasum_data.columns else "Index")
+        ax.set_ylabel("Flux (cm$^{-2}$ s$^{-1}$)")
+        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
+        ax.xaxis.set_major_locator(mdates.AutoDateLocator())
+        fig.autofmt_xdate()
+        plt.legend()
+        plt.tight_layout()
+        fig.savefig(output_path)
+        plt.close(fig)
+        logger.info(f"High difference plot saved to {output_path}")
+        return [valid_run_ids[i] for i in range(len(high_diff_indices))]
+    except Exception as e:
+        logger.error(f"Error identifying high difference indices: {e}", exc_info=True)
+        logger.error(traceback.format_exc())
+        return []
\ No newline at end of file