From cd4375cdca8a61cd205d884dbac4d5fd95d36fc4 Mon Sep 17 00:00:00 2001 From: tobiaskleiner Date: Mon, 19 May 2025 16:22:46 +0200 Subject: [PATCH 1/4] adding skeleton light curve validation script --- .../gammapy/crab_lightcurve_validation.py | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 release_tests/gammapy/crab_lightcurve_validation.py diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py new file mode 100644 index 0000000..7d2443f --- /dev/null +++ b/release_tests/gammapy/crab_lightcurve_validation.py @@ -0,0 +1,135 @@ + +""" +This script performs light curve estimation for Crab data using Gammapy v1.3. + +Functions: +- read_obs_ids(runlist_path): Reads obs IDs from a runlist file. +- make_exclusion_mask(): Creates an exclusion mask for regions to be excluded from analysis. +- make_datasets(data_store, obs_ids, exclusion_mask): Generates datasets for the given observations and exclusion mask. +- estimate_lightcurve(datasets): Estimates the light curve for the given datasets using a predefined spectral model. +- plot_lightcurve(lightcurve): Plots the estimated light curve. +- main(): Main function to parse arguments, process data, and generate the light curve. + +Usage: +Run the script with the `--runlist` argument pointing to a file containing observation IDs. +""" +import argparse +import logging +from pathlib import Path + +import astropy.units as u +import matplotlib.pyplot as plt +import numpy as np +from astropy.coordinates import SkyCoord +from gammapy.data import DataStore +from gammapy.datasets import Datasets, SpectrumDataset +from gammapy.estimators import LightCurveEstimator +from gammapy.makers import ( + ReflectedRegionsBackgroundMaker, + SafeMaskMaker, + SpectrumDatasetMaker, +) +from gammapy.maps import MapAxis, RegionGeom, WcsGeom +from gammapy.modeling.models import Models, PowerLawSpectralModel, SkyModel +from regions import CircleSkyRegion + + +def read_obs_ids(runlist_path): + with open(runlist_path) as f: + obs_ids = [int(line.strip()) for line in f if line.strip()] + return obs_ids + +def make_exclusion_mask(): + axis = MapAxis.from_edges( + np.logspace(-1.0, 1.0, 10), unit="TeV", name="energy", interp="log" + ) + geom = WcsGeom.create( + skydir=(83.6333, 22.0145), width=(4, 4), binsz=0.01, frame="icrs", axes=[axis] + ) + + target_position = SkyCoord(ra=83.6333, dec=22.0145, unit="deg", frame="icrs") + on_region_radius = 0.3 * u.deg + on_region = CircleSkyRegion(center=target_position, radius=on_region_radius) + + exclusions = [ + CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=0.3 * u.deg) + for ra, dec in [ + (81.9087, 21.937), (82.6806, 22.4623), (83.4118, 20.4742), + (84.1099, 21.9931), (84.4112, 21.1425), (84.8629, 21.7629), + (85.4782, 23.3262), (85.5166, 22.6603) + ] + ] + + regions = [on_region] + exclusions + exclusion_mask = ~geom.to_image().region_mask(regions) + + plt.figure() + exclusion_mask.plot() + return exclusion_mask + +def make_datasets(data_store, obs_ids, exclusion_mask): + observations = data_store.get_observations(obs_ids, required_irf="point-like") + + energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "10 TeV", nbin=15) + energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "20 TeV", nbin=40, name="energy_true") + + geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.11)", axes=[energy_axis]) + dataset_empty = SpectrumDataset.create(geom=geom, energy_axis_true=energy_axis_true, name="crab") + + wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02) + exclusion_mask = wcsgeom.region_mask(geom.region, inside=False) + + maker = SpectrumDatasetMaker() + safe_mask_maker = SafeMaskMaker(methods=["aeff-max"], aeff_percent=10) + bkg_maker = ReflectedRegionsBackgroundMaker(exclusion_mask=exclusion_mask) + + datasets = Datasets() + for obs in observations: + dataset = maker.run(dataset_empty.copy(), obs) + dataset = safe_mask_maker.run(dataset, obs) + dataset_on_off = bkg_maker.run(dataset, obs) + datasets.append(dataset) + + return datasets + +def estimate_lightcurve(datasets): + target_position = SkyCoord(ra=83.63308, dec=22.01450, unit="deg") + spectral_model = PowerLawSpectralModel( + index=2.5, + amplitude=3.5e-11 * u.Unit("1 / (cm2 s TeV)"), + reference=1 * u.TeV, + ) + sky_model = SkyModel(spectral_model=spectral_model, name="crab") + datasets.models = Models([sky_model]) + + lc_maker = LightCurveEstimator(energy_edges=[1, 10] * u.TeV, reoptimize=False) + return lc_maker.run(datasets) + +def plot_lightcurve(args, lightcurve): + fig, ax = plt.subplots( + figsize=(8, 6), + gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98}, + ) + lightcurve.plot(ax=ax, marker="o", label="1D") + plt.legend() + runlist_name = Path(args.runlist).stem + output_path = Path(f"./validation_plots/lightcurve_plot_{runlist_name}.png") + fig.savefig(output_path) + logging.info(f"Lightcurve plot saved to {output_path}") + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--runlist", required=True, help="Path to runlist file") + args = parser.parse_args() + + obs_ids = read_obs_ids(args.runlist) + data_store = DataStore.from_file(filename="./FITS/hdu-index.fits.gz") + + exclusion_mask = make_exclusion_mask() + datasets = make_datasets(data_store, obs_ids, exclusion_mask) + lightcurve = estimate_lightcurve(datasets) + plot_lightcurve(args, lightcurve) + +if __name__ == "__main__": + main() + From 4c9dbe776ed098d8b05bab3e5555c7d3aa48fcd7 Mon Sep 17 00:00:00 2001 From: Eshita Joshi Date: Wed, 21 May 2025 09:54:50 +0200 Subject: [PATCH 2/4] autoplotting lightcurves for all runlists --- .../gammapy/crab_lightcurve_validation.py | 13 ++--- .../gammapy/runlist_lightcurve_runner.py | 50 +++++++++++++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 release_tests/gammapy/runlist_lightcurve_runner.py diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py index 7d2443f..d748ae1 100644 --- a/release_tests/gammapy/crab_lightcurve_validation.py +++ b/release_tests/gammapy/crab_lightcurve_validation.py @@ -73,27 +73,28 @@ def make_datasets(data_store, obs_ids, exclusion_mask): energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "10 TeV", nbin=15) energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "20 TeV", nbin=40, name="energy_true") - geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.11)", axes=[energy_axis]) + geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.08944272)", axes=[energy_axis]) dataset_empty = SpectrumDataset.create(geom=geom, energy_axis_true=energy_axis_true, name="crab") wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02) exclusion_mask = wcsgeom.region_mask(geom.region, inside=False) - maker = SpectrumDatasetMaker() + maker = SpectrumDatasetMaker( + containment_correction=False, selection=["counts", "exposure", "edisp"] + ) safe_mask_maker = SafeMaskMaker(methods=["aeff-max"], aeff_percent=10) bkg_maker = ReflectedRegionsBackgroundMaker(exclusion_mask=exclusion_mask) datasets = Datasets() for obs in observations: - dataset = maker.run(dataset_empty.copy(), obs) - dataset = safe_mask_maker.run(dataset, obs) + dataset = maker.run(dataset_empty.copy(name=f'{obs.obs_id}'), obs) dataset_on_off = bkg_maker.run(dataset, obs) - datasets.append(dataset) + dataset_on_off = safe_mask_maker.run(dataset, obs) + datasets.append(dataset_on_off) return datasets def estimate_lightcurve(datasets): - target_position = SkyCoord(ra=83.63308, dec=22.01450, unit="deg") spectral_model = PowerLawSpectralModel( index=2.5, amplitude=3.5e-11 * u.Unit("1 / (cm2 s TeV)"), diff --git a/release_tests/gammapy/runlist_lightcurve_runner.py b/release_tests/gammapy/runlist_lightcurve_runner.py new file mode 100644 index 0000000..ed22a02 --- /dev/null +++ b/release_tests/gammapy/runlist_lightcurve_runner.py @@ -0,0 +1,50 @@ +import os +import re +import sys +from pathlib import Path +from gammapy.data import DataStore +import crab_lightcurve_validation + + +# Function to read all run IDs from a runlist file +def read_runs_from_file(file_path): + with open(file_path) as f: + return [int(line.strip()) for line in f if line.strip()] + +# Directory containing runlists +download_dir = Path("/EventDisplay_Release_v491/Crab/runlists") +all_files = os.listdir(download_dir) +runlist_files = [f for f in all_files if re.match(r'runlist_releaseTesting.*\.dat', f)] + +print(f"Found {len(runlist_files)} runlist files.") + + +# Collect all unique run IDs +all_runs = set() +for runlist in runlist_files: + local_path = download_dir / runlist + runs = read_runs_from_file(local_path) + all_runs.update(runs) + +print(f"Found {len(all_runs)} unique runs across all runlists.") + +# Create a combined runlist file +combined_runlist_path = Path("./combined_runlist.dat") +with open(combined_runlist_path, "w") as f: + for run in sorted(all_runs): + f.write(f"{run}\n") + +print(f"Created combined runlist with {len(all_runs)} runs at {combined_runlist_path}") + + +combined_runlist_path = Path("./combined_runlist.dat") + +for runlist in runlist_files: + local_path = download_dir / runlist + print(f"Processing {runlist}") + sys.argv = [ + "crab_lightcurve_validation.py", + "--runlist", + str(local_path) + ] + crab_lightcurve_validation.main() \ No newline at end of file From 559189476944d8b8458412780835215c1ec65b13 Mon Sep 17 00:00:00 2001 From: Eshita Joshi Date: Tue, 27 May 2025 11:38:28 +0200 Subject: [PATCH 3/4] added comparative plots of relative differences --- .../gammapy/crab_lightcurve_validation.py | 612 +++++++++++++++++- .../gammapy/runlist_lightcurve_runner.py | 127 +++- 2 files changed, 683 insertions(+), 56 deletions(-) diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py index d748ae1..2cc8f49 100644 --- a/release_tests/gammapy/crab_lightcurve_validation.py +++ b/release_tests/gammapy/crab_lightcurve_validation.py @@ -1,26 +1,79 @@ - """ This script performs light curve estimation for Crab data using Gammapy v1.3. Functions: - read_obs_ids(runlist_path): Reads obs IDs from a runlist file. - make_exclusion_mask(): Creates an exclusion mask for regions to be excluded from analysis. -- make_datasets(data_store, obs_ids, exclusion_mask): Generates datasets for the given observations and exclusion mask. -- estimate_lightcurve(datasets): Estimates the light curve for the given datasets using a predefined spectral model. +- make_datasets(data_store, obs_ids, exclusion_mask): Generates datasets for the + analysis. +- main(args=None): Main function to parse arguments and run the workflow. + gc.collect() + parser = argparse.ArgumentParser() + parser.add_argument("--runlist", required=True, help="Path to runlist file") + parser.add_argument( + "--datastore", + def stats_text.append( + f"{knn_val}:\n" + f" Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, σ: {stats['std']:.2f}%" + )="./hdu-index.fits.gz", + help="Path to the datastore index file", + ) + parser.add_argument( + "--only-relative", + action="store_true", + help="Only create relative difference plot", + ) + parser.add_argument( + "--no-relative", + action="store_true", + help="Skip creating relative difference plot", + ) + + # Parse either command line arguments or the passed arguments + if args is None: + parsed_args = parser.parse_args() + else: + parsed_args = parser.parse_args(args) + + obs_ids = read_obs_ids(parsed_args.runlist) + data_store = DataStore.from_file(filename=parsed_args.datastore) + + exclusion_mask = make_exclusion_mask() + datasets = make_datasets(data_store, obs_ids, exclusion_mask) + lightcurve = estimate_lightcurve(datasets) + plot_lightcurve(parsed_args, lightcurve) + + # Generate relative difference plot unless explicitly disabled + if not hasattr(parsed_args, "no_relative") or not parsed_args.no_relative: + plot_relative_difference(parsed_args, lightcurve) + + gc.collect() and exclusion mask. +- estimate_lightcurve(datasets): Estimates the light curve for the given datasets + using a predefined spectral model. - plot_lightcurve(lightcurve): Plots the estimated light curve. +- plot_relative_difference(args, lightcurve): Plots the relative difference between + Gammapy and Anasum values. - main(): Main function to parse arguments, process data, and generate the light curve. Usage: Run the script with the `--runlist` argument pointing to a file containing observation IDs. """ + import argparse +import datetime +import gc import logging +import re +import traceback from pathlib import Path import astropy.units as u +import matplotlib.dates as mdates import matplotlib.pyplot as plt import numpy as np +import pandas as pd from astropy.coordinates import SkyCoord +from astropy.time import Time from gammapy.data import DataStore from gammapy.datasets import Datasets, SpectrumDataset from gammapy.estimators import LightCurveEstimator @@ -33,16 +86,19 @@ from gammapy.modeling.models import Models, PowerLawSpectralModel, SkyModel from regions import CircleSkyRegion +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +DATA_DIR = "/home/eshita/Documents/VERITAS/" + def read_obs_ids(runlist_path): with open(runlist_path) as f: obs_ids = [int(line.strip()) for line in f if line.strip()] return obs_ids + def make_exclusion_mask(): - axis = MapAxis.from_edges( - np.logspace(-1.0, 1.0, 10), unit="TeV", name="energy", interp="log" - ) + axis = MapAxis.from_edges(np.logspace(-1.0, 1.0, 10), unit="TeV", name="energy", interp="log") geom = WcsGeom.create( skydir=(83.6333, 22.0145), width=(4, 4), binsz=0.01, frame="icrs", axes=[axis] ) @@ -54,46 +110,56 @@ def make_exclusion_mask(): exclusions = [ CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=0.3 * u.deg) for ra, dec in [ - (81.9087, 21.937), (82.6806, 22.4623), (83.4118, 20.4742), - (84.1099, 21.9931), (84.4112, 21.1425), (84.8629, 21.7629), - (85.4782, 23.3262), (85.5166, 22.6603) + (81.9087, 21.937), + (82.6806, 22.4623), + (83.4118, 20.4742), + (84.1099, 21.9931), + (84.4112, 21.1425), + (84.8629, 21.7629), + (85.4782, 23.3262), + (85.5166, 22.6603), ] ] regions = [on_region] + exclusions exclusion_mask = ~geom.to_image().region_mask(regions) - plt.figure() + fig = plt.figure() exclusion_mask.plot() + plt.close(fig) return exclusion_mask + def make_datasets(data_store, obs_ids, exclusion_mask): observations = data_store.get_observations(obs_ids, required_irf="point-like") - energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "10 TeV", nbin=15) - energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "20 TeV", nbin=40, name="energy_true") + energy_axis = MapAxis.from_energy_bounds("0.5 TeV", "50 TeV", nbin=25) + energy_axis_true = MapAxis.from_energy_bounds("0.3 TeV", "70 TeV", nbin=50, name="energy_true") geom = RegionGeom.create(region="icrs;circle(83.63,22.01,0.08944272)", axes=[energy_axis]) - dataset_empty = SpectrumDataset.create(geom=geom, energy_axis_true=energy_axis_true, name="crab") + dataset_empty = SpectrumDataset.create( + geom=geom, energy_axis_true=energy_axis_true, name="crab" + ) wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02) exclusion_mask = wcsgeom.region_mask(geom.region, inside=False) maker = SpectrumDatasetMaker( - containment_correction=False, selection=["counts", "exposure", "edisp"] + containment_correction=False, selection=["counts", "exposure", "edisp"] ) safe_mask_maker = SafeMaskMaker(methods=["aeff-max"], aeff_percent=10) bkg_maker = ReflectedRegionsBackgroundMaker(exclusion_mask=exclusion_mask) datasets = Datasets() for obs in observations: - dataset = maker.run(dataset_empty.copy(name=f'{obs.obs_id}'), obs) + dataset = maker.run(dataset_empty.copy(name=f"{obs.obs_id}"), obs) dataset_on_off = bkg_maker.run(dataset, obs) dataset_on_off = safe_mask_maker.run(dataset, obs) datasets.append(dataset_on_off) return datasets + def estimate_lightcurve(datasets): spectral_model = PowerLawSpectralModel( index=2.5, @@ -103,34 +169,528 @@ def estimate_lightcurve(datasets): sky_model = SkyModel(spectral_model=spectral_model, name="crab") datasets.models = Models([sky_model]) - lc_maker = LightCurveEstimator(energy_edges=[1, 10] * u.TeV, reoptimize=False) - return lc_maker.run(datasets) + lc_maker = LightCurveEstimator(energy_edges=[0.5, 50] * u.TeV, reoptimize=False) + lightcurve = lc_maker.run(datasets) + + return lightcurve + def plot_lightcurve(args, lightcurve): + # Create output path + runlist_name = Path(args.runlist).stem + output_dir = Path("./validation_plots/knn05_MinMaxScaler") + output_dir.mkdir(exist_ok=True, parents=True) + output_path = output_dir / f"lightcurve_plot_{runlist_name}.png" + + # Check if the plot already exists + if output_path.exists(): + logging.info(f"Lightcurve plot already exists at {output_path}, skipping generation") + return + + plt.figure(figsize=(10, 6)) + fig, ax = plt.subplots( figsize=(8, 6), gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98}, ) - lightcurve.plot(ax=ax, marker="o", label="1D") - plt.legend() + ax.set_ylim(1e-12, 1e-9) + lightcurve.plot(ax=ax, sed_type="flux", marker="o", label="Gammapy") + + ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) + ax.xaxis.set_major_locator(mdates.AutoDateLocator()) + fig.autofmt_xdate() + runlist_name = Path(args.runlist).stem - output_path = Path(f"./validation_plots/lightcurve_plot_{runlist_name}.png") + folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name) + + if folder_match: + folder_name = folder_match.group(1) + # Try to find and load the reference CSV file + base_path = f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel" + csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv") + + if csv_path.exists(): + try: + ref_data = pd.read_csv(csv_path) + + if "MDJ" in ref_data.columns and "Flux" in ref_data.columns: + times = Time(ref_data["MDJ"], format="mjd") + plot_times = [t.plot_date for t in times] + + yerr = None + if "FluxError" in ref_data.columns: + yerr = ref_data["FluxError"] + + ax.errorbar( + plot_times, + ref_data["Flux"], + yerr=yerr, + fmt="s", + color="red", + label="Anasum", + ) + + if "MJD_width" in ref_data.columns: + for i, (mjd, flux, width) in enumerate( + zip(ref_data["MDJ"], ref_data["Flux"], ref_data["MJD_width"]) + ): + left = Time(mjd - width / 2, format="mjd").plot_date + right = Time(mjd + width / 2, format="mjd").plot_date + ax.hlines(flux, left, right, colors="red", lw=1.5) + + logging.info(f"Added reference data from {csv_path}") + else: + logging.warning("CSV file missing required columns (MDJ, Flux)") + except Exception as e: + logging.warning(f"Failed to load or plot reference data: {e}") + + plt.legend() + + output_dir = Path("./validation_plots/knn05_MinMaxScaler") + output_dir.mkdir(exist_ok=True, parents=True) + + output_path = output_dir / f"lightcurve_plot_{runlist_name}.png" fig.savefig(output_path) logging.info(f"Lightcurve plot saved to {output_path}") + plt.close() + plt.clf() + gc.collect() + + +def plot_relative_difference(args, lightcurve): + """Create a plot showing the relative difference between Gammapy and Anasum values.""" + runlist_name = Path(args.runlist).stem + output_dir = Path("./validation_plots/knn05_MinMaxScaler") + output_dir.mkdir(exist_ok=True, parents=True) + output_path = output_dir / f"relative_diff_{runlist_name}.png" + + # Check if the plot already exists + if output_path.exists(): + logger.info( + f"Relative difference plot already exists at {output_path}, skipping generation" + ) + return + + # Continue with existing code... + folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name) + + if not folder_match: + logger.warning(f"Could not extract folder name from {runlist_name}, skipping relative plot") + return + + folder_name = folder_match.group(1) + csv_path = Path( + f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6_{folder_name}/LightCurve.csv" + ) + + if not csv_path.exists(): + logger.warning(f"No reference CSV found at {csv_path}, skipping relative plot") + return + + try: + try: + ref_data = pd.read_csv(csv_path) + if len(ref_data) == 0: + logger.warning("Anasum CSV file is empty, skipping relative plot") + return + except Exception as e: + logger.warning(f"Failed to load Anasum data: {e}") + return + + temp_fig, temp_ax = plt.subplots() + lightcurve.plot(ax=temp_ax, sed_type="flux") + lines = [child for child in temp_ax.get_children() if isinstance(child, plt.Line2D)] + if not lines or len(lines[0].get_xdata()) == 0: + logger.warning("No Gammapy flux data found, skipping relative plot") + plt.close(temp_fig) + return + + gammapy_times_plot = lines[0].get_xdata() + gammapy_flux = lines[0].get_ydata() + + if len(gammapy_times_plot) == 0: + logger.warning("No Gammapy time points found, skipping relative plot") + plt.close(temp_fig) + return + + gammapy_flux_err = lightcurve.to_table(format="lightcurve", sed_type="flux")[ + "flux_err" + ].quantity.value + + # Convert plot x-values (dates) to MJD format for comparison + if isinstance(gammapy_times_plot[0], float): + gammapy_times = np.array([mdates.num2date(t).timestamp() for t in gammapy_times_plot]) + gammapy_times = Time(gammapy_times, format="unix").mjd + elif isinstance(gammapy_times_plot[0], (np.datetime64, datetime.datetime)): + gammapy_times = Time( + [t.isoformat() if hasattr(t, "isoformat") else t for t in gammapy_times_plot] + ).mjd + plt.close(temp_fig) + + ref_data = pd.read_csv(csv_path) + + anasum_times = Time(ref_data["MDJ"], format="mjd") + anasum_flux = ref_data["Flux"] + anasum_flux_err = ref_data.get("FluxError", np.zeros_like(anasum_flux)) + + rel_diffs = [] + rel_diff_errors = [] + matched_times = [] + + total_gammapy_points = len(gammapy_times) + logger.info(f"Total Gammapy points: {total_gammapy_points}") + + for g_time, g_flux, g_err in zip(gammapy_times, gammapy_flux, gammapy_flux_err): + # Find closest Anasum time point within tolerance + g_time_value = g_time.value if hasattr(g_time, "value") else g_time + time_diffs = np.abs(anasum_times.mjd - g_time_value) + closest_idx = np.argmin(time_diffs) + + if float(time_diffs[closest_idx]) < (30 / 1440): # 30 minutes in days + a_flux = anasum_flux.iloc[closest_idx] + + # Skip extreme outliers where flux is too low + if hasattr(g_flux, "to_value"): + g_flux_val = g_flux.to_value(u.Unit("1 / (cm2 s)")) + else: + g_flux_val = float(g_flux) + if g_flux_val < 1e-15 or float(a_flux) < 1e-15: + continue + + a_flux_val = float(a_flux) + rel_diff = (g_flux_val - a_flux_val) / a_flux_val * 100 + + if hasattr(g_err, "to_value"): + g_rel_err = g_err.to_value("") / a_flux_val * 100 + else: + g_rel_err = float(g_err) / a_flux_val * 100 + + # Calculate error on relative difference + g_rel_err = float(g_err) / a_flux_val * 100 + a_rel_err = 0 + if anasum_flux_err is not None and not pd.isna(anasum_flux_err.iloc[closest_idx]): + a_err = float(anasum_flux_err.iloc[closest_idx]) + a_rel_err = a_err / a_flux_val * 100 + + # Combine errors in quadrature + rel_diff_err = np.sqrt(g_rel_err**2 + a_rel_err**2) + + # Only append if finite + if np.isfinite(rel_diff) and np.isfinite(rel_diff_err): + rel_diffs.append(rel_diff) + rel_diff_errors.append(rel_diff_err) + matched_times.append(g_time) -def main(): + if not rel_diffs: + logger.warning("No valid matching time points found for relative difference plot") + return + + fig, ax = plt.subplots( + figsize=(8, 6), + gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98}, + ) + + matched_times = Time(matched_times, format="mjd") + + ax.errorbar( + matched_times.plot_date, + rel_diffs, + yerr=rel_diff_errors, + fmt="o", + color="purple", + label="Relative Difference", + ) + + ax.axhline(y=0, color="k", linestyle="-", alpha=0.3) + + ax.set_ylabel( + r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$" + ) + ax.set_title(f"Relative Flux Difference\n{runlist_name}") + + ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) + ax.xaxis.set_major_locator(mdates.AutoDateLocator()) + fig.autofmt_xdate() + + rel_diffs_array = np.array(rel_diffs) + mean_diff = np.mean(rel_diffs_array) + median_diff = np.median(rel_diffs_array) + std_diff = np.std(rel_diffs_array) + + stats_text = ( + f"Mean: {mean_diff:.2f}%\n" f"Median: {median_diff:.2f}%\n" f"Std Dev: {std_diff:.2f}%" + ) + + ax.text( + 0.05, + 0.95, + stats_text, + transform=ax.transAxes, + fontsize=10, + verticalalignment="top", + bbox=dict(boxstyle="round", facecolor="white", alpha=0.8), + ) + + output_dir = Path("./validation_plots/knn05_MinMaxScaler") + output_dir.mkdir(exist_ok=True, parents=True) + + output_path = output_dir / f"relative_diff_{runlist_name}.png" + fig.savefig(output_path, bbox_inches="tight") + plt.close(fig) + plt.clf() + gc.collect() + logger.info(f"Relative difference plot saved to {output_path}") + + except Exception as e: + logger.error(f"Error creating relative difference plot: {e}", exc_info=True) + + logger.error(traceback.format_exc()) + + +def plot_combined_relative_differences(runlist_path): + """Create a combined plot showing relative differences compared to Anasum.""" + + # Get runlist name and subfolder + runlist_name = Path(runlist_path).stem + folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name) + + if not folder_match: + logger.warning(f"Could not extract folder name from {runlist_name}, skipping combined plot") + return + + folder_name = folder_match.group(1) + base_path = f"{DATA_DIR}EventDisplay_Release_v491/Crab/V6_moderate2tel" + anasum_csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv") + + if not anasum_csv_path.exists(): + logger.warning(f"No reference CSV found at {anasum_csv_path}, skipping combined plot") + return + + # Load Anasum reference data + try: + ref_data = pd.read_csv(anasum_csv_path) + anasum_times = Time(ref_data["MDJ"], format="mjd") + anasum_flux = ref_data["Flux"] + except Exception as e: + logger.warning(f"Failed to load Anasum data: {e}") + return + + fig, ax = plt.subplots( + figsize=(10, 6), + gridspec_kw={"left": 0.14, "bottom": 0.14, "top": 0.92, "right": 0.92}, + ) + + # List of kNN values to process + knn_values = ["knn05_sec", "knn05", "knn05_MinMaxScaler"] + markers = ["o", "s", "^"] + colors = ["purple", "green", "orange"] + + all_stats = {} + + for idx, knn_val in enumerate(knn_values): + try: + datastore_path = Path(f"./FITS/{knn_val}/{folder_name}/hdu-index.fits.gz") + if not datastore_path.exists(): + logger.warning(f"No datastore found at {datastore_path}, skipping {knn_val}") + continue + + obs_ids = read_obs_ids(runlist_path) + data_store = DataStore.from_file(filename=str(datastore_path)) + exclusion_mask = make_exclusion_mask() + datasets = make_datasets(data_store, obs_ids, exclusion_mask) + lightcurve = estimate_lightcurve(datasets) + + flux_points_table = lightcurve.to_table(format="lightcurve", sed_type="flux") + if ( + "flux" not in flux_points_table.colnames + or "flux_err" not in flux_points_table.colnames + ): + logger.error(f"Missing flux or flux_err columns in lightcurve data for {knn_val}") + continue + + gammapy_flux = flux_points_table["flux"].quantity.value + gammapy_flux_err = flux_points_table["flux_err"].quantity.value + + time_col = flux_points_table["time_min"] + if hasattr(time_col, "mjd"): + # Already a Time object or Quantity with .mjd + gammapy_times = time_col.mjd + elif isinstance(time_col[0], (float, np.floating, int)): + # Already MJD floats + gammapy_times = time_col + else: + # Try to convert to Time, then get mjd + gammapy_times = Time(time_col).mjd + + logger.info( + f"Retrieved {len(gammapy_flux)} flux points with proper errors for {knn_val}" + ) + + if len(gammapy_times) == 0: + logger.warning(f"No valid time points found for {knn_val}, skipping") + continue + + rel_diffs = [] + rel_diff_errors = [] + matched_times = [] + + total_gammapy_points = len(gammapy_times) + logger.info(f"Total Gammapy points for {knn_val}: {total_gammapy_points}") + + for g_time, g_flux, g_err in zip(gammapy_times, gammapy_flux, gammapy_flux_err): + g_time_value = g_time.value if hasattr(g_time, "value") else g_time + time_diffs = np.abs(anasum_times.mjd - g_time_value) + closest_idx = np.argmin(time_diffs) + + if float(time_diffs[closest_idx]) < (30 / 1440): # 30 minutes in days + a_flux = anasum_flux.iloc[closest_idx] + + # Skip points with very low flux + if hasattr(g_flux, "to_value"): + g_flux_val = g_flux.to_value(u.Unit("1 / (cm2 s)")) + else: + g_flux_val = float(g_flux) + if g_flux_val < 1e-15 or float(a_flux) < 1e-15: + continue + + try: + g_flux_val = float(g_flux) + a_flux_val = float(a_flux) + + rel_diff = (g_flux_val - a_flux_val) / a_flux_val * 100 + + if not np.isfinite(rel_diff): + continue + + g_rel_err = float(g_err) / a_flux_val * 100 + + if np.isfinite(rel_diff) and np.isfinite(g_rel_err): + rel_diffs.append(rel_diff) + rel_diff_errors.append(g_rel_err) + matched_times.append(g_time) + + except (ValueError, ZeroDivisionError): + continue + + if not rel_diffs: + logger.warning(f"No valid matching time points found for {knn_val}, skipping") + continue + + # Plot this kNN's relative difference + matched_times = Time(matched_times, format="mjd") + ax.errorbar( + matched_times.plot_date, + rel_diffs, + yerr=rel_diff_errors, + fmt=markers[idx], + color=colors[idx], + label=f"{knn_val}", + alpha=0.7, + ) + + # Calculate statistics for this kNN value + rel_diffs_array = np.array(rel_diffs) + all_stats[knn_val] = { + "mean": np.mean(rel_diffs_array), + "median": np.median(rel_diffs_array), + "std": np.std(rel_diffs_array), + } + + except Exception: + logger.error(f"Error processing {knn_val}") + continue + + # If we didn't plot anything, return + if not all_stats: + logger.warning("No data to plot for any kNN value, skipping combined plot") + plt.close(fig) + return + + ax.axhline(y=0, color="k", linestyle="-", alpha=0.3) + + ax.set_ylabel(r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$") + + ax.set_title(f"Relative Flux Difference\n{runlist_name}") + ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) + ax.xaxis.set_major_locator(mdates.AutoDateLocator()) + fig.autofmt_xdate() + handles, labels = ax.get_legend_handles_labels() + + stats_text = [] + for knn_val in all_stats: + stats = all_stats[knn_val] + stats_text.append( + f"{knn_val}:\n" + f" Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, c3: {stats['std']:.2f}%" + ) + + ax.legend(handles, labels) + + stat_box = "\n".join(stats_text) + ax.text( + 0.02, + 0.02, + stat_box, + transform=ax.transAxes, + fontsize=9, + verticalalignment="bottom", + horizontalalignment="left", + bbox=dict(boxstyle="round", facecolor="white", alpha=0.8), + ) + + # Save the combined plot + output_dir = Path("./validation_plots/combined") + output_dir.mkdir(exist_ok=True, parents=True) + output_path = output_dir / f"combined_relative_diff_{runlist_name}.png" + fig.savefig(output_path, bbox_inches="tight") + plt.close(fig) + + logger.info(f"Combined relative difference plot saved to {output_path}") + + +def main(args=None): + gc.collect() parser = argparse.ArgumentParser() parser.add_argument("--runlist", required=True, help="Path to runlist file") - args = parser.parse_args() + parser.add_argument( + "--datastore", + default="./hdu-index.fits.gz", + help="Path to the datastore index file", + ) + parser.add_argument( + "--only-relative", + action="store_true", + help="Only create relative difference plot", + ) + parser.add_argument( + "--no-relative", + action="store_true", + help="Skip creating relative difference plot", + ) - obs_ids = read_obs_ids(args.runlist) - data_store = DataStore.from_file(filename="./FITS/hdu-index.fits.gz") + if args is None: + parsed_args = parser.parse_args() + else: + parsed_args = parser.parse_args(args) + + obs_ids = read_obs_ids(parsed_args.runlist) + data_store = DataStore.from_file(filename=parsed_args.datastore) exclusion_mask = make_exclusion_mask() datasets = make_datasets(data_store, obs_ids, exclusion_mask) lightcurve = estimate_lightcurve(datasets) - plot_lightcurve(args, lightcurve) + + # Only create the plots specified by the arguments + if not hasattr(parsed_args, "only_relative") or not parsed_args.only_relative: + plot_lightcurve(parsed_args, lightcurve) + + if (not hasattr(parsed_args, "no_relative") or not parsed_args.no_relative) and ( + not hasattr(parsed_args, "only_relative") or parsed_args.only_relative + ): + plot_relative_difference(parsed_args, lightcurve) + + gc.collect() + if __name__ == "__main__": main() - diff --git a/release_tests/gammapy/runlist_lightcurve_runner.py b/release_tests/gammapy/runlist_lightcurve_runner.py index ed22a02..35ef624 100644 --- a/release_tests/gammapy/runlist_lightcurve_runner.py +++ b/release_tests/gammapy/runlist_lightcurve_runner.py @@ -1,9 +1,56 @@ +import gc +import logging +import multiprocessing as mp import os import re -import sys +from functools import partial from pathlib import Path -from gammapy.data import DataStore + import crab_lightcurve_validation +from gammapy.data import DataStore + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(processName)s - %(message)s") +logger = logging.getLogger(__name__) + +DATA_DIR = "/home/eshita/Documents/VERITAS/" + + +def generate_index_files(runlist_path, runlist_name): + path = Path(DATA_DIR + "Data/kNN/v2dl3_moderate2tel_point-like_knn05_MinMaxScaler") + + # Extract the runlist subfolder name from the runlist filename + folder_match = re.search(r"runlist_releaseTestingV6_(.*)", Path(runlist_name).stem) + if folder_match: + subfolder = folder_match.group(1) + else: + subfolder = Path(runlist_name).stem # Use full name if pattern doesn't match + + # Read the run numbers from a file + with open(runlist_path, "r") as f: + selected_runs = { + int(line.strip()) for line in f if line.strip().isdigit() + } # Convert to a set of integers + # Find all FITS files and filter by run number + paths = [ + p + for p in path.rglob("*.fits.gz") + if any(f"{run}." in p.name for run in selected_runs) # Match run number in filename + ] + + if not paths: + raise ValueError(f"No matching FITS files found for {runlist_name}") + + data_store = DataStore.from_events_files(paths) + + # Create output directory + output_dir = Path(f"./FITS/knn05_MinMaxScaler/{subfolder}") + output_dir.mkdir(parents=True, exist_ok=True) + + # Write the index files + data_store.hdu_table.write(output_dir / "hdu-index.fits.gz", overwrite=True) + data_store.obs_table.write(output_dir / "obs-index.fits.gz", overwrite=True) + + return output_dir # Function to read all run IDs from a runlist file @@ -11,40 +58,60 @@ def read_runs_from_file(file_path): with open(file_path) as f: return [int(line.strip()) for line in f if line.strip()] -# Directory containing runlists -download_dir = Path("/EventDisplay_Release_v491/Crab/runlists") -all_files = os.listdir(download_dir) -runlist_files = [f for f in all_files if re.match(r'runlist_releaseTesting.*\.dat', f)] -print(f"Found {len(runlist_files)} runlist files.") +def process_runlist(download_dir, runlist): + try: + local_path = download_dir / runlist + logger.info(f"Starting to process {runlist}") + # Check if combined plot already exists + runlist_name = Path(runlist).stem + combined_plot_path = Path( + f"./validation_plots/combined/combined_relative_diff_{runlist_name}.png" + ) -# Collect all unique run IDs -all_runs = set() -for runlist in runlist_files: - local_path = download_dir / runlist - runs = read_runs_from_file(local_path) - all_runs.update(runs) + if combined_plot_path.exists(): + logger.info(f"Combined plot already exists for {runlist}, skipping processing") + return True -print(f"Found {len(all_runs)} unique runs across all runlists.") + # Generate index files first if they don't exist + logger.info(f"Generating index files for {runlist} if needed...") + output_dir = generate_index_files(local_path, runlist) + datastore_path = output_dir / "hdu-index.fits.gz" -# Create a combined runlist file -combined_runlist_path = Path("./combined_runlist.dat") -with open(combined_runlist_path, "w") as f: - for run in sorted(all_runs): - f.write(f"{run}\n") + logger.info(f"Creating plots for {runlist}") + crab_lightcurve_validation.main( + ["--runlist", str(local_path), "--datastore", str(datastore_path), "--no-relative"] + ) + crab_lightcurve_validation.plot_combined_relative_differences(str(local_path)) -print(f"Created combined runlist with {len(all_runs)} runs at {combined_runlist_path}") + logger.info(f"Successfully completed processing {runlist}") + return True + except Exception as e: + logger.error(f"Error processing {runlist}: {e}") + return False -combined_runlist_path = Path("./combined_runlist.dat") +if __name__ == "__main__": + # Directory containing runlists + download_dir = Path(DATA_DIR + "EventDisplay_Release_v491/Crab/runlists") + all_files = os.listdir(download_dir) + runlist_files = [f for f in all_files if re.match(r"runlist_releaseTestingV6_..*\.dat", f)] -for runlist in runlist_files: - local_path = download_dir / runlist - print(f"Processing {runlist}") - sys.argv = [ - "crab_lightcurve_validation.py", - "--runlist", - str(local_path) - ] - crab_lightcurve_validation.main() \ No newline at end of file + print(f"Found {len(runlist_files)} runlist files") + + # Number of parallel processes to use + num_processes = max(1, mp.cpu_count() - 1) + logger.info(f"Using {num_processes} parallel processes") + + # Create a partial function with the download_dir argument fixed + process_func = partial(process_runlist, download_dir) + + # Process runlists in parallel + with mp.Pool(processes=num_processes) as pool: + results = pool.map(process_func, runlist_files) + + # Summary of results + successful = sum(results) + logger.info(f"Processing complete: {successful} successful.") + gc.collect() From 0e7abd3a29ee0cac31c2533f56795e7ebea41a33 Mon Sep 17 00:00:00 2001 From: Eshita Joshi Date: Fri, 13 Jun 2025 11:49:35 +0200 Subject: [PATCH 4/4] plotting high diff runs --- .../gammapy/crab_lightcurve_validation.py | 566 +++++++++++------- 1 file changed, 361 insertions(+), 205 deletions(-) diff --git a/release_tests/gammapy/crab_lightcurve_validation.py b/release_tests/gammapy/crab_lightcurve_validation.py index 2cc8f49..51cf5e4 100644 --- a/release_tests/gammapy/crab_lightcurve_validation.py +++ b/release_tests/gammapy/crab_lightcurve_validation.py @@ -14,7 +14,7 @@ "--datastore", def stats_text.append( f"{knn_val}:\n" - f" Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, σ: {stats['std']:.2f}%" + f" Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, \sigma: {stats['std']:.2f}%" )="./hdu-index.fits.gz", help="Path to the datastore index file", ) @@ -66,7 +66,7 @@ def stats_text.append( import re import traceback from pathlib import Path - +from gammapy.estimators import FluxPointsEstimator, FluxPoints import astropy.units as u import matplotlib.dates as mdates import matplotlib.pyplot as plt @@ -75,7 +75,7 @@ def stats_text.append( from astropy.coordinates import SkyCoord from astropy.time import Time from gammapy.data import DataStore -from gammapy.datasets import Datasets, SpectrumDataset +from gammapy.datasets import Datasets, SpectrumDataset, FluxPointsDataset from gammapy.estimators import LightCurveEstimator from gammapy.makers import ( ReflectedRegionsBackgroundMaker, @@ -104,20 +104,18 @@ def make_exclusion_mask(): ) target_position = SkyCoord(ra=83.6333, dec=22.0145, unit="deg", frame="icrs") - on_region_radius = 0.3 * u.deg + on_region_radius = 0.4 * u.deg on_region = CircleSkyRegion(center=target_position, radius=on_region_radius) exclusions = [ - CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=0.3 * u.deg) - for ra, dec in [ - (81.9087, 21.937), - (82.6806, 22.4623), - (83.4118, 20.4742), - (84.1099, 21.9931), - (84.4112, 21.1425), - (84.8629, 21.7629), - (85.4782, 23.3262), - (85.5166, 22.6603), + CircleSkyRegion(SkyCoord(ra, dec, unit="deg", frame="icrs"), radius=radius * u.deg) + for ra, dec, radius in [ + (81.9087, 21.937, 0.3), + (82.6806, 22.4623, 0.3), + (83.4118, 20.4742, 0.3), + (84.4112, 21.1425, 0.3), + (84.8629, 21.7629, 0.3), + (85.4782, 23.3262, 0.3), ] ] @@ -126,7 +124,11 @@ def make_exclusion_mask(): fig = plt.figure() exclusion_mask.plot() + plt.savefig("/home/eshita/Documents/VERITAS/EventDisplay_ReleaseTests_code/release_tests/validation_plots/exclusion_mask.png") plt.close(fig) + plt.cla() + plt.clf() + gc.collect() return exclusion_mask @@ -141,9 +143,6 @@ def make_datasets(data_store, obs_ids, exclusion_mask): geom=geom, energy_axis_true=energy_axis_true, name="crab" ) - wcsgeom = WcsGeom.create(skydir=geom.center_skydir, width=5, binsz=0.02) - exclusion_mask = wcsgeom.region_mask(geom.region, inside=False) - maker = SpectrumDatasetMaker( containment_correction=False, selection=["counts", "exposure", "edisp"] ) @@ -193,28 +192,47 @@ def plot_lightcurve(args, lightcurve): figsize=(8, 6), gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98}, ) - ax.set_ylim(1e-12, 1e-9) + ax.set_ylim(1e-11, 0.5e-9) lightcurve.plot(ax=ax, sed_type="flux", marker="o", label="Gammapy") + + # Calculate statistics for Gammapy data + flux_table = lightcurve.to_table(format="lightcurve", sed_type="flux") + gammapy_flux = flux_table['flux'].quantity.value + + gammapy_mean = np.nanmean(gammapy_flux) + gammapy_median = np.nanmedian(gammapy_flux) + gammapy_std = np.nanstd(gammapy_flux) + + # Format for scientific notation + gammapy_stats_text = ( + f"Gammapy Stats:\n" + f"Mean: {gammapy_mean:.2e} cm$^{{-2}}$ s$^{{-1}}$\n" + f"Median: {gammapy_median:.2e} cm$^{{-2}}$ s$^{{-1}}$\n" + f"$\sigma$: {gammapy_std:.2e} cm$^{{-2}}$ s$^{{-1}}$" + ) ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) ax.xaxis.set_major_locator(mdates.AutoDateLocator()) fig.autofmt_xdate() runlist_name = Path(args.runlist).stem - folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name) + folder_match = re.search(r"runlist_releaseTestingV6(.*)", runlist_name) - if folder_match: + if True: #folder_match: folder_name = folder_match.group(1) # Try to find and load the reference CSV file - base_path = f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel" - csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv") + #base_path = f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel" + csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv') #Path(f"{base_path}/V6_{folder_name}/LightCurve.csv") if csv_path.exists(): + print() + print(f"Loading reference data from {csv_path}") + print() try: ref_data = pd.read_csv(csv_path) - if "MDJ" in ref_data.columns and "Flux" in ref_data.columns: - times = Time(ref_data["MDJ"], format="mjd") + if "MJD" in ref_data.columns and "Flux" in ref_data.columns: + times = Time(ref_data["MJD"], format="mjd") plot_times = [t.plot_date for t in times] yerr = None @@ -230,9 +248,26 @@ def plot_lightcurve(args, lightcurve): label="Anasum", ) + # Calculate statistics for Anasum data + anasum_flux = ref_data["Flux"].values + anasum_mean = np.mean(anasum_flux) + anasum_median = np.median(anasum_flux) + anasum_std = np.std(anasum_flux) + + # Add Anasum stats to the text + anasum_stats_text = ( + f"Anasum Stats:\n" + f"Mean: {anasum_mean:.2e} cm$^{{-2}}$ s$^{{-1}}$\n" + f"Median: {anasum_median:.2e} cm$^{{-2}}$ s$^{{-1}}$\n" + f"$\sigma$: {anasum_std:.2e} cm$^{{-2}}$ s$^{{-1}}$" + ) + + # Combined stats text + stats_text = f"{gammapy_stats_text}\n\n{anasum_stats_text}" + if "MJD_width" in ref_data.columns: for i, (mjd, flux, width) in enumerate( - zip(ref_data["MDJ"], ref_data["Flux"], ref_data["MJD_width"]) + zip(ref_data["MJD"], ref_data["Flux"], ref_data["MJD_width"]) ): left = Time(mjd - width / 2, format="mjd").plot_date right = Time(mjd + width / 2, format="mjd").plot_date @@ -240,11 +275,26 @@ def plot_lightcurve(args, lightcurve): logging.info(f"Added reference data from {csv_path}") else: - logging.warning("CSV file missing required columns (MDJ, Flux)") + logging.warning("CSV file missing required columns (MJD, Flux)") + stats_text = gammapy_stats_text except Exception as e: logging.warning(f"Failed to load or plot reference data: {e}") + stats_text = gammapy_stats_text + else: + stats_text = gammapy_stats_text + + # Add stats text box + ax.text( + 0.05, + 0.05, + stats_text, + transform=ax.transAxes, + fontsize=9, + verticalalignment="bottom", + bbox=dict(boxstyle="round", facecolor="white", alpha=0.8), + ) - plt.legend() + plt.legend(loc='upper left') output_dir = Path("./validation_plots/knn05_MinMaxScaler") output_dir.mkdir(exist_ok=True, parents=True) @@ -254,9 +304,28 @@ def plot_lightcurve(args, lightcurve): logging.info(f"Lightcurve plot saved to {output_path}") plt.close() plt.clf() + plt.cla() gc.collect() +def get_anasum_runs_with_zenith(log_path): + """ + Parse the anasum log file and return a set of run numbers with specified zenith + """ + runs = set() + pattern = re.compile(r"RUN\s+(\d+).*?at\s+(\d+),") + with open(log_path) as f: + for line in f: + match = pattern.search(line) + if match: + run = int(match.group(1)) + elevation = float(match.group(2)) + zenith = 90 - elevation + if zenith < 55: + runs.add(run) + return runs + + def plot_relative_difference(args, lightcurve): """Create a plot showing the relative difference between Gammapy and Anasum values.""" runlist_name = Path(args.runlist).stem @@ -271,180 +340,122 @@ def plot_relative_difference(args, lightcurve): ) return - # Continue with existing code... - folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name) - - if not folder_match: - logger.warning(f"Could not extract folder name from {runlist_name}, skipping relative plot") + log_path = "/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/anasum_releaseTestingV6.combined.log" + zenith_runs = get_anasum_runs_with_zenith(log_path) + + # Read runs from runlist + with open(args.runlist) as f: + all_run_ids = [int(line.strip()) for line in f if line.strip()] + # Filter to only those with zenith < 55 + filtered_run_ids = [run for run in all_run_ids if run in zenith_runs] + + # Get Gammapy data + flux_table = lightcurve.to_table(format="lightcurve", sed_type="flux") + gammapy_flux = flux_table['flux'].quantity.value + gammapy_flux_err = flux_table['flux_err'].quantity.value + + # Filter Gammapy data by run id + gammapy_run_ids = all_run_ids[:len(gammapy_flux)] if len(gammapy_flux) < len(all_run_ids) else all_run_ids + gammapy_flux_by_run = dict(zip(gammapy_run_ids, gammapy_flux)) + gammapy_flux_err_by_run = dict(zip(gammapy_run_ids, gammapy_flux_err)) + + # Get Anasum data + csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv') + ref_data = pd.read_csv(csv_path) + anasum_flux = ref_data["Flux"].values + anasum_flux_err = ref_data.get("FluxError", np.zeros_like(anasum_flux)) + for col in ref_data.columns: + if col.lower() in ["run", "runid", "obs_id", "obsid"]: + anasum_run_col = col + break + if anasum_run_col is not None: + print('anasum run col not none') + anasum_run_ids = ref_data[anasum_run_col].astype(int).values + anasum_flux_by_run = dict(zip(anasum_run_ids, anasum_flux)) + anasum_flux_err_by_run = dict(zip(anasum_run_ids, anasum_flux_err)) + else: + logger.error("No run number column found in Anasum CSV") return - folder_name = folder_match.group(1) - csv_path = Path( - f"{DATA_DIR}/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6_{folder_name}/LightCurve.csv" - ) - - if not csv_path.exists(): - logger.warning(f"No reference CSV found at {csv_path}, skipping relative plot") + # Now match only filtered runs + matched_gammapy_flux = [] + matched_gammapy_flux_err = [] + matched_anasum_flux = [] + matched_anasum_flux_err = [] + valid_run_ids = [] + for run_id in filtered_run_ids: + if run_id in gammapy_flux_by_run and run_id in anasum_flux_by_run: + matched_gammapy_flux.append(gammapy_flux_by_run[run_id]) + matched_gammapy_flux_err.append(gammapy_flux_err_by_run[run_id]) + matched_anasum_flux.append(anasum_flux_by_run[run_id]) + matched_anasum_flux_err.append(anasum_flux_err_by_run[run_id]) + valid_run_ids.append(run_id) + if len(matched_gammapy_flux) == 0: + logger.error("No matching runs found between Gammapy and Anasum") return + logger.info(f"Comparing {len(matched_gammapy_flux)} runs between Gammapy and Anasum") + gc.collect() + # Calculate relative differences + rel_diffs = [] + rel_diff_errors = [] + for g_flux, g_err, a_flux, a_err in zip(matched_gammapy_flux, matched_gammapy_flux_err, matched_anasum_flux, matched_anasum_flux_err): + if not np.isfinite(g_flux) or not np.isfinite(a_flux): + continue + rel_diff = (g_flux - a_flux) / a_flux * 100 + if a_err > 0: + rel_diff_err = np.sqrt((g_err / a_flux) ** 2 + (a_err * g_flux / (a_flux ** 2)) ** 2) * 100 + else: + rel_diff_err = g_err / a_flux * 100 + if np.isfinite(rel_diff) and np.isfinite(rel_diff_err): + rel_diffs.append(rel_diff) + rel_diff_errors.append(rel_diff_err) + if not rel_diffs: + logger.warning("No valid matching points found for relative difference plot") + return + fig, ax = plt.subplots( + figsize=(8, 6), + gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98}, + ) + rel_diffs = np.array(rel_diffs).flatten() + rel_diff_errors = np.array(rel_diff_errors).flatten() + ax.errorbar( + np.arange(len(rel_diffs)), + rel_diffs, + yerr=rel_diff_errors, + fmt="o", + color="purple", + label="Relative Difference", + ) + ax.axhline(y=0, color="k", linestyle="-", alpha=0.3) + ax.set_ylabel( + r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$" + ) + ax.set_title(f"Relative Flux Difference\n{runlist_name}") + ax.set_xlabel("Index") + fig.autofmt_xdate() + rel_diffs_array = np.array(rel_diffs) + mean_diff = np.mean(rel_diffs_array) + median_diff = np.median(rel_diffs_array) + std_diff = np.std(rel_diffs_array) + stats_text = ( + f"Mean: {mean_diff:.2f}%\n" f"Median: {median_diff:.2f}%\n" f"Std Dev: {std_diff:.2f}%" + ) + ax.text( + 0.05, + 0.95, + stats_text, + transform=ax.transAxes, + fontsize=10, + verticalalignment="top", + bbox=dict(boxstyle="round", facecolor="white", alpha=0.8), + ) + fig.savefig(output_path, bbox_inches="tight") + plt.close(fig) + plt.clf() + gc.collect() + logger.info(f"Relative difference plot saved to {output_path}") + - try: - try: - ref_data = pd.read_csv(csv_path) - if len(ref_data) == 0: - logger.warning("Anasum CSV file is empty, skipping relative plot") - return - except Exception as e: - logger.warning(f"Failed to load Anasum data: {e}") - return - - temp_fig, temp_ax = plt.subplots() - lightcurve.plot(ax=temp_ax, sed_type="flux") - lines = [child for child in temp_ax.get_children() if isinstance(child, plt.Line2D)] - if not lines or len(lines[0].get_xdata()) == 0: - logger.warning("No Gammapy flux data found, skipping relative plot") - plt.close(temp_fig) - return - - gammapy_times_plot = lines[0].get_xdata() - gammapy_flux = lines[0].get_ydata() - - if len(gammapy_times_plot) == 0: - logger.warning("No Gammapy time points found, skipping relative plot") - plt.close(temp_fig) - return - - gammapy_flux_err = lightcurve.to_table(format="lightcurve", sed_type="flux")[ - "flux_err" - ].quantity.value - - # Convert plot x-values (dates) to MJD format for comparison - if isinstance(gammapy_times_plot[0], float): - gammapy_times = np.array([mdates.num2date(t).timestamp() for t in gammapy_times_plot]) - gammapy_times = Time(gammapy_times, format="unix").mjd - elif isinstance(gammapy_times_plot[0], (np.datetime64, datetime.datetime)): - gammapy_times = Time( - [t.isoformat() if hasattr(t, "isoformat") else t for t in gammapy_times_plot] - ).mjd - plt.close(temp_fig) - - ref_data = pd.read_csv(csv_path) - - anasum_times = Time(ref_data["MDJ"], format="mjd") - anasum_flux = ref_data["Flux"] - anasum_flux_err = ref_data.get("FluxError", np.zeros_like(anasum_flux)) - - rel_diffs = [] - rel_diff_errors = [] - matched_times = [] - - total_gammapy_points = len(gammapy_times) - logger.info(f"Total Gammapy points: {total_gammapy_points}") - - for g_time, g_flux, g_err in zip(gammapy_times, gammapy_flux, gammapy_flux_err): - # Find closest Anasum time point within tolerance - g_time_value = g_time.value if hasattr(g_time, "value") else g_time - time_diffs = np.abs(anasum_times.mjd - g_time_value) - closest_idx = np.argmin(time_diffs) - - if float(time_diffs[closest_idx]) < (30 / 1440): # 30 minutes in days - a_flux = anasum_flux.iloc[closest_idx] - - # Skip extreme outliers where flux is too low - if hasattr(g_flux, "to_value"): - g_flux_val = g_flux.to_value(u.Unit("1 / (cm2 s)")) - else: - g_flux_val = float(g_flux) - if g_flux_val < 1e-15 or float(a_flux) < 1e-15: - continue - - a_flux_val = float(a_flux) - rel_diff = (g_flux_val - a_flux_val) / a_flux_val * 100 - - if hasattr(g_err, "to_value"): - g_rel_err = g_err.to_value("") / a_flux_val * 100 - else: - g_rel_err = float(g_err) / a_flux_val * 100 - - # Calculate error on relative difference - g_rel_err = float(g_err) / a_flux_val * 100 - a_rel_err = 0 - if anasum_flux_err is not None and not pd.isna(anasum_flux_err.iloc[closest_idx]): - a_err = float(anasum_flux_err.iloc[closest_idx]) - a_rel_err = a_err / a_flux_val * 100 - - # Combine errors in quadrature - rel_diff_err = np.sqrt(g_rel_err**2 + a_rel_err**2) - - # Only append if finite - if np.isfinite(rel_diff) and np.isfinite(rel_diff_err): - rel_diffs.append(rel_diff) - rel_diff_errors.append(rel_diff_err) - matched_times.append(g_time) - - if not rel_diffs: - logger.warning("No valid matching time points found for relative difference plot") - return - - fig, ax = plt.subplots( - figsize=(8, 6), - gridspec_kw={"left": 0.16, "bottom": 0.2, "top": 0.98, "right": 0.98}, - ) - - matched_times = Time(matched_times, format="mjd") - - ax.errorbar( - matched_times.plot_date, - rel_diffs, - yerr=rel_diff_errors, - fmt="o", - color="purple", - label="Relative Difference", - ) - - ax.axhline(y=0, color="k", linestyle="-", alpha=0.3) - - ax.set_ylabel( - r"$\frac{\mathrm{Gammapy} - \mathrm{Anasum}}{\mathrm{Anasum}} \times 100\,(\%)$" - ) - ax.set_title(f"Relative Flux Difference\n{runlist_name}") - - ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) - ax.xaxis.set_major_locator(mdates.AutoDateLocator()) - fig.autofmt_xdate() - - rel_diffs_array = np.array(rel_diffs) - mean_diff = np.mean(rel_diffs_array) - median_diff = np.median(rel_diffs_array) - std_diff = np.std(rel_diffs_array) - - stats_text = ( - f"Mean: {mean_diff:.2f}%\n" f"Median: {median_diff:.2f}%\n" f"Std Dev: {std_diff:.2f}%" - ) - - ax.text( - 0.05, - 0.95, - stats_text, - transform=ax.transAxes, - fontsize=10, - verticalalignment="top", - bbox=dict(boxstyle="round", facecolor="white", alpha=0.8), - ) - - output_dir = Path("./validation_plots/knn05_MinMaxScaler") - output_dir.mkdir(exist_ok=True, parents=True) - - output_path = output_dir / f"relative_diff_{runlist_name}.png" - fig.savefig(output_path, bbox_inches="tight") - plt.close(fig) - plt.clf() - gc.collect() - logger.info(f"Relative difference plot saved to {output_path}") - - except Exception as e: - logger.error(f"Error creating relative difference plot: {e}", exc_info=True) - - logger.error(traceback.format_exc()) def plot_combined_relative_differences(runlist_path): @@ -452,7 +463,7 @@ def plot_combined_relative_differences(runlist_path): # Get runlist name and subfolder runlist_name = Path(runlist_path).stem - folder_match = re.search(r"runlist_releaseTestingV6_(.*)", runlist_name) + folder_match = re.search(r"runlist_releaseTestingV6(.*)", runlist_name) if not folder_match: logger.warning(f"Could not extract folder name from {runlist_name}, skipping combined plot") @@ -460,7 +471,7 @@ def plot_combined_relative_differences(runlist_path): folder_name = folder_match.group(1) base_path = f"{DATA_DIR}EventDisplay_Release_v491/Crab/V6_moderate2tel" - anasum_csv_path = Path(f"{base_path}/V6_{folder_name}/LightCurve.csv") + anasum_csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv') #Path(f"{base_path}/V6_{folder_name}/LightCurve.csv") if not anasum_csv_path.exists(): logger.warning(f"No reference CSV found at {anasum_csv_path}, skipping combined plot") @@ -469,7 +480,7 @@ def plot_combined_relative_differences(runlist_path): # Load Anasum reference data try: ref_data = pd.read_csv(anasum_csv_path) - anasum_times = Time(ref_data["MDJ"], format="mjd") + anasum_times = Time(ref_data["MJD"], format="mjd") anasum_flux = ref_data["Flux"] except Exception as e: logger.warning(f"Failed to load Anasum data: {e}") @@ -489,13 +500,13 @@ def plot_combined_relative_differences(runlist_path): for idx, knn_val in enumerate(knn_values): try: - datastore_path = Path(f"./FITS/{knn_val}/{folder_name}/hdu-index.fits.gz") + datastore_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_ReleaseTests_code/FITS/knn05_MinMaxScaler/V6/') #Path(f"./FITS/{knn_val}/{folder_name}/hdu-index.fits.gz") if not datastore_path.exists(): logger.warning(f"No datastore found at {datastore_path}, skipping {knn_val}") continue obs_ids = read_obs_ids(runlist_path) - data_store = DataStore.from_file(filename=str(datastore_path)) + data_store = DataStore.from_dir(filename=str(datastore_path)) exclusion_mask = make_exclusion_mask() datasets = make_datasets(data_store, obs_ids, exclusion_mask) lightcurve = estimate_lightcurve(datasets) @@ -542,7 +553,7 @@ def plot_combined_relative_differences(runlist_path): time_diffs = np.abs(anasum_times.mjd - g_time_value) closest_idx = np.argmin(time_diffs) - if float(time_diffs[closest_idx]) < (30 / 1440): # 30 minutes in days + if float(time_diffs[closest_idx]) <= 0.001: a_flux = anasum_flux.iloc[closest_idx] # Skip points with very low flux @@ -553,6 +564,7 @@ def plot_combined_relative_differences(runlist_path): if g_flux_val < 1e-15 or float(a_flux) < 1e-15: continue + try: g_flux_val = float(g_flux) a_flux_val = float(a_flux) @@ -621,7 +633,7 @@ def plot_combined_relative_differences(runlist_path): stats = all_stats[knn_val] stats_text.append( f"{knn_val}:\n" - f" Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, c3: {stats['std']:.2f}%" + f" Mean: {stats['mean']:.2f}%, Med: {stats['median']:.2f}%, $\sigma$: {stats['std']:.2f}%" ) ax.legend(handles, labels) @@ -674,11 +686,15 @@ def main(args=None): parsed_args = parser.parse_args(args) obs_ids = read_obs_ids(parsed_args.runlist) - data_store = DataStore.from_file(filename=parsed_args.datastore) + data_store = DataStore.from_dir(parsed_args.datastore) + exclusion_mask = make_exclusion_mask() datasets = make_datasets(data_store, obs_ids, exclusion_mask) lightcurve = estimate_lightcurve(datasets) + + # Use the correct, index-based function + identify_and_plot_high_diff_runs(parsed_args, lightcurve, threshold=50) # Only create the plots specified by the arguments if not hasattr(parsed_args, "only_relative") or not parsed_args.only_relative: @@ -689,8 +705,148 @@ def main(args=None): ): plot_relative_difference(parsed_args, lightcurve) + #plot_energy_spectra(parsed_args, lightcurve) + gc.collect() if __name__ == "__main__": main() + +def identify_and_plot_high_diff_runs(args, lightcurve, threshold=50): + """ + Identifies indices with relative flux difference > threshold percent (or < -threshold percent) + and creates a plot showing just these indices for both Gammapy and Anasum. + Applies zenith cuts using the anasum log file. + """ + runlist_name = Path(args.runlist).stem + csv_path = Path('/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/LightCurve.csv') + log_path = "/home/eshita/Documents/VERITAS/EventDisplay_Release_v491/Crab/V6_moderate2tel/V6/anasum_releaseTestingV6.combined.log" + if not csv_path.exists(): + logger.warning(f"No reference CSV found at {csv_path}, skipping high diff analysis") + return + try: + zenith_runs = get_anasum_runs_with_zenith(log_path) + + # Read runs from runlist and filter by zenith cut + with open(args.runlist) as f: + all_run_ids = [int(line.strip()) for line in f if line.strip()] + filtered_run_ids = [run for run in all_run_ids if run in zenith_runs] + + # Get Gammapy data + flux_table = lightcurve.to_table(format="lightcurve", sed_type="flux") + gammapy_flux = flux_table['flux'].quantity.value + gammapy_flux_err = flux_table['flux_err'].quantity.value + + # Filter Gammapy data by run id + gammapy_run_ids = all_run_ids[:len(gammapy_flux)] if len(gammapy_flux) < len(all_run_ids) else all_run_ids + gammapy_flux_by_run = dict(zip(gammapy_run_ids, gammapy_flux)) + gammapy_flux_err_by_run = dict(zip(gammapy_run_ids, gammapy_flux_err)) + + # Get Anasum data + anasum_data = pd.read_csv(csv_path) + anasum_flux = anasum_data["Flux"].values + anasum_flux_err = anasum_data.get("FluxError", np.zeros_like(anasum_flux)) + anasum_run_col = None + for col in anasum_data.columns: + if col.lower() in ["run", "runid", "obs_id", "obsid"]: + anasum_run_col = col + break + if anasum_run_col is not None: + anasum_run_ids = anasum_data[anasum_run_col].astype(int).values + anasum_flux_by_run = dict(zip(anasum_run_ids, anasum_flux)) + anasum_flux_err_by_run = dict(zip(anasum_run_ids, anasum_flux_err)) + else: + logger.error("No run number column found in Anasum CSV") + return + + # Now match only filtered runs (zenith < 55 deg) + matched_gammapy_flux = [] + matched_gammapy_flux_err = [] + matched_anasum_flux = [] + matched_anasum_flux_err = [] + valid_run_ids = [] + for run_id in filtered_run_ids: + if run_id in gammapy_flux_by_run and run_id in anasum_flux_by_run: + matched_gammapy_flux.append(gammapy_flux_by_run[run_id]) + matched_gammapy_flux_err.append(gammapy_flux_err_by_run[run_id]) + matched_anasum_flux.append(anasum_flux_by_run[run_id]) + matched_anasum_flux_err.append(anasum_flux_err_by_run[run_id]) + valid_run_ids.append(run_id) + if len(matched_gammapy_flux) == 0: + logger.error("No matching runs with zenith < 55found between Gammapy and Anasum") + return + logger.info(f"Comparing {len(matched_gammapy_flux)} runs with zenith < 55 between Gammapy and Anasum") + + # Calculate relative differences + rel_diffs = np.array([((g - a) / a) * 100 for g, a in zip(matched_gammapy_flux, matched_anasum_flux)]) + high_diff_indices = np.where((rel_diffs > threshold) | (rel_diffs < -threshold))[0] + if len(high_diff_indices) == 0: + logger.info(f"No indices found with >{threshold}% absolute difference") + return + # Save high difference runs to file + output_dir = Path("./validation_plots/high_diff_runs") + output_dir.mkdir(exist_ok=True, parents=True) + high_diff_runlist = output_dir / f"high_diff_runs_{runlist_name}.dat" + with open(high_diff_runlist, 'w') as f: + for idx in high_diff_indices: + f.write(f"{valid_run_ids[idx]}\n") + high_diff_details = output_dir / f"high_diff_details_{runlist_name}.csv" + with open(high_diff_details, 'w') as f: + f.write("Run,RelativeDifferencePercent\n") + for idx in high_diff_indices: + f.write(f"{valid_run_ids[idx]},{float(rel_diffs[idx]):.2f}\n") + logger.info(f"Found {len(high_diff_indices)} indices with >{threshold}% absolute difference") + logger.info(f"High difference runs saved to {high_diff_runlist}") + logger.info(f"Detailed information saved to {high_diff_details}") + # Extract data just for these indices + high_diff_gammapy_flux = [float(matched_gammapy_flux[i]) for i in high_diff_indices] + high_diff_anasum_flux = [float(matched_anasum_flux[i]) for i in high_diff_indices] + high_diff_times = [anasum_data["MJD"].values[anasum_data[anasum_run_col] == valid_run_ids[i]][0] if "MJD" in anasum_data.columns else i for i in high_diff_indices] + # Create the plot for high difference indices + output_path = output_dir / f"high_diff_{threshold}pct_{runlist_name}.png" + fig, ax = plt.subplots(figsize=(10, 6)) + ax.set_yscale('log') + plot_times = Time(high_diff_times, format="mjd").plot_date if "MJD" in anasum_data.columns else high_diff_times + ax.errorbar( + plot_times, + high_diff_gammapy_flux, + fmt='o', + label="Gammapy", + color='blue' + ) + ax.errorbar( + plot_times, + high_diff_anasum_flux, + fmt='s', + label="Anasum", + color='red' + ) + for i, (x, y, run_id, rel_diff) in enumerate(zip( + plot_times, high_diff_gammapy_flux, [valid_run_ids[i] for i in range(len(high_diff_indices))], + [rel_diffs[idx] for idx in high_diff_indices] + )): + ax.annotate( + f"Run {run_id}\n({float(rel_diff):.1f}%)", + (x, y), + xytext=(0, 10), + textcoords='offset points', + ha='center', + fontsize=8 + ) + ax.set_title(f"Indices with >{threshold}% for zenith < 55deg, Flux Difference \n{runlist_name}") + ax.set_xlabel("Date" if "MJD" in anasum_data.columns else "Index") + ax.set_ylabel("Flux (cm$^{-2}$ s$^{-1}$)") + ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d")) + ax.xaxis.set_major_locator(mdates.AutoDateLocator()) + fig.autofmt_xdate() + plt.legend() + plt.tight_layout() + fig.savefig(output_path) + plt.close(fig) + logger.info(f"High difference plot saved to {output_path}") + return [valid_run_ids[i] for i in range(len(high_diff_indices))] + except Exception as e: + logger.error(f"Error identifying high difference indices: {e}", exc_info=True) + logger.error(traceback.format_exc()) + return [] \ No newline at end of file