From 615b06a598346a2a332784223cea6859cf7d5c26 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Thu, 6 Nov 2025 13:43:44 +0000 Subject: [PATCH 01/25] WIP --- examples/metadetect/config.yml | 4 +- examples/metadetect/pipeline.yml | 1 + txpipe/__init__.py | 2 +- txpipe/delta_sigma.py | 257 +++++++++++++++++++++++++++++++ txpipe/lens_selector.py | 3 +- 5 files changed, 263 insertions(+), 4 deletions(-) create mode 100644 txpipe/delta_sigma.py diff --git a/examples/metadetect/config.yml b/examples/metadetect/config.yml index 64b346258..ee741f611 100755 --- a/examples/metadetect/config.yml +++ b/examples/metadetect/config.yml @@ -359,8 +359,8 @@ PZRealizationsPlotLens: photoz_realizations_plot: lens_photoz_realizations_plot TXShearCalibration: - add_fiducial_distance: True - copy_redshift: True + add_fiducial_distance: true + copy_redshift: true redshift_name: '00/redshift_true' TXTwoPointSCIA: diff --git a/examples/metadetect/pipeline.yml b/examples/metadetect/pipeline.yml index 8f2800abb..d56e09404 100644 --- a/examples/metadetect/pipeline.yml +++ b/examples/metadetect/pipeline.yml @@ -120,6 +120,7 @@ stages: - name: HOSFSB # Disabled since not yet synchronised with new Treecorr MPI - name: TXTwoPointSCIA # Self-calibration intrinsic alignments of galaxies + - name: TXDeltaSigma # Disabling these as they takes too long for a quick test. # The latter two need NaMaster diff --git a/txpipe/__init__.py b/txpipe/__init__.py index 39dbd2a35..2166f0ad7 100644 --- a/txpipe/__init__.py +++ b/txpipe/__init__.py @@ -41,7 +41,7 @@ from .simulation import TXLogNormalGlass from .magnification import TXSSIMagnification from .covariance_nmt import TXFourierNamasterCovariance, TXRealNamasterCovariance - +from .delta_sigma import TXDeltaSigma # We no longer import all the extensions automatically here to avoid # some dependency problems when running under the LSST environment on NERSC. # You can still import them explicitly in your pipeline scripts by doing: diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py new file mode 100644 index 000000000..8a2c203aa --- /dev/null +++ b/txpipe/delta_sigma.py @@ -0,0 +1,257 @@ +from .twopoint import TXTwoPoint, TREECORR_CONFIG, SHEAR_POS +from .data_types import SACCFile, ShearCatalog, HDFFile, QPNOfZFile, TextFile +import numpy as np +from ceci.config import StageParameter +import os + + +class TXDeltaSigma(TXTwoPoint): + """Delta Sigma two-point correlation function class. + + This class handles the computation and storage of the Delta Sigma two-point correlation function. + """ + + name = "TXDeltaSigma" + + inputs = [ + ("binned_shear_catalog", ShearCatalog), + ("binned_lens_catalog", HDFFile), + ("binned_random_catalog", HDFFile), + ("shear_photoz_stack", QPNOfZFile), + ("lens_photoz_stack", QPNOfZFile), + ("tracer_metadata", HDFFile), + ] + outputs = [("delta_sigma", SACCFile)] + + config_options = TREECORR_CONFIG | { + "source_bins": StageParameter(list, [-1], msg="List of source bins to use (-1 means all)"), + "lens_bins": StageParameter(list, [-1], msg="List of lens bins to use (-1 means all)"), + "var_method": StageParameter(str, "jackknife", msg="Method for computing variance (jackknife, sample, etc.)"), + "use_randoms": StageParameter(bool, True, msg="Whether to use random catalogs"), + "low_mem": StageParameter(bool, False, msg="Whether to use low memory mode"), + "patch_dir": StageParameter(str, "./cache/delta-sigma-patches", msg="Directory for storing patch files"), + "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk"), + "share_patch_files": StageParameter(bool, False, msg="Whether to share patch files across processes"), + "metric": StageParameter(str, "Euclidean", msg="Distance metric to use (Euclidean, Arc, etc.)"), + "gaussian_sims_factor": StageParameter( + list, + default=[1.0], + msg="Factor by which to decrease lens density to account for increased density contrast.", + ), + "use_subsampled_randoms": StageParameter(bool, True, msg="Use subsampled randoms file for RR calculation"), + "redshift_slice_size": StageParameter(float, 0.01, msg="Size of redshift slices for re-weighting") + } + + + + def read_nbin(self): + """ + For Delta Sigma we are splitting both the source and lens bins into + much smaller slices so that we can re-weight and combine later. We + first use the parent class methods to read the actual number of tomographic + bins, before splitting each of those up into tomographic slices. + + So the total number of source bins will be: + nbin_source_total = nslice * nbin_source_tomography + and similarly for the lens bins. + """ + if self.config["source_bins"] == [-1] and self.config["lens_bins"] == [-1]: + source_list, lens_list = self._read_nbin_from_tomography() + else: + source_list, lens_list = self._read_nbin_from_config() + + zmin = np.inf + zmax = -np.inf + with self.open_input("shear_photoz_stack", wrapper=True) as f: + for i in source_list: + z, Nz = f.get_bin_n_of_z(i) + zmin = min(zmin, z[np.where(Nz > 0)].min()) + zmax = max(zmax, z[np.where(Nz > 0)].max()) + with self.open_input("lens_photoz_stack", wrapper=True) as f: + for i in lens_list: + z, Nz = f.get_bin_n_of_z(i) + zmin = min(zmin, z[np.where(Nz > 0)].min()) + zmax = max(zmax, z[np.where(Nz > 0)].max()) + + # This is the splitting used for the narrow redshift slices + self.zmin = zmin + self.zmax = zmax + self.zbins = np.arange(zmin, zmax + self.config["redshift_slice_size"]*0.1, self.config["redshift_slice_size"]) + self.nslice = len(self.zbins) - 1 + nbin_source_total = self.nslice * len(source_list) + nbin_lens_total = self.nslice * len(lens_list) + print(lens_list, source_list, nbin_lens_total, nbin_source_total) + return np.arange(nbin_source_total), np.arange(nbin_lens_total) + + def index_to_tomograpic_bin_and_slice(self, index): + """ + Given a bin index (for either source or lens), return the + tomographic bin and slice index. + + Parameters + ---------- + index : int + The overall bin index. + nbin : int + The number of tomographic bins (not including slices). + + Returns + ------- + tomo_bin : int + The tomographic bin index. + slice_index : int + The slice index within the tomographic bin. + """ + tomo_bin = index // self.nslice + slice_index = index % self.nslice + return tomo_bin, slice_index + + + def prepare_patches(self, calcs, meta): + # We are going to slightly abuse the parent class prepare_patches + # here. The individual redshift slices are too small to usefully use + # patches, so a "patch" will actually be the full catalog for a given + # slice-bin-kind combination (where kind=lens/source, bin=tomo-bin). + source_bins = set() + lens_bins = set() + for i, j, k in calcs: + assert k == SHEAR_POS, "Delta Sigma only supports shear-position correlations" + source_bins.add(i) + lens_bins.add(j) + + self.split_cat_by_redshift("source") + self.split_cat_by_redshift("lens") + self.split_cat_by_redshift("randoms") + + def get_slice_filename(self, kind, tomo_bin, slice_index): + output_dir = self.config["patch_dir"] + filename = f"{output_dir}/{kind}_{tomo_bin}_slice_{slice_index}.hdf5" + return filename + + def split_cat_by_redshift(self, kind): + import h5py + + # Open the correct file depending on the kind of thing we are splitting + if kind == "source": + input_cat = self.open_input("binned_shear_catalog") + group = input_cat["shear"] + elif kind == "lens": + input_cat = self.open_input("binned_lens_catalog") + group = input_cat["lens"] + elif kind == "randoms": + input_cat = self.open_input("binned_random_catalog") + group = input_cat["randoms"] + else: + raise ValueError(f"Unknown kind {kind} for splitting catalog by redshift") + + nbin = group.attrs["nbin"] + # The maximum bin is the 2D "all" bin. + bins = [i for i in range(nbin) if i in bins_to_use] + print(bins_to_use) + + # Split the tomographic bins by redshift slices + for i in self.split_tasks_by_rank(bins): + bin_name = f"bin_{i}" + subgroup = group[bin_name] + z = subgroup["z"][:] + + # If we are in low memory mode, we read columns on demand + # otherwise we read everything into memory at the start + if self.config["low_mem"]: + get_col = lambda k: subgroup[k][:] + else: + cache = {} + for k in subgroup.keys(): + cache[k] = subgroup[k][:] + get_col = lambda k: cache[k] + + # Loop through the narrow redshift slices + for slice_index in range(self.nslice): + zmin = self.zbins[slice_index] + zmax = self.zbins[slice_index + 1] + + # select the part of the catalog in this redshift slice + mask = (z >= zmin) & (z < zmax) + out_path = self.get_slice_filename(kind, bin_name, slice_index) + with h5py.File(out_path, "w") as f_out: + for key in subgroup.keys(): + data = get_col(key) + f_out.create_dataset(key, data=data[mask]) + + input_cat.close() + + def touch_patches(self, cat): + pass + + def select_calculations(self, source_list, lens_list): + calcs = [] + + # For shear-position we use all pairs + k = SHEAR_POS + for i in source_list: + for j in lens_list: + calcs.append((i, j, k)) + + return calcs + + + def get_shear_catalog(self, i): + import treecorr + tomo_bin, slice_index = self.index_to_tomograpic_bin_and_slice(i) + + filename = self.get_slice_filename("source", tomo_bin, slice_index) + # Load and calibrate the appropriate bin data + cat = treecorr.Catalog( + filename, + g1_col="g1", + g2_col="g2", + ra_col="ra", + dec_col="dec", + w_col="weight", + ra_units="degree", + dec_units="degree", + flip_g1=self.config["flip_g1"], + flip_g2=self.config["flip_g2"], + ) + + return cat + + def get_lens_catalog(self, i): + import treecorr + tomo_bin, slice_index = self.index_to_tomograpic_bin_and_slice(i) + + filename = self.get_slice_filename("lens", tomo_bin, slice_index) + # Load and calibrate the appropriate bin data + cat = treecorr.Catalog( + filename, + ra_col="ra", + dec_col="dec", + w_col="weight", + ra_units="degree", + dec_units="degree", + ) + + return cat + + + def get_random_catalog(self, i): + import treecorr + tomo_bin, slice_index = self.index_to_tomograpic_bin_and_slice(i) + + filename = self.get_slice_filename("randoms", tomo_bin, slice_index) + # Load and calibrate the appropriate bin data + cat = treecorr.Catalog( + filename, + ra_col="ra", + dec_col="dec", + w_col="weight", + ra_units="degree", + dec_units="degree", + ) + + return cat + + + def write_output(self, source_list, lens_list, meta, results): + print("I honestly did not think we'd get this far...") + breakpoint() diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py index 0c264ec92..c6bd0bc68 100755 --- a/txpipe/lens_selector.py +++ b/txpipe/lens_selector.py @@ -608,7 +608,7 @@ def run(self): extra_cols = [c for c in self.config["extra_cols"] if c] # Regular columns. - cols = ["ra", "dec", "weight", "comoving_distance"] + cols = ["ra", "dec", "weight", "comoving_distance", "z"] # Object we use to make the separate lens bins catalog cat_output = self.open_output(self.get_binned_lens_name(), parallel=True) @@ -683,6 +683,7 @@ def add_redshifts(self, iterator): d = np.zeros(len(a)) d[z > 0] = pyccl.comoving_radial_distance(cosmo, a[z > 0]) data["comoving_distance"] = d + data["z"] = z yield s, e, data From 8e804c8353bad529583f54743537eeb431c6c177 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Fri, 7 Nov 2025 10:35:59 +0000 Subject: [PATCH 02/25] Add DeltaSigma stages --- examples/metadetect/pipeline.yml | 1 + txpipe/data_types/types.py | 21 ++ txpipe/delta_sigma.py | 422 +++++++++++++++++-------------- txpipe/twopoint.py | 6 +- txpipe/utils/blank.py | 3 + 5 files changed, 259 insertions(+), 194 deletions(-) create mode 100644 txpipe/utils/blank.py diff --git a/examples/metadetect/pipeline.yml b/examples/metadetect/pipeline.yml index d56e09404..5cf705b0d 100644 --- a/examples/metadetect/pipeline.yml +++ b/examples/metadetect/pipeline.yml @@ -121,6 +121,7 @@ stages: # Disabled since not yet synchronised with new Treecorr MPI - name: TXTwoPointSCIA # Self-calibration intrinsic alignments of galaxies - name: TXDeltaSigma + - name: TXDeltaSigmaPlots # Disabling these as they takes too long for a quick test. # The latter two need NaMaster diff --git a/txpipe/data_types/types.py b/txpipe/data_types/types.py index adf0787b1..2414dfbd4 100755 --- a/txpipe/data_types/types.py +++ b/txpipe/data_types/types.py @@ -468,6 +468,27 @@ def close(self): class FiducialCosmology(YamlFile): + + def to_astropy(self): + from astropy.cosmology import w0waCDM + + with open(self.path, "r") as fp: + params = yaml.load(fp, Loader=yaml.Loader) + + astropy_params = dict( + H0=params["H0"], + Om0=params["Omega_m"], + Ode0=1 - params["Omega_k"] - params["Omega_m"], + w0=params["w0"], + wa=params["wa"], + Neff=params["Neff"], + m_nu=params["sum_nu_masses"] + + ) + + cosmo = w0waCDM(**astropy_params) + + return cosmo # TODO replace when CCL has more complete serialization tools. def to_ccl(self, **kwargs): import pyccl as ccl diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 8a2c203aa..7f7746e45 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -1,14 +1,16 @@ from .twopoint import TXTwoPoint, TREECORR_CONFIG, SHEAR_POS -from .data_types import SACCFile, ShearCatalog, HDFFile, QPNOfZFile, TextFile +from .base_stage import PipelineStage +from .data_types import SACCFile, ShearCatalog, HDFFile, QPNOfZFile, FiducialCosmology, TextFile, PNGFile import numpy as np from ceci.config import StageParameter import os class TXDeltaSigma(TXTwoPoint): - """Delta Sigma two-point correlation function class. + """Compute Delta-Sigma, the excess surface density around lenses. - This class handles the computation and storage of the Delta Sigma two-point correlation function. + This is a subclass of TXTwoPoint that re-weights the lenses by the + inverse critical surface density based on the source redshift distribution. """ name = "TXDeltaSigma" @@ -20,6 +22,8 @@ class TXDeltaSigma(TXTwoPoint): ("shear_photoz_stack", QPNOfZFile), ("lens_photoz_stack", QPNOfZFile), ("tracer_metadata", HDFFile), + ("patch_centers", TextFile), + ("fiducial_cosmology", FiducialCosmology), ] outputs = [("delta_sigma", SACCFile)] @@ -30,7 +34,7 @@ class TXDeltaSigma(TXTwoPoint): "use_randoms": StageParameter(bool, True, msg="Whether to use random catalogs"), "low_mem": StageParameter(bool, False, msg="Whether to use low memory mode"), "patch_dir": StageParameter(str, "./cache/delta-sigma-patches", msg="Directory for storing patch files"), - "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk"), + "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk when making patches"), "share_patch_files": StageParameter(bool, False, msg="Whether to share patch files across processes"), "metric": StageParameter(str, "Euclidean", msg="Distance metric to use (Euclidean, Arc, etc.)"), "gaussian_sims_factor": StageParameter( @@ -38,220 +42,256 @@ class TXDeltaSigma(TXTwoPoint): default=[1.0], msg="Factor by which to decrease lens density to account for increased density contrast.", ), - "use_subsampled_randoms": StageParameter(bool, True, msg="Use subsampled randoms file for RR calculation"), - "redshift_slice_size": StageParameter(float, 0.01, msg="Size of redshift slices for re-weighting") + # "use_subsampled_randoms": StageParameter(bool, False, msg="Use subsampled randoms file for RR calculation"), + "delta_z": StageParameter(float, 0.001, msg="Z bin width for sigma_crit spline computation"), } - - def read_nbin(self): - """ - For Delta Sigma we are splitting both the source and lens bins into - much smaller slices so that we can re-weight and combine later. We - first use the parent class methods to read the actual number of tomographic - bins, before splitting each of those up into tomographic slices. - - So the total number of source bins will be: - nbin_source_total = nslice * nbin_source_tomography - and similarly for the lens bins. - """ - if self.config["source_bins"] == [-1] and self.config["lens_bins"] == [-1]: - source_list, lens_list = self._read_nbin_from_tomography() - else: - source_list, lens_list = self._read_nbin_from_config() - - zmin = np.inf - zmax = -np.inf - with self.open_input("shear_photoz_stack", wrapper=True) as f: - for i in source_list: - z, Nz = f.get_bin_n_of_z(i) - zmin = min(zmin, z[np.where(Nz > 0)].min()) - zmax = max(zmax, z[np.where(Nz > 0)].max()) - with self.open_input("lens_photoz_stack", wrapper=True) as f: - for i in lens_list: - z, Nz = f.get_bin_n_of_z(i) - zmin = min(zmin, z[np.where(Nz > 0)].min()) - zmax = max(zmax, z[np.where(Nz > 0)].max()) - - # This is the splitting used for the narrow redshift slices - self.zmin = zmin - self.zmax = zmax - self.zbins = np.arange(zmin, zmax + self.config["redshift_slice_size"]*0.1, self.config["redshift_slice_size"]) - self.nslice = len(self.zbins) - 1 - nbin_source_total = self.nslice * len(source_list) - nbin_lens_total = self.nslice * len(lens_list) - print(lens_list, source_list, nbin_lens_total, nbin_source_total) - return np.arange(nbin_source_total), np.arange(nbin_lens_total) - - def index_to_tomograpic_bin_and_slice(self, index): - """ - Given a bin index (for either source or lens), return the - tomographic bin and slice index. - - Parameters - ---------- - index : int - The overall bin index. - nbin : int - The number of tomographic bins (not including slices). - - Returns - ------- - tomo_bin : int - The tomographic bin index. - slice_index : int - The slice index within the tomographic bin. - """ - tomo_bin = index // self.nslice - slice_index = index % self.nslice - return tomo_bin, slice_index - - - def prepare_patches(self, calcs, meta): - # We are going to slightly abuse the parent class prepare_patches - # here. The individual redshift slices are too small to usefully use - # patches, so a "patch" will actually be the full catalog for a given - # slice-bin-kind combination (where kind=lens/source, bin=tomo-bin). - source_bins = set() - lens_bins = set() - for i, j, k in calcs: - assert k == SHEAR_POS, "Delta Sigma only supports shear-position correlations" - source_bins.add(i) - lens_bins.add(j) - - self.split_cat_by_redshift("source") - self.split_cat_by_redshift("lens") - self.split_cat_by_redshift("randoms") - - def get_slice_filename(self, kind, tomo_bin, slice_index): - output_dir = self.config["patch_dir"] - filename = f"{output_dir}/{kind}_{tomo_bin}_slice_{slice_index}.hdf5" - return filename - - def split_cat_by_redshift(self, kind): - import h5py - - # Open the correct file depending on the kind of thing we are splitting - if kind == "source": - input_cat = self.open_input("binned_shear_catalog") - group = input_cat["shear"] - elif kind == "lens": - input_cat = self.open_input("binned_lens_catalog") - group = input_cat["lens"] - elif kind == "randoms": - input_cat = self.open_input("binned_random_catalog") - group = input_cat["randoms"] - else: - raise ValueError(f"Unknown kind {kind} for splitting catalog by redshift") - - nbin = group.attrs["nbin"] - # The maximum bin is the 2D "all" bin. - bins = [i for i in range(nbin) if i in bins_to_use] - print(bins_to_use) - - # Split the tomographic bins by redshift slices - for i in self.split_tasks_by_rank(bins): - bin_name = f"bin_{i}" - subgroup = group[bin_name] - z = subgroup["z"][:] - - # If we are in low memory mode, we read columns on demand - # otherwise we read everything into memory at the start - if self.config["low_mem"]: - get_col = lambda k: subgroup[k][:] - else: - cache = {} - for k in subgroup.keys(): - cache[k] = subgroup[k][:] - get_col = lambda k: cache[k] - - # Loop through the narrow redshift slices - for slice_index in range(self.nslice): - zmin = self.zbins[slice_index] - zmax = self.zbins[slice_index + 1] - - # select the part of the catalog in this redshift slice - mask = (z >= zmin) & (z < zmax) - out_path = self.get_slice_filename(kind, bin_name, slice_index) - with h5py.File(out_path, "w") as f_out: - for key in subgroup.keys(): - data = get_col(key) - f_out.create_dataset(key, data=data[mask]) - - input_cat.close() - - def touch_patches(self, cat): - pass - def select_calculations(self, source_list, lens_list): calcs = [] - - # For shear-position we use all pairs + # Fill in these config items that are not relevant for Delta Sigma + # but are needed by the parent class. + self.config["do_shear_pos"] = True + self.config["do_shear_shear"] = False + self.config["do_pos_pos"] = False + self.config["use_subsampled_randoms"] = False + + # For Delta Sigma everything is just shear-position k = SHEAR_POS for i in source_list: for j in lens_list: calcs.append((i, j, k)) - + + if self.rank == 0: + print(f"Running {len(calcs)} calculations: {calcs}") + return calcs + + def read_nbin(self): + source_list, lens_list = super().read_nbin() + # This is a convenient place to also compute the sigma_crit_inverse splines + # that we need later + self.compute_sigma_crit_inverse_splines(source_list) + return source_list, lens_list + + + def compute_sigma_crit_inverse_spline(self, z, nz): + import scipy.interpolate + import scipy.integrate + + # We need a fiducial cosmology to convert redshifts to distances. + # This is why I don't like Delta Sigma. + with self.open_input("fiducial_cosmology", wrapper=True) as f: + cosmo = f.to_ccl() + + # Another option would be to use the QP object directly and use + # its .pdf() method. That's probably better than this. + zs_spline = scipy.interpolate.InterpolatedUnivariateSpline(z, nz, ext="zeros") + zmax = z.max() + 0.1 + dz = self.config["delta_z"] + + # We integrate this over source redshift for each lens redshift + # It's just a call to CCL weighted by the n(z) + def integrand(zl, zs): + w = np.where(zs > zl) + a_l = 1 / (1 + zl) + a_s = 1 / (1 + zs) + out = np.zeros_like(zs) + # https://ccl.readthedocs.io/en/latest/api/pyccl.cosmology.html#pyccl.cosmology.Cosmology.sigma_critical + sigma_crit = cosmo.sigma_critical(a_lens=a_l, a_source=a_s[w]) + n_z = zs_spline(zs[w]) + out[w] = n_z / sigma_crit + return out + + # Set up the grids needed by the integrals + zl_grid = np.arange(0, zmax, dz) + zs_grid = np.arange(0, zmax, dz) + sigma_crit_inv = np.zeros_like(zl_grid) + + # Do the integrals, just trapezoidal rule + for i, zli in enumerate(zl_grid[1:-1]): + integrand_vals = integrand(zli, zs_grid) + sigma_crit_inv[i] = scipy.integrate.trapezoid(integrand_vals, zs_grid) + + # This gives us the mean sigma_crit_inverse for each lens z + spline = scipy.interpolate.UnivariateSpline(zl_grid, sigma_crit_inv) + return spline + + + def compute_sigma_crit_inverse_splines(self, source_list): + """ + For each source bin, compute and store the spline + for sigma_crit_inverse as a function of lens redshift. + """ + from .utils import blank + splines = {} + with self.open_input("shear_photoz_stack", wrapper=True) as f: + for i in source_list: + if i == "all": + z, Nz = f.get_2d_n_of_z() + else: + z, Nz = f.get_bin_n_of_z(i) + if self.rank == 0: + print("Computing sigma_crit_inverse spline for source bin", i) + splines[i] = self.compute_sigma_crit_inverse_spline(z, Nz) + blank.sigma_crit_inverse_splines = splines def get_shear_catalog(self, i): - import treecorr - tomo_bin, slice_index = self.index_to_tomograpic_bin_and_slice(i) + """ + This is a total hack. We want the get_lens_catalog to re-scale + the weights based on which source bin is being used. So we store + the source bin index in self.active_shear_catalog here, so that + get_lens_catalog can access it. - filename = self.get_slice_filename("source", tomo_bin, slice_index) - # Load and calibrate the appropriate bin data - cat = treecorr.Catalog( - filename, - g1_col="g1", - g2_col="g2", - ra_col="ra", - dec_col="dec", - w_col="weight", - ra_units="degree", - dec_units="degree", - flip_g1=self.config["flip_g1"], - flip_g2=self.config["flip_g2"], - ) + This only works because the parent class calls get_shear_catalog + before get_lens_catalog. - return cat + If that does change it should be obvious because self.active_shear_catalog + will be undefined. + """ + self.active_shear_catalog = i + return super().get_shear_catalog(i) + def get_lens_catalog(self, i): import treecorr - tomo_bin, slice_index = self.index_to_tomograpic_bin_and_slice(i) - - filename = self.get_slice_filename("lens", tomo_bin, slice_index) - # Load and calibrate the appropriate bin data + from .utils import blank + treecorr.Catalog.eval_modules.append("txpipe.utils.blank as splines") + + # To get the scaling into Treecorr without re-creating multiple + # copies of all the files we have to pass the redshift into + # another module that treecorr can access and use the "w_eval" + # parameter to tell it to do it. + with self.open_input("binned_lens_catalog") as f: + redshift = f[f"/lens/bin_{i}/z"][:] + blank.redshift = redshift + + # Load the catalog. Note that we are not loading the weight + # from the file, because we want to scale it by 1/sigma_crit + # here. cat = treecorr.Catalog( - filename, + self.get_input("binned_lens_catalog"), + ext=f"/lens/bin_{i}", ra_col="ra", dec_col="dec", w_col="weight", ra_units="degree", dec_units="degree", + # Apply a function that rescales the weights + w_eval=f"weight / splines.sigma_crit_inverse_splines[{self.active_shear_catalog}](splines.redshift)", + patch_centers=self.get_input("patch_centers"), + save_patch_dir=self.get_patch_dir("binned_lens_catalog", i), ) return cat + + def write_output(self, source_list, lens_list, meta, results): + import sacc + with self.open_input("fiducial_cosmology", wrapper=True) as f: + cosmo = f.to_ccl() + # Get the source n(z) which are just passed into the output file + S = sacc.Sacc() + with self.open_input("shear_photoz_stack", wrapper=True) as f: + for i in source_list: + z, Nz = f.get_bin_n_of_z(i) + S.add_tracer("NZ", f"source_{i}", z, Nz) - def get_random_catalog(self, i): - import treecorr - tomo_bin, slice_index = self.index_to_tomograpic_bin_and_slice(i) - - filename = self.get_slice_filename("randoms", tomo_bin, slice_index) - # Load and calibrate the appropriate bin data - cat = treecorr.Catalog( - filename, - ra_col="ra", - dec_col="dec", - w_col="weight", - ra_units="degree", - dec_units="degree", - ) - - return cat - + # Get the lens n(z), from which we also need the mean z + # so that we can turn angles into physical radii. + with self.open_input("lens_photoz_stack", wrapper=True) as f: + # For both source and lens + qp_object = f.read_ensemble() - def write_output(self, source_list, lens_list, meta, results): - print("I honestly did not think we'd get this far...") - breakpoint() + # we skip the "all" bin for now as we are not running it anyway + lens_mean_z = qp_object.mean()[:-1] + for i in lens_list: + z, Nz = f.get_bin_n_of_z(i) + S.add_tracer("NZ", f"lens_{i}", z, Nz) + + # Loop through calculated results and add to SACC + for d in results: + tracer1 = f"source_{d.i}" + tracer2 = f"lens_{d.j}" + + xi = d.object.xi + theta = np.exp(d.object.meanlogr) + # Convert theta to a radius in physical units at the lens redshift + a_lens = 1 / (1 + np.mean(lens_mean_z[d.j])) + r_mpc = cosmo.angular_diameter_distance(a_lens) * np.radians(theta / 60) + + weight = d.object.weight + err = np.sqrt(d.object.varxi) + n = len(xi) + for i in range(n): + S.add_data_point( + "galaxy_shearDensity_deltasigma", + (tracer1, tracer2), + xi[i], + theta=theta[i], + r_mpc=r_mpc[i], + error=err[i], + weight=weight[i], + ) + + # add a few bits of handy metadata + meta["nbin_source"] = len(source_list) + meta["nbin_lens"] = len(lens_list) + self.write_metadata(S, meta) + S.save_fits(self.get_output("delta_sigma"), overwrite=True) + + +class TXDeltaSigmaPlots(PipelineStage): + """Make plots of Delta Sigma results. + + """ + name = "TXDeltaSigmaPlots" + inputs = [ + ("delta_sigma", SACCFile), + ("fiducial_cosmology", FiducialCosmology), + + ] + outputs = [ + ("delta_sigma_plot", PNGFile), + ("delta_sigma_r_plot", PNGFile), +] + config_options = {} + + def run(self): + import sacc + import matplotlib.pyplot as plt + + sacc_data = sacc.Sacc.load_fits(self.get_input("delta_sigma")) + + # Plot in theta coordinates + nbin_source = sacc_data.metadata['nbin_source'] + nbin_lens = sacc_data.metadata['nbin_lens'] + with self.open_output("delta_sigma_plot", wrapper=True, figsize=(5*nbin_lens, 4*nbin_source)) as fig: + axes = fig.file.subplots(nbin_source, nbin_lens, squeeze=False) + for s in range(nbin_source): + for l in range(nbin_lens): + axes[s, l].set_title(f"Source {s}, Lens {l}") + axes[s, l].set_xlabel("Radius [arcmin]") + axes[s, l].set_ylabel(r"$\Delta \Sigma [M_\odot / pc^2]") + axes[s, l].grid() + x = sacc_data.get_tag("theta", tracers=(f"source_{s}", f"lens_{l}")) + y = sacc_data.get_mean(tracers=(f"source_{s}", f"lens_{l}")) + axes[s, l].plot(x, y) + plt.subplots_adjust(hspace=0.3, wspace=0.3) + + # Plot in r coordinates + nbin_source = sacc_data.metadata['nbin_source'] + nbin_lens = sacc_data.metadata['nbin_lens'] + with self.open_output("delta_sigma_r_plot", wrapper=True, figsize=(5*nbin_lens, 4*nbin_source)) as fig: + axes = fig.file.subplots(nbin_source, nbin_lens, squeeze=False) + for s in range(nbin_source): + for l in range(nbin_lens): + axes[s, l].set_title(f"Source {s}, Lens {l}") + axes[s, l].set_xlabel("Radius [Mpc]") + axes[s, l].set_ylabel(r"$R \cdot \Delta \Sigma [M_\odot / pc^2]$") + axes[s, l].grid() + x = sacc_data.get_tag("r_mpc", tracers=(f"source_{s}", f"lens_{l}")) + y = sacc_data.get_mean(tracers=(f"source_{s}", f"lens_{l}")) + axes[s, l].plot(x, y * np.array(x)) + plt.subplots_adjust(hspace=0.3, wspace=0.3) \ No newline at end of file diff --git a/txpipe/twopoint.py b/txpipe/twopoint.py index 7202fc95c..2d5b0b8a4 100755 --- a/txpipe/twopoint.py +++ b/txpipe/twopoint.py @@ -81,7 +81,7 @@ class TXTwoPoint(PipelineStage): "use_randoms": StageParameter(bool, True, msg="Whether to use random catalogs"), "low_mem": StageParameter(bool, False, msg="Whether to use low memory mode"), "patch_dir": StageParameter(str, "./cache/patches", msg="Directory for storing patch files"), - "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk"), + "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk when making patches"), "share_patch_files": StageParameter(bool, False, msg="Whether to share patch files across processes"), "metric": StageParameter(str, "Euclidean", msg="Distance metric to use (Euclidean, Arc, etc.)"), "gaussian_sims_factor": StageParameter( @@ -423,7 +423,7 @@ def write_output(self, source_list, lens_list, meta, results): if self.config["do_shear_pos"] == True: S2.add_tracer("NZ", f"source_{i}", z, Nz) else: - sys.exit("Requesting a measurement that requires source galaxies but no source_list provided") + raise ValueError("Requesting a measurement that requires source galaxies but no source_list provided") if self.config["do_pos_pos"] or self.config["do_shear_pos"]: if lens_list: @@ -435,7 +435,7 @@ def write_output(self, source_list, lens_list, meta, results): if self.config["do_shear_pos"] == True: S2.add_tracer("NZ", f"lens_{i}", z, Nz) else: - sys.exit("Requesting a measurement that requires lens galaxies but no lens_list provided") + raise ValueError("Requesting a measurement that requires lens galaxies but no lens_list provided") # Now build up the collection of data points, adding them all to # the sacc data one by one. diff --git a/txpipe/utils/blank.py b/txpipe/utils/blank.py new file mode 100644 index 000000000..b3281116c --- /dev/null +++ b/txpipe/utils/blank.py @@ -0,0 +1,3 @@ +# This file is intentionally left blank. +# As part of a workaround in TreeCorr we use it as a namespace to +# store things needed to modify the weights in the DeltaSigma calculation. \ No newline at end of file From 4ef186fb7555b306944f548e172ac67731d321e9 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Fri, 7 Nov 2025 10:37:22 +0000 Subject: [PATCH 03/25] add doc --- txpipe/delta_sigma.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 7f7746e45..9fce791e8 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -11,6 +11,11 @@ class TXDeltaSigma(TXTwoPoint): This is a subclass of TXTwoPoint that re-weights the lenses by the inverse critical surface density based on the source redshift distribution. + + The strategy used here is to pre-compute sigma_crit^-1(z_lens) splines + for each source bin, and then use Treecorr's "w_eval" feature to apply + the appropriate weight to each lens based on its redshift when loading + the lens catalog. """ name = "TXDeltaSigma" From f8ab102f9bf4f8c847e9ff8634c7e656428dc67c Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Wed, 29 Apr 2026 16:55:26 -0400 Subject: [PATCH 04/25] desi catalog ingestion and selection stage (initial commit) --- examples/desi_mocks/config.yml | 252 +++++++++++++++++++++++++++++++ examples/desi_mocks/pipeline.yml | 113 ++++++++++++++ txpipe/ingest/desi.py | 144 ++++++++++++++++++ txpipe/lens_selector.py | 140 ++++++++++++++++- 4 files changed, 648 insertions(+), 1 deletion(-) create mode 100644 examples/desi_mocks/config.yml create mode 100644 examples/desi_mocks/pipeline.yml create mode 100644 txpipe/ingest/desi.py diff --git a/examples/desi_mocks/config.yml b/examples/desi_mocks/config.yml new file mode 100644 index 000000000..88e272983 --- /dev/null +++ b/examples/desi_mocks/config.yml @@ -0,0 +1,252 @@ +# Values in this section are accessible to all the different stages. +# They can be overridden by individual stages though. +global: + # This is read by many stages that read complete + # catalog data, and tells them how many rows to read + # at once + chunk_rows: 100000 + # These mapping options are also read by a range of stages + pixelization: healpix + nside: 512 + sparse: true # Generate sparse maps - faster if using small areas + +TXGCRTwoCatalogInput: + metacal_dir: /global/cscratch1/sd/desc/DC2/data/Run2.2i/dpdd/Run2.2i-t3828/metacal_table_summary + photo_dir: /global/cscratch1/sd/desc/DC2/data/Run2.2i/dpdd/Run2.2i-t3828/object_table_summary + +TXMetacalGCRInput: + cat_name: dc2_object_run2.1i_dr1b_with_metacal_griz + +TXExposureInfo: + dc2_name: 1.2p + +TXShearCalibration: {} + +TXCosmoDC2Mock: + cat_name: cosmoDC2_v1.1.4_image + visits_per_band: 16 + extra_cols: redshift_true size_true shear_1 shear_2 + flip_g2: true # to match metacal + +TXIngestRedmagic: + lens_zbin_edges: [0.1, 0.3, 0.5] + +TXIngestDESI: + mock: true + lens_zbin_edges: [0.0, 3.0] + dz: 0.001 + +TXDESISelector: + mock: true + ra_col: ra + dec_col: dec + z_col: redshift + w_col: weight + apply_mask: false + + +PZPDFMLZ: + nz: 301 + zmax: 3.0 + + +PZRailTrainSource: + class_name: BPZ_lite + zmin: 0.0 + zmax: 3.0 + dz: 0.01 + columns_file: ./data/bpz_riz.columns + data_path: ./data/example/rail-bpz-inputs + spectra_file: CWWSB4.list + prior_band: i + # Not sure about this + prior_file: hdfn_gen + p_min: 0.005 + gauss_kernel: 0.0 + mag_err_min: 0.005 + inform_options: {save_train: false, load_model: false, modelfile: BPZpriormodel.out} + madau_reddening: no + bands: riz + zp_errors: [0.01, 0.01, 0.01] + +PZRailEstimateSource: + convert_unseen: true # needed for BPZ + + +# Mock version of stacking: +TXSourceTrueNumberDensity: + nz: 301 + zmax: 3.0 + +# Mock version of stacking: +TXTrueNumberDensity: + nz: 301 + zmax: 3.0 + +TXSourceSelectorMetacal: + input_pz: false + true_z: true + bands: riz #used for selection + T_cut: 0.5 + s2n_cut: 10.0 + max_rows: 1000 + delta_gamma: 0.02 + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + +TXTruthLensSelector: + # Mag cuts + input_pz: false + true_z: true + lens_zbin_edges: [0.1, 0.3, 0.5] + cperp_cut: 0.2 + r_cpar_cut: 13.5 + r_lo_cut: 16.0 + r_hi_cut: 21.6 + i_lo_cut: 17.5 + i_hi_cut: 21.9 + r_i_cut: 2.0 + # may also need one for r_cpar_cut + +TXMeanLensSelector: + # Mag cuts + lens_zbin_edges: [0.0, 0.2, 0.4] + cperp_cut: 0.2 + r_cpar_cut: 13.5 + r_lo_cut: 16.0 + r_hi_cut: 21.6 + i_lo_cut: 17.5 + i_hi_cut: 21.9 + r_i_cut: 2.0 + +TXModeLensSelector: + # Mag cuts + lens_zbin_edges: [0.0, 0.2, 0.4] + cperp_cut: 0.2 + r_cpar_cut: 13.5 + r_lo_cut: 16.0 + r_hi_cut: 21.6 + i_lo_cut: 17.5 + i_hi_cut: 21.9 + r_i_cut: 2.0 + +TXRandomCat: + density: 10 # gals per sq arcmin + +TXTwoPoint: + bin_slop: 0.1 + delta_gamma: 0.02 + do_pos_pos: true + do_shear_shear: true + do_shear_pos: true + flip_g2: true # use true when using metacal shears + min_sep: 2.5 + max_sep: 60.0 + nbins: 10 + verbose: 0 + subtract_mean_shear: true + +TXGammaTBrightStars: {} + +TXGammaTDimStars: {} + +TXGammaTRandoms: {} + +TXGammaTFieldCenters: {} + +TXBlinding: + seed: 1972 ## seed uniquely specifies the shift in parameters + Omega_b: [0.0485, 0.001] ## fiducial_model_value, shift_sigma + Omega_c: [0.2545, 0.01] + w0: [-1.0, 0.1] + h: [0.682, 0.02] + sigma8: [0.801, 0.01] + n_s: [0.971, 0.03] + b0: 0.95 ## we use bias of the form b0/g + delete_unblinded: true + +TXSourceDiagnosticPlots: + nbins: 20 + g_min: -0.01 + g_max: 0.01 + psfT_min: 0.2 + psfT_max: 0.28 + T_min: 0.01 + T_max: 2.1 + s2n_min: 1.25 + s2n_max: 300 + +TXDiagnosticMaps: + sparse: true # Generate sparse maps - faster if using small areas + snr_threshold: 10.0 + snr_delta: 1.0 + # pixelization: gnomonic + pixel_size: 0.2 + ra_cent: 62. + dec_cent: -35. + npix_x: 60 + npix_y: 60 + depth_cut: 23.0 + +TXSourceMaps: + sparse: true # Generate sparse maps - faster if using small areas + +TXLensMaps: + sparse: true # Generate sparse maps - faster if using small areas + +# For redmagic mapping +TXExternalLensMaps: + chunk_rows: 100000 + sparse: true + pixelization: healpix + nside: 512 + + + +TXAuxiliarySourceMaps: + flag_exponent_max: 8 + +TXAuxiliaryLensMaps: + flag_exponent_max: 8 + bright_obj_threshold: 22.0 # The magnitude threshold for a object to be counted as bright + depth_band: i + snr_threshold: 10.0 # The S/N value to generate maps for (e.g. 5 for 5-sigma depth) + snr_delta: 1.0 # The range threshold +/- delta is used for finding objects at the boundary + +TXRealGaussianCovariance: + min_sep: 2.5 + max_sep: 60. + nbins: 10 + pickled_wigner_transform: data/example/inputs/wigner.pkl + + +TXJackknifeCenters: + npatch: 5 + + +TXTwoPointFourier: + flip_g2: true + bandwidth: 100 + +TXSimpleMask: + depth_cut: 23.5 + bright_object_max: 10.0 + +TXMapCorrelations: + supreme_path_root: data/example/inputs/supreme + outlier_fraction: 0.05 + nbin: 20 + +TXPhotozPlotSource: + name: TXPhotozPlotSource +TXPhotozPlotLens: + name: TXPhotozPlotLens +TXTruePhotozStackSource: + name: TXTruePhotozStackSource + weight_col: metacal/weight + redshift_group: metacal + zmax: 2.0 + nz: 201 diff --git a/examples/desi_mocks/pipeline.yml b/examples/desi_mocks/pipeline.yml new file mode 100644 index 000000000..df3eb8367 --- /dev/null +++ b/examples/desi_mocks/pipeline.yml @@ -0,0 +1,113 @@ +# Stages to run +stages: + # - name: TXSourceSelectorMetacal + # - name: TXSourceTomography + # - name: TXShearCalibration + # - name: TXExternalLensCatalogSplitter + # - name: TXStarCatalogSplitter + # - name: TXTruePhotozStackSource + # classname: TXTruePhotozStack + # aliases: + # tomography_catalog: shear_tomography_catalog + # catalog: shear_catalog + # weights_catalog: shear_catalog + # photoz_stack: shear_photoz_stack + # - name: TXIngestRedmagic + - name: TXDESISelector + - name: TXIngestDESI + # - name: TXTracerMetadata + # - name: TXSourceMaps + # - name: TXExternalLensMaps + # - name: TXAuxiliarySourceMaps + # - name: TXAuxiliaryLensMaps + # - name: TXSimpleMask + # - name: TXLSSWeightsUnit + # - name: TXDensityMaps + # - name: TXMapPlots + # - name: TXRandomCat + # - name: TXJackknifeCenters + # - name: TXTwoPoint + # threads_per_process: 2 + # - name: TXBlinding # Blind the data following Muir et al + # threads_per_process: 2 + # - name: TXTwoPointTheoryReal # compute theory using CCL to save in sacc file and plot later + # - name: TXPhotozPlotSource # Plot the bin n(z) + # classname: TXPhotozPlot + # aliases: + # photoz_stack: shear_photoz_stack + # nz_plot: nz_source + - name: TXPhotozPlotLens # Plot the bin n(z) + classname: TXPhotozPlot + aliases: + photoz_stack: lens_photoz_stack + nz_plot: nz_lens + # - name: TXDiagnosticQuantiles + # - name: TXSourceDiagnosticPlots # Make a suite of diagnostic plots + # - name: TXGammaTFieldCenters # Compute and plot gamma_t around center points + # threads_per_process: 2 + # - name: TXGammaTRandoms # Compute and plot gamma_t around randoms + # threads_per_process: 2 + # - name: TXGammaTStars # Compute and plot gamma_t around dim stars + # threads_per_process: 2 + # - name: TXRoweStatistics # Compute and plot Rowe statistics + # threads_per_process: 2 + # - name: TXPSFDiagnostics # Compute and plots other PSF diagnostics + # - name: TXBrighterFatterPlot # Make plots tracking the brighter-fatter effect + # - name: TXSourceNoiseMaps + # - name: TXExternalLensNoiseMaps + # - name: TXConvergenceMaps # Make convergence kappa maps from g1, g2 maps + # - name: TXConvergenceMapPlots # Plot the convergence map + # - name: TXTwoPointPlots + # - name: TXTwoPointFourier + # threads_per_process: 2 + # - name: TXRealGaussianCovariance # Compute covariance of real-space correlations + +# Where to put outputs +output_dir: data/example/outputs_desi_mocks + +# How to run the pipeline: mini, parsl, or cwl +launcher: + name: mini + interval: 1.0 + +# Where to run the pipeline: cori-interactive, cori-batch, or local +site: + name: local + max_threads: 2 + +# modules and packages to import that have pipeline +# stages defined in them +modules: txpipe + +# where to find any modules that are not in this repo, +# and any other code we need. +python_paths: + - submodules/WLMassMap/python/desc/ + +# configuration settings +config: examples/desi_mocks/config.yml + +# On NERSC, set this before running: +# export DATA=${LSST}/groups/WL/users/zuntz/data/metacal-testbed + +inputs: + # See README for paths to download these files + # shear_catalog: data/example/inputs/shear_catalog.hdf5 + # photometry_catalog: data/example/inputs/photometry_catalog.hdf5 + # photoz_trained_model: data/example/inputs/cosmoDC2_trees_i25.3.npy + # exposures: data/example/inputs/exposures.hdf5 + # star_catalog: data/example/inputs/star_catalog.hdf5 + desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/output_select_lsst_obs_cond_dp2_DESI_ELG_flagship.pq + # mask should be already a combination of source and lens. + mask: None # /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_eg.fits + # This file comes with the code + # fiducial_cosmology: data/fiducial_cosmology.yml + # spectroscopic_catalog: data/example/inputs/mock_spectroscopic_catalog.hdf5 + +# if supported by the launcher, restart the pipeline where it left off +# if interrupted +resume: true +# where to put output logs for individual stages +log_dir: data/example/logs_desi_mocks +# where to put an overall parsl pipeline log +pipeline_log: data/example/log_desi_mocks.txt \ No newline at end of file diff --git a/txpipe/ingest/desi.py b/txpipe/ingest/desi.py new file mode 100644 index 000000000..54ff5590c --- /dev/null +++ b/txpipe/ingest/desi.py @@ -0,0 +1,144 @@ +from ..base_stage import PipelineStage +from ..data_types import HDFFile, FitsFile, QPNOfZFile +import numpy as np +from ceci.config import StageParameter + + +class TXIngestDESI(PipelineStage): + """ + Ingest a DESI catalog (mock or real) + + This starts with the FITS file format. + """ + + name = "TXIngestDESI" + parallel = False + + inputs = [ + ("desi_catalog_selected", FitsFile), # from stage 1 +] + + outputs = [ + ("lens_catalog", HDFFile), + ("lens_tomography_catalog_unweighted", HDFFile), + ("lens_photoz_stack", QPNOfZFile), + ] + + # TODO: CHANGE CONFIG OPTIONS + config_options = { + "lens_zbin_edges": StageParameter(list, [float], msg="Edges of lens redshift bins."), + "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk."), + "zmin": StageParameter(float, 0.0, msg="Minimum redshift for binning."), + "zmax": StageParameter(float, 3.0, msg="Maximum redshift for binning."), + "dz": StageParameter(float, 0.01, msg="Redshift bin width."), + #"bands": StageParameter(str, "grizy", msg="Bands to use for DESI selection."), + "mock": StageParameter(bool, False, msg="Whether to use a mock catalog."), + } + + + def run(self): + import qp + + # Count number of objects + f = self.open_input("desi_catalog_selected") + n = f[1].get_nrows() + f.close() + + chunk_rows = self.config["chunk_rows"] + # bands = self.config["bands"] + zbin_edges = self.config["lens_zbin_edges"] + nbin_lens = len(zbin_edges) - 1 + + cat = self.open_output("lens_catalog") + tomo = self.open_output("lens_tomography_catalog_unweighted") + + # redshift grid + zmin = self.config["zmin"] + zmax = self.config["zmax"] + dz = self.config["dz"] + z_grid = np.arange(zmin, zmax, dz) + nz_grid = np.zeros((nbin_lens + 1, z_grid.size)) + nz = len(z_grid) + + # Create space in outputs + g = cat.create_group("lens") + # g.attrs["bands"] = bands + g.create_dataset("ra", (n,), dtype=np.float64) + g.create_dataset("dec", (n,), dtype=np.float64) + g.create_dataset("redshift", (n,), dtype=np.float64) + # for b in bands: + # g.create_dataset(f"mag_{b}", (n,), dtype=np.float64) + # g.create_dataset(f"mag_err_{b}", (n,), dtype=np.float64) + # g.attrs["bands"] = bands + + h = tomo.create_group("tomography") + h.create_dataset("bin", (n,), dtype=np.int32) + h.create_dataset("lens_weight", (n,), dtype=np.float64) + h.attrs["nbin"] = nbin_lens + h.attrs[f"zbin_edges"] = zbin_edges + h_counts = tomo.create_group("counts") + h_counts.create_dataset("counts", (nbin_lens,), dtype="i") + h_counts.create_dataset("counts_2d", (1,), dtype="i") + + # we keep track of the counts per-bin also + counts = np.zeros(nbin_lens, dtype=np.int64) + counts_2d = 0 + + # all cols that might be useful + cols = ["ra", "dec", "redshift"] + + for s, e, data in self.iterate_fits("desi_catalog_selected", 1, cols, chunk_rows): + n = data["ra"].size + z = data["redshift"] + # Unit weight still + weight = np.repeat(1.0, n) + + # work out the redshift bin for each object, if any. + # do any other selection here + zbin = np.digitize(z, zbin_edges) - 1 + zbin[zbin == nbin_lens] = -1 + + # # can select on any other criterion here, e.g. + # # mag or chisq. This is an example + # sel = data["chisq"] < 10 + # # deselect these objects + # zbin[~sel] = -1 + + # Build up the count of the n(z) histograms per-bin + z_grid_index = np.floor((z - zmin) / dz).astype(int) + for i, (i_z, b) in enumerate(zip(z_grid_index, zbin)): + if b >= 0: + nz_grid[b, i_z] += weight[i] + + # Build up the counts + any_bin = zbin >= 0 + counts += np.bincount(zbin[any_bin], minlength=nbin_lens) + counts_2d += any_bin.sum() + + # save data + g["ra"][s:e] = data["ra"] + g["dec"][s:e] = data["dec"] + g["redshift"][s:e] = data["redshift"] + + # # including mags + # for i, b in enumerate(bands): + # g[f"mag_{b}"][s:e] = data["mag"][:, i] + # g[f"mag_err_{b}"][s:e] = data["mag_err"][:, i] + + h["bin"][s:e] = zbin + if self.config["mock"]: + h["lens_weight"][s:e] = 1.0 + else: + h["lens_weight"][s:e] = data["weight"] + + # this is an overall count + h_counts["counts"][:] = counts + h_counts["counts_2d"][:] = counts_2d + + # Generate and save the 2D n(z) histogram also, just + # by summing up all the individual values. + nz_grid[-1] = nz_grid[:-1].sum(axis=0) + + stack_object = qp.Ensemble(qp.hist, data={"bins": z_grid, "pdfs": nz_grid[:, :-1]}) + with self.open_output("lens_photoz_stack", wrapper=True) as stack: + stack.write_ensemble(stack_object) \ No newline at end of file diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py index 0c264ec92..40f0ca1ff 100755 --- a/txpipe/lens_selector.py +++ b/txpipe/lens_selector.py @@ -8,6 +8,7 @@ PhotometryCatalog, FitsFile, MapsFile, + ParquetFile ) from .utils import LensNumberDensityStats, Splitter, rename_iterated from .binning import build_tomographic_classifier, apply_classifier, read_training_data @@ -289,7 +290,7 @@ def select_lens_boss(self, phot_data): lens_gals[lens_mask] = 1 return lens_gals - + def select_lens_maglim(self, phot_data): band = self.config["maglim_band"] limit = self.config["maglim_limit"] @@ -805,6 +806,143 @@ def data_iterator(self): return self.add_redshifts(iterator) +class TXDESISelector(PipelineStage): + """ + Select lens galaxies from a DESI spectroscopic catalog. + + Reads a raw DESI FITS catalog and applies: + TODO: ADJUST NECESSARY CUTS FOR MOCK AND REAL DATA + - ZWARN quality cut (good redshift fits only) + - Redshift range cut + - SPECTYPE filter (e.g. GALAXY only) + - Magnitude cuts (bright/faint limits in a chosen band) + - Optional survey mask + + Outputs a standardized HDF5 file with a photometry/ group containing + ra, dec, z. This file is consumed by TXIngestDESI. + """ + + name = "TXDESISelector" + parallel = False + + + outputs = [ + ("desi_catalog_selected", FitsFile), + ] + + # PLACEHOLDERS: WILL UPDATE. + config_options = { + "chunk_rows": StageParameter(int, 100_000, msg="Rows per chunk."), + "zmin": StageParameter(float, 0.0, msg="Minimum spectroscopic redshift."), + "zmax": StageParameter(float, 3.0, msg="Maximum spectroscopic redshift."), + "mag_band": StageParameter(str, "r", msg="Band to apply magnitude limits in."), + "apply_mask": StageParameter(bool, False, msg="Apply survey mask from the mask input."), + "ra_col": StageParameter(str, "ra", msg="RA column name in the DESI catalog."), + "dec_col": StageParameter(str, "dec", msg="Dec column name in the DESI catalog."), + "z_col": StageParameter(str, "redshift", msg="Spectroscopic redshift column name."), + "w_col": StageParameter(str, "weight", msg="Weight column name."), + "bands": StageParameter(str, "ugrizy", msg="Band labels for output magnitudes (one char each)."), + "mock": StageParameter(bool, False, msg="Whether to use a mock catalog."), + } + + # ParquetFile for mock catalogs; swap to FitsFile here if working with real DESI FITS + inputs = [ + ("desi_catalog", ParquetFile), + ("mask", MapsFile), + ] + + def run(self): + chunk_rows = self.config["chunk_rows"] + bands = self.config["bands"] + ra_col = self.config["ra_col"] + dec_col = self.config["dec_col"] + z_col = self.config["z_col"] + w_col = self.config["w_col"] + + if self.config["apply_mask"]: + with self.open_input("mask", wrapper=True) as f: + mask, mask_nside = f.read_healsparse("mask", return_all=True) + else: + mask = mask_nside = None + + if self.config["mock"]: + cols = [ra_col, dec_col, z_col] + [f"mag_{b}_lsst" for b in bands] + else: + cols = [ra_col, dec_col, z_col, w_col] + [f"mag_{b}_lsst" for b in bands] + + # Accumulate selected rows across chunks; write once at the end + out_cols = {name: [] for name in ["ra", "dec", "redshift", "weight"] + [f"mag_{b}" for b in bands]} + + n_total = 0 + n_selected = 0 + + iterator = self._iterate_catalog(cols, chunk_rows) + for s, e, data in iterator: + sel = self._apply_cuts(data, mask, mask_nside) + n_chunk = int(sel.sum()) + n_total += data[ra_col].size + n_selected += n_chunk + + print(f"Rows {s:,}-{e:,}: selected {n_chunk:,} / {data[ra_col].size:,}") + + if n_chunk == 0: + continue + + out_cols["ra"].append(data[ra_col][sel]) + out_cols["dec"].append(data[dec_col][sel]) + out_cols["redshift"].append(data[z_col][sel]) + if self.config["mock"]: + out_cols["weight"].append(np.ones(n_chunk)) + else: + out_cols["weight"].append(data[w_col][sel]) + for b in bands: + out_cols[f"mag_{b}"].append(data[f"mag_{b}_lsst"][sel]) + + print(f"Total selected: {n_selected:,} / {n_total:,}") + + from astropy.table import Table + table = Table({name: np.concatenate(chunks) for name, chunks in out_cols.items()}) + table.write(self.get_output("desi_catalog_selected"), overwrite=True) + + def _iterate_catalog(self, cols, chunk_rows): + if self.config["mock"]: + yield from self._iterate_parquet(cols, chunk_rows) + else: + yield from self.iterate_fits("desi_catalog", 1, cols, chunk_rows) + + def _iterate_parquet(self, cols, chunk_rows): + from pyarrow.parquet import ParquetFile as PQFile + fn = self.get_input("desi_catalog") + start = 0 + with PQFile(fn) as f: + for batch in f.iter_batches(columns=cols, batch_size=chunk_rows): + data = {name: batch[name].to_numpy(zero_copy_only=False) + for name in cols} + n = len(data[cols[0]]) + end = start + n + yield start, end, data + start = end + + def _apply_cuts(self, data, mask, mask_nside): + ra_col = self.config["ra_col"] + dec_col = self.config["dec_col"] + z_col = self.config["z_col"] + + n = data[ra_col].size + sel = np.ones(n, dtype=bool) + + # Redshift range + z = data[z_col] + sel &= (z >= self.config["zmin"]) & (z < self.config["zmax"]) + + # Survey mask + if mask is not None: + import healpy as hp + pix = hp.ang2pix(mask_nside, data[ra_col], data[dec_col], lonlat=True) + sel &= mask[pix] != hp.UNSEEN + + return sel + if __name__ == "__main__": PipelineStage.main() From 98d5c2e31475601072763f51b51f32f8ba8a6e7f Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Wed, 29 Apr 2026 17:23:12 -0400 Subject: [PATCH 05/25] change to iterator --- txpipe/lens_selector.py | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py index 40f0ca1ff..36fcb8ee2 100755 --- a/txpipe/lens_selector.py +++ b/txpipe/lens_selector.py @@ -852,7 +852,6 @@ class TXDESISelector(PipelineStage): ] def run(self): - chunk_rows = self.config["chunk_rows"] bands = self.config["bands"] ra_col = self.config["ra_col"] dec_col = self.config["dec_col"] @@ -861,14 +860,9 @@ def run(self): if self.config["apply_mask"]: with self.open_input("mask", wrapper=True) as f: - mask, mask_nside = f.read_healsparse("mask", return_all=True) - else: - mask = mask_nside = None - - if self.config["mock"]: - cols = [ra_col, dec_col, z_col] + [f"mag_{b}_lsst" for b in bands] + self.mask, self.mask_nside = f.read_healsparse("mask", return_all=True) else: - cols = [ra_col, dec_col, z_col, w_col] + [f"mag_{b}_lsst" for b in bands] + self.mask = self.mask_nside = None # Accumulate selected rows across chunks; write once at the end out_cols = {name: [] for name in ["ra", "dec", "redshift", "weight"] + [f"mag_{b}" for b in bands]} @@ -876,9 +870,8 @@ def run(self): n_total = 0 n_selected = 0 - iterator = self._iterate_catalog(cols, chunk_rows) - for s, e, data in iterator: - sel = self._apply_cuts(data, mask, mask_nside) + for s, e, data in self.data_iterator(): + sel = self._apply_cuts(data) n_chunk = int(sel.sum()) n_total += data[ra_col].size n_selected += n_chunk @@ -904,10 +897,19 @@ def run(self): table = Table({name: np.concatenate(chunks) for name, chunks in out_cols.items()}) table.write(self.get_output("desi_catalog_selected"), overwrite=True) - def _iterate_catalog(self, cols, chunk_rows): + def data_iterator(self): + chunk_rows = self.config["chunk_rows"] + ra_col = self.config["ra_col"] + dec_col = self.config["dec_col"] + z_col = self.config["z_col"] + w_col = self.config["w_col"] + bands = self.config["bands"] + if self.config["mock"]: + cols = [ra_col, dec_col, z_col] + [f"mag_{b}_lsst" for b in bands] yield from self._iterate_parquet(cols, chunk_rows) else: + cols = [ra_col, dec_col, z_col, w_col] + [f"mag_{b}_lsst" for b in bands] yield from self.iterate_fits("desi_catalog", 1, cols, chunk_rows) def _iterate_parquet(self, cols, chunk_rows): @@ -916,14 +918,13 @@ def _iterate_parquet(self, cols, chunk_rows): start = 0 with PQFile(fn) as f: for batch in f.iter_batches(columns=cols, batch_size=chunk_rows): - data = {name: batch[name].to_numpy(zero_copy_only=False) - for name in cols} + data = {name: batch[name].to_numpy(zero_copy_only=False) for name in cols} n = len(data[cols[0]]) end = start + n yield start, end, data start = end - def _apply_cuts(self, data, mask, mask_nside): + def _apply_cuts(self, data): ra_col = self.config["ra_col"] dec_col = self.config["dec_col"] z_col = self.config["z_col"] @@ -934,12 +935,12 @@ def _apply_cuts(self, data, mask, mask_nside): # Redshift range z = data[z_col] sel &= (z >= self.config["zmin"]) & (z < self.config["zmax"]) - + # Survey mask - if mask is not None: + if self.mask is not None: import healpy as hp - pix = hp.ang2pix(mask_nside, data[ra_col], data[dec_col], lonlat=True) - sel &= mask[pix] != hp.UNSEEN + pix = hp.ang2pix(self.mask_nside, data[ra_col], data[dec_col], lonlat=True) + sel &= self.mask[pix] != hp.UNSEEN return sel From a696fa6d47d4ba1f8022bc82ada28fd2805f76f0 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Thu, 30 Apr 2026 09:51:23 +0100 Subject: [PATCH 06/25] remove old delta-sigma version --- txpipe/delta_sigma.py | 241 ++++-------------------------------------- 1 file changed, 21 insertions(+), 220 deletions(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 9fce791e8..105d4f055 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -8,15 +8,8 @@ class TXDeltaSigma(TXTwoPoint): """Compute Delta-Sigma, the excess surface density around lenses. - - This is a subclass of TXTwoPoint that re-weights the lenses by the - inverse critical surface density based on the source redshift distribution. - - The strategy used here is to pre-compute sigma_crit^-1(z_lens) splines - for each source bin, and then use Treecorr's "w_eval" feature to apply - the appropriate weight to each lens based on its redshift when loading - the lens catalog. - """ + This version uses the dSigma code. + """ name = "TXDeltaSigma" @@ -26,225 +19,33 @@ class TXDeltaSigma(TXTwoPoint): ("binned_random_catalog", HDFFile), ("shear_photoz_stack", QPNOfZFile), ("lens_photoz_stack", QPNOfZFile), - ("tracer_metadata", HDFFile), - ("patch_centers", TextFile), + # ("tracer_metadata", HDFFile), ("fiducial_cosmology", FiducialCosmology), ] outputs = [("delta_sigma", SACCFile)] config_options = TREECORR_CONFIG | { - "source_bins": StageParameter(list, [-1], msg="List of source bins to use (-1 means all)"), - "lens_bins": StageParameter(list, [-1], msg="List of lens bins to use (-1 means all)"), - "var_method": StageParameter(str, "jackknife", msg="Method for computing variance (jackknife, sample, etc.)"), - "use_randoms": StageParameter(bool, True, msg="Whether to use random catalogs"), - "low_mem": StageParameter(bool, False, msg="Whether to use low memory mode"), - "patch_dir": StageParameter(str, "./cache/delta-sigma-patches", msg="Directory for storing patch files"), - "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk when making patches"), - "share_patch_files": StageParameter(bool, False, msg="Whether to share patch files across processes"), - "metric": StageParameter(str, "Euclidean", msg="Distance metric to use (Euclidean, Arc, etc.)"), - "gaussian_sims_factor": StageParameter( - list, - default=[1.0], - msg="Factor by which to decrease lens density to account for increased density contrast.", - ), - # "use_subsampled_randoms": StageParameter(bool, False, msg="Use subsampled randoms file for RR calculation"), - "delta_z": StageParameter(float, 0.001, msg="Z bin width for sigma_crit spline computation"), + # "source_bins": StageParameter(list, [-1], msg="List of source bins to use (-1 means all)"), + # "lens_bins": StageParameter(list, [-1], msg="List of lens bins to use (-1 means all)"), + # "var_method": StageParameter(str, "jackknife", msg="Method for computing variance (jackknife, sample, etc.)"), + # "use_randoms": StageParameter(bool, True, msg="Whether to use random catalogs"), + # "low_mem": StageParameter(bool, False, msg="Whether to use low memory mode"), + # "patch_dir": StageParameter(str, "./cache/delta-sigma-patches", msg="Directory for storing patch files"), + # "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk when making patches"), + # "share_patch_files": StageParameter(bool, False, msg="Whether to share patch files across processes"), + # "metric": StageParameter(str, "Euclidean", msg="Distance metric to use (Euclidean, Arc, etc.)"), + # "gaussian_sims_factor": StageParameter( + # list, + # default=[1.0], + # msg="Factor by which to decrease lens density to account for increased density contrast.", + # ), + # # "use_subsampled_randoms": StageParameter(bool, False, msg="Use subsampled randoms file for RR calculation"), + # "delta_z": StageParameter(float, 0.001, msg="Z bin width for sigma_crit spline computation"), } - - def select_calculations(self, source_list, lens_list): - calcs = [] - # Fill in these config items that are not relevant for Delta Sigma - # but are needed by the parent class. - self.config["do_shear_pos"] = True - self.config["do_shear_shear"] = False - self.config["do_pos_pos"] = False - self.config["use_subsampled_randoms"] = False - - # For Delta Sigma everything is just shear-position - k = SHEAR_POS - for i in source_list: - for j in lens_list: - calcs.append((i, j, k)) - - if self.rank == 0: - print(f"Running {len(calcs)} calculations: {calcs}") - - return calcs - - - def read_nbin(self): - source_list, lens_list = super().read_nbin() - # This is a convenient place to also compute the sigma_crit_inverse splines - # that we need later - self.compute_sigma_crit_inverse_splines(source_list) - return source_list, lens_list - - - def compute_sigma_crit_inverse_spline(self, z, nz): - import scipy.interpolate - import scipy.integrate - - # We need a fiducial cosmology to convert redshifts to distances. - # This is why I don't like Delta Sigma. - with self.open_input("fiducial_cosmology", wrapper=True) as f: - cosmo = f.to_ccl() - - # Another option would be to use the QP object directly and use - # its .pdf() method. That's probably better than this. - zs_spline = scipy.interpolate.InterpolatedUnivariateSpline(z, nz, ext="zeros") - zmax = z.max() + 0.1 - dz = self.config["delta_z"] - - # We integrate this over source redshift for each lens redshift - # It's just a call to CCL weighted by the n(z) - def integrand(zl, zs): - w = np.where(zs > zl) - a_l = 1 / (1 + zl) - a_s = 1 / (1 + zs) - out = np.zeros_like(zs) - # https://ccl.readthedocs.io/en/latest/api/pyccl.cosmology.html#pyccl.cosmology.Cosmology.sigma_critical - sigma_crit = cosmo.sigma_critical(a_lens=a_l, a_source=a_s[w]) - n_z = zs_spline(zs[w]) - out[w] = n_z / sigma_crit - return out - - # Set up the grids needed by the integrals - zl_grid = np.arange(0, zmax, dz) - zs_grid = np.arange(0, zmax, dz) - sigma_crit_inv = np.zeros_like(zl_grid) - - # Do the integrals, just trapezoidal rule - for i, zli in enumerate(zl_grid[1:-1]): - integrand_vals = integrand(zli, zs_grid) - sigma_crit_inv[i] = scipy.integrate.trapezoid(integrand_vals, zs_grid) - - # This gives us the mean sigma_crit_inverse for each lens z - spline = scipy.interpolate.UnivariateSpline(zl_grid, sigma_crit_inv) - return spline - - - def compute_sigma_crit_inverse_splines(self, source_list): - """ - For each source bin, compute and store the spline - for sigma_crit_inverse as a function of lens redshift. - """ - from .utils import blank - splines = {} - with self.open_input("shear_photoz_stack", wrapper=True) as f: - for i in source_list: - if i == "all": - z, Nz = f.get_2d_n_of_z() - else: - z, Nz = f.get_bin_n_of_z(i) - if self.rank == 0: - print("Computing sigma_crit_inverse spline for source bin", i) - splines[i] = self.compute_sigma_crit_inverse_spline(z, Nz) - blank.sigma_crit_inverse_splines = splines - - def get_shear_catalog(self, i): - """ - This is a total hack. We want the get_lens_catalog to re-scale - the weights based on which source bin is being used. So we store - the source bin index in self.active_shear_catalog here, so that - get_lens_catalog can access it. - - This only works because the parent class calls get_shear_catalog - before get_lens_catalog. - - If that does change it should be obvious because self.active_shear_catalog - will be undefined. - """ - self.active_shear_catalog = i - return super().get_shear_catalog(i) - - - def get_lens_catalog(self, i): - import treecorr - from .utils import blank - treecorr.Catalog.eval_modules.append("txpipe.utils.blank as splines") - - # To get the scaling into Treecorr without re-creating multiple - # copies of all the files we have to pass the redshift into - # another module that treecorr can access and use the "w_eval" - # parameter to tell it to do it. - with self.open_input("binned_lens_catalog") as f: - redshift = f[f"/lens/bin_{i}/z"][:] - blank.redshift = redshift + def run(self): + import dSigma - # Load the catalog. Note that we are not loading the weight - # from the file, because we want to scale it by 1/sigma_crit - # here. - cat = treecorr.Catalog( - self.get_input("binned_lens_catalog"), - ext=f"/lens/bin_{i}", - ra_col="ra", - dec_col="dec", - w_col="weight", - ra_units="degree", - dec_units="degree", - # Apply a function that rescales the weights - w_eval=f"weight / splines.sigma_crit_inverse_splines[{self.active_shear_catalog}](splines.redshift)", - patch_centers=self.get_input("patch_centers"), - save_patch_dir=self.get_patch_dir("binned_lens_catalog", i), - ) - - return cat - - def write_output(self, source_list, lens_list, meta, results): - import sacc - with self.open_input("fiducial_cosmology", wrapper=True) as f: - cosmo = f.to_ccl() - - # Get the source n(z) which are just passed into the output file - S = sacc.Sacc() - with self.open_input("shear_photoz_stack", wrapper=True) as f: - for i in source_list: - z, Nz = f.get_bin_n_of_z(i) - S.add_tracer("NZ", f"source_{i}", z, Nz) - - # Get the lens n(z), from which we also need the mean z - # so that we can turn angles into physical radii. - with self.open_input("lens_photoz_stack", wrapper=True) as f: - # For both source and lens - qp_object = f.read_ensemble() - - # we skip the "all" bin for now as we are not running it anyway - lens_mean_z = qp_object.mean()[:-1] - for i in lens_list: - z, Nz = f.get_bin_n_of_z(i) - S.add_tracer("NZ", f"lens_{i}", z, Nz) - - # Loop through calculated results and add to SACC - for d in results: - tracer1 = f"source_{d.i}" - tracer2 = f"lens_{d.j}" - - xi = d.object.xi - theta = np.exp(d.object.meanlogr) - # Convert theta to a radius in physical units at the lens redshift - a_lens = 1 / (1 + np.mean(lens_mean_z[d.j])) - r_mpc = cosmo.angular_diameter_distance(a_lens) * np.radians(theta / 60) - - weight = d.object.weight - err = np.sqrt(d.object.varxi) - n = len(xi) - for i in range(n): - S.add_data_point( - "galaxy_shearDensity_deltasigma", - (tracer1, tracer2), - xi[i], - theta=theta[i], - r_mpc=r_mpc[i], - error=err[i], - weight=weight[i], - ) - - # add a few bits of handy metadata - meta["nbin_source"] = len(source_list) - meta["nbin_lens"] = len(lens_list) - self.write_metadata(S, meta) - S.save_fits(self.get_output("delta_sigma"), overwrite=True) class TXDeltaSigmaPlots(PipelineStage): From f1203cf3aae7cf5137751f8b90f7b5a1afa9bfce Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Thu, 30 Apr 2026 15:40:04 +0100 Subject: [PATCH 07/25] Update to using dSigma in delta-sigma calculation --- txpipe/data_types.py | 23 +++ txpipe/delta_sigma.py | 384 +++++++++++++++++++++++++++++++++++++----- txpipe/twopoint.py | 20 +-- 3 files changed, 362 insertions(+), 65 deletions(-) diff --git a/txpipe/data_types.py b/txpipe/data_types.py index ada051b33..82331a70f 100755 --- a/txpipe/data_types.py +++ b/txpipe/data_types.py @@ -709,6 +709,29 @@ def read_provenance(self): return provenance + @classmethod + def add_metadata(cls, sacc_object, provenance, meta): + # We also save the associated metadata to the file + for k, v in meta.items(): + if np.isscalar(v): + sacc_object.metadata[k] = v + else: + for i, vi in enumerate(v): + sacc_object.metadata[f"{k}_{i}"] = vi + + # Add provenance metadata. In managed formats this is done + # automatically, but because the Sacc library is external + # we do it manually here. + provenance.update(SACCFile.generate_provenance()) + for key, value in provenance.items(): + if isinstance(value, str) and "\n" in value: + values = value.split("\n") + for i, v in enumerate(values): + sacc_object.metadata[f"provenance/{key}_{i}"] = v + else: + sacc_object.metadata[f"provenance/{key}"] = value + + def close(self): pass diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 105d4f055..50d83d96d 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -9,14 +9,18 @@ class TXDeltaSigma(TXTwoPoint): """Compute Delta-Sigma, the excess surface density around lenses. This version uses the dSigma code. - """ + """ name = "TXDeltaSigma" inputs = [ ("binned_shear_catalog", ShearCatalog), ("binned_lens_catalog", HDFFile), + # we use both the binned randoms for the case where we split + # the lens catalog tomographically and the full version for + # when we do the 2D stack of all the lenses together ("binned_random_catalog", HDFFile), + ("random_cats", HDFFile), ("shear_photoz_stack", QPNOfZFile), ("lens_photoz_stack", QPNOfZFile), # ("tracer_metadata", HDFFile), @@ -24,72 +28,360 @@ class TXDeltaSigma(TXTwoPoint): ] outputs = [("delta_sigma", SACCFile)] - config_options = TREECORR_CONFIG | { - # "source_bins": StageParameter(list, [-1], msg="List of source bins to use (-1 means all)"), - # "lens_bins": StageParameter(list, [-1], msg="List of lens bins to use (-1 means all)"), - # "var_method": StageParameter(str, "jackknife", msg="Method for computing variance (jackknife, sample, etc.)"), - # "use_randoms": StageParameter(bool, True, msg="Whether to use random catalogs"), - # "low_mem": StageParameter(bool, False, msg="Whether to use low memory mode"), - # "patch_dir": StageParameter(str, "./cache/delta-sigma-patches", msg="Directory for storing patch files"), - # "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk when making patches"), - # "share_patch_files": StageParameter(bool, False, msg="Whether to share patch files across processes"), - # "metric": StageParameter(str, "Euclidean", msg="Distance metric to use (Euclidean, Arc, etc.)"), - # "gaussian_sims_factor": StageParameter( - # list, - # default=[1.0], - # msg="Factor by which to decrease lens density to account for increased density contrast.", - # ), - # # "use_subsampled_randoms": StageParameter(bool, False, msg="Use subsampled randoms file for RR calculation"), - # "delta_z": StageParameter(float, 0.001, msg="Z bin width for sigma_crit spline computation"), + config_options = { + "source_bins": StageParameter(list, [-1], msg="List of source bins to use (-1 means all)"), + "lens_bins": StageParameter(list, [-1], msg="List of lens bins to use (-1 means all)"), + "r_min": StageParameter(float, 0.1, msg="Minimum radius to use in Mpc"), + "r_max": StageParameter(float, 10.0, msg="Maximum radius to use in Mpc"), + "nbins": StageParameter(int, 10, msg="Number of radial bins"), } def run(self): - import dSigma - + import dsigma + import sacc + + bin_pairs = self.get_bin_pairs() + source_bin = 3 + lens_bin = 0 + + with self.open_input("fiducial_cosmology", wrapper=True) as cosmo_file: + cosmo = cosmo_file.to_astropy() + source_n_of_z = self.load_redshift_distribution("shear_photoz_stack") + + # The lens n_of_z is only used when saving at the end + lens_n_of_z = self.load_redshift_distribution("lens_photoz_stack") + + bins = np.logspace(np.log10(self.config["r_min"]), np.log10(self.config["r_max"]), self.config["nbins"]) + + results = [] + last_source_bin = None + + # suppress numpy division warnings. Real scientists divide by zero + # all the time. + numpy_error_settings = np.seterr(divide="ignore", invalid="ignore") + + for source_bin, lens_bin in self.split_tasks_by_rank(bin_pairs): + # Load the new source bin if changed + if source_bin != last_source_bin: + last_source_bin = source_bin + source_table = self.load_source_table(source_bin) + + # Always reload the lens bins because we will add some + # columns to them in-place in a minute. + lens_table = self.load_lens_table(lens_bin) + randoms_table = self.load_random_table(lens_bin) + + # Add columns to two tables in-place to do most of the pre-computation work. + # We should look at using the n_jobs multiprocessing option here + # but I don't know if it will play well with MPI on NERSC that we are + # using to split the bin pairs across ranks + print( + f"Computing excess surface density for source = {source_bin}, lens = {lens_bin}, " + f"with {len(source_table)} sources, {len(lens_table)} lenses, {len(randoms_table)} randoms" + ) + dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo) + dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo) + + # stack to get the excess surface density Delta(Sigma) + result = dsigma.stacking.excess_surface_density( + lens_table, table_r=randoms_table, random_subtraction=True, boost_correction=True, return_table=True + ) + results.append((source_bin, lens_bin, result)) + + # restore numpy settings + np.seterr(**numpy_error_settings) + + # Collate results from different ranks and save to SACC file + self.save_results(results, source_n_of_z, lens_n_of_z) + + def get_bin_pairs(self): + """ + Determine the list of (source, lens) bin pairs to measure. + + Returns + ------- + bin_pairs: list of tuples + A list of (source_bin, lens_bin) pairs to measure. + """ + + # Get the number of bins of each type from the input files. + with self.open_input("binned_shear_catalog") as shear_file: + nbin_source = shear_file["shear"].attrs["nbin_source"] + with self.open_input("binned_lens_catalog") as lens_file: + nbin_lens = lens_file["lens"].attrs["nbin_lens"] + + # User can override the list of bins to do in the options. + # but if they do not specify then we use everything + source_bins = self.config["source_bins"] + lens_bins = self.config["lens_bins"] + + if source_bins == [-1]: + source_bins = list(range(nbin_source)) + source_bins.append("all") + + if lens_bins == [-1]: + lens_bins = list(range(nbin_lens)) + lens_bins.append("all") + + # Collect all bin pairs together. + # This is everything to be done by all processes. + bin_pairs = [] + for s in source_bins: + for l in lens_bins: + bin_pairs.append((s, l)) + return bin_pairs + + def load_table(self, group, names): + """ + Helper function to read tables from HDF5 into the astropy + format that dSigma expects, with the right column names. + + Parameters + ---------- + group: h5py.Group + The group in the HDF5 file where the data is stored + names: dict + A mapping from the column names that dSigma expects to the column names in TXPipe. + """ + from astropy.table import Table + + table = Table() + for dsigma_name, txpipe_name in names.items(): + table[dsigma_name] = group[txpipe_name][:] + return table + + def load_source_table(self, bin_index): + """ + Load the lens table for a given bin index + as an astropy table with the columns we need for dSigma. + + Parameters + ---------- + bin_index: int or str + The index of the source bin to load, or "all" for the 2D stack of all the objects + """ + names = { + "ra": "ra", + "dec": "dec", + "z": "z", + "w": "weight", + "e_1": "g1", + "e_2": "g2", + } + with self.open_input("binned_shear_catalog") as shear_file: + group = shear_file[f"shear/bin_{bin_index}"] + table = self.load_table(group, names) + nbin = shear_file["shear"].attrs["nbin_source"] + + if bin_index == "all": + table["z_bin"] = np.repeat(nbin, len(table)) + else: + table["z_bin"] = np.repeat(bin_index, len(table)) + + return table + + def load_redshift_distribution(self, file_tag): + """ + Load the redshift distribution for a given source bin index as a tuple of (z, n(z)). + + dSigma wants all the redshift distributions for all the source bins together in a single table, + so we load them all and check they are consistent. + + Parameters + ---------- + file_tag: str + The tag for the input file containing the redshift distribution + + Returns + ------- + table: astropy.table.Table + A table with columns 'z' and 'n' + """ + from astropy.table import Table + + zs = [] + ns = [] + with self.open_input(file_tag, wrapper=True) as f: + nbin = f.get_nbin() + for i in range(nbin): + z, n_of_z = f.get_bin_n_of_z(i) + zs.append(z) + ns.append(n_of_z) + z, n_of_z = f.get_2d_n_of_z() + zs.append(z) + ns.append(n_of_z) + + # check all the z arrays are the same + for i in range(1, len(zs)): + if not np.allclose(zs[i], zs[0]): + raise ValueError("Z arrays for different bins are not the same, cannot use for dSigma") + # stack the n_of_z arrays into a 2D array of shape (nz, nbin) + n_of_z = np.stack(ns, axis=0).T # shape (nz, nbin) + + table = Table({"z": z, "n": n_of_z}) + return table + + def load_lens_table(self, bin_index): + """ + Load the lens table for a given bin index + as an astropy table with the columns we need for dSigma. + + Parameters + ---------- + bin_index: int or str + The index of the lens bin to load, or "all" for the 2D stack of all the objects + + Returns + ------- + table: astropy.table.Table + A table in the format that dSigma expects for lens samples + """ + names = { + "ra": "ra", + "dec": "dec", + "z": "z", + "w_sys": "weight", + } + with self.open_input("binned_lens_catalog") as lens_file: + group = lens_file[f"lens/bin_{bin_index}"] + table = self.load_table(group, names) + + return table + + def load_random_table(self, bin_index): + """ + Load the randoms table for a given bin index + as an astropy table with the columns we need for dSigma. + + Parameters + ---------- + bin_index: int or str + The index of the randoms bin to load, or "all" for the 2D stack of all the objects + + Returns + ------- + table: astropy.table.Table + A table in the format that dSigma expects for lens samples + """ + names = { + "ra": "ra", + "dec": "dec", + "z": "z", + } + + if bin_index == "all": + with self.open_input("random_cats") as random_file: + group = random_file["randoms"] + table = self.load_table(group, names) + else: + with self.open_input("binned_random_catalog") as random_file: + group = random_file[f"randoms/bin_{bin_index}"] + table = self.load_table(group, names) + + # randoms should be constructed to have unit weights + for col in list(table.colnames): + table[col] = table[col].astype(np.float64) + table["w_sys"] = np.ones(len(table)) + + return table + + def save_results(self, results, shear_photoz_stack, lens_photoz_stack): + import sacc + + if self.comm is not None: + # Gather results from all ranks to the root rank + results = self.comm.gather(results, root=0) + + # Only the root process saves the output file. + if self.rank != 0: + return + + # sort by the bin pairs to put the results in a deterministic order + results = sorted(results, key=lambda x: (str(x[0]), str(x[1]))) + + s = sacc.Sacc() + + # Create tracers for the source sample + # as SACC objects + z = shear_photoz_stack["z"] + Nz = shear_photoz_stack["n"] + nbin_source = Nz.shape[1] + for i in range(nbin_source): + if i == nbin_source - 1: + i = "all" + s.add_tracer("NZ", f"source_{i}", z, Nz) + + # Create tracers for the lens sample + # as SACC objects + z = lens_photoz_stack["z"] + Nz = lens_photoz_stack["n"] + nbin_lens = Nz.shape[1] + for i in range(nbin_lens): + if i == nbin_lens - 1: + i = "all" + + s.add_tracer("NZ", f"lens_{i}", z, Nz) + + # for each bin pair's results, add all the + # measurements in the output data table + for source_bin, lens_bin, result in results: + tracer1 = f"source_{source_bin}" + tracer2 = f"lens_{lens_bin}" + + for row in result: + # Add the data point and all the various tags + s.add_data_point( + "galaxy_shearDensity_deltasigma", + (tracer1, tracer2), + row["ds"], # Final corrected Delta(Sigma) measurement + rp=row["rp"], # radius of the bin + rp_min=row["rp_min"], # minimum radius of the bin + rp_max=row["rp_max"], # maximum radius of the bin + raw_value=row["ds_raw"], # the raw Delta(Sigma) measurement before boost correction + boost=row["b"], # the boost factor that was applied to get the final Delta(Sigma) + random_subtraction=row["ds_r"], # the contribution from the randoms that was subtracted off + n_pairs=row["n_pairs"], # the number of pairs in the bin + ) + + # Add provenance and potentially other metadata stuff. + provenance = self.gather_provenance() + other_metadata = {"nbin_source": nbin_source - 1, "nbin_lens": nbin_lens - 1} + + SACCFile.add_metadata(s, provenance, other_metadata) + output_filename = self.get_output("delta_sigma") + # switch to HDF5 backend for sacc since metadata + # handlind is better + print("Saving results to ", output_filename) + s.save_hdf5(output_filename, overwrite=True) class TXDeltaSigmaPlots(PipelineStage): - """Make plots of Delta Sigma results. - - """ + """Make plots of Delta Sigma results.""" + name = "TXDeltaSigmaPlots" inputs = [ ("delta_sigma", SACCFile), ("fiducial_cosmology", FiducialCosmology), - ] outputs = [ ("delta_sigma_plot", PNGFile), - ("delta_sigma_r_plot", PNGFile), -] + ] config_options = {} def run(self): import sacc import matplotlib.pyplot as plt - sacc_data = sacc.Sacc.load_fits(self.get_input("delta_sigma")) + sacc_data = sacc.Sacc.load_hdf5(self.get_input("delta_sigma")) # Plot in theta coordinates - nbin_source = sacc_data.metadata['nbin_source'] - nbin_lens = sacc_data.metadata['nbin_lens'] - with self.open_output("delta_sigma_plot", wrapper=True, figsize=(5*nbin_lens, 4*nbin_source)) as fig: - axes = fig.file.subplots(nbin_source, nbin_lens, squeeze=False) - for s in range(nbin_source): - for l in range(nbin_lens): - axes[s, l].set_title(f"Source {s}, Lens {l}") - axes[s, l].set_xlabel("Radius [arcmin]") - axes[s, l].set_ylabel(r"$\Delta \Sigma [M_\odot / pc^2]") - axes[s, l].grid() - x = sacc_data.get_tag("theta", tracers=(f"source_{s}", f"lens_{l}")) - y = sacc_data.get_mean(tracers=(f"source_{s}", f"lens_{l}")) - axes[s, l].plot(x, y) - plt.subplots_adjust(hspace=0.3, wspace=0.3) + nbin_source = sacc_data.metadata["nbin_source"] + nbin_lens = sacc_data.metadata["nbin_lens"] # Plot in r coordinates - nbin_source = sacc_data.metadata['nbin_source'] - nbin_lens = sacc_data.metadata['nbin_lens'] - with self.open_output("delta_sigma_r_plot", wrapper=True, figsize=(5*nbin_lens, 4*nbin_source)) as fig: + nbin_source = sacc_data.metadata["nbin_source"] + nbin_lens = sacc_data.metadata["nbin_lens"] + with self.open_output("delta_sigma_plot", wrapper=True, figsize=(5 * nbin_lens, 4 * nbin_source)) as fig: axes = fig.file.subplots(nbin_source, nbin_lens, squeeze=False) for s in range(nbin_source): for l in range(nbin_lens): @@ -97,7 +389,7 @@ def run(self): axes[s, l].set_xlabel("Radius [Mpc]") axes[s, l].set_ylabel(r"$R \cdot \Delta \Sigma [M_\odot / pc^2]$") axes[s, l].grid() - x = sacc_data.get_tag("r_mpc", tracers=(f"source_{s}", f"lens_{l}")) + x = sacc_data.get_tag("rp", tracers=(f"source_{s}", f"lens_{l}")) y = sacc_data.get_mean(tracers=(f"source_{s}", f"lens_{l}")) axes[s, l].plot(x, y * np.array(x)) - plt.subplots_adjust(hspace=0.3, wspace=0.3) \ No newline at end of file + plt.subplots_adjust(hspace=0.3, wspace=0.3) diff --git a/txpipe/twopoint.py b/txpipe/twopoint.py index ba22ca3a2..f92164a24 100755 --- a/txpipe/twopoint.py +++ b/txpipe/twopoint.py @@ -466,26 +466,8 @@ def write_output(self, source_list, lens_list, meta, results): S2.save_fits(self.get_output("twopoint_gamma_x"), overwrite=True) def write_metadata(self, S, meta): - # We also save the associated metadata to the file - for k, v in meta.items(): - if np.isscalar(v): - S.metadata[k] = v - else: - for i, vi in enumerate(v): - S.metadata[f"{k}_{i}"] = vi - - # Add provenance metadata. In managed formats this is done - # automatically, but because the Sacc library is external - # we do it manually here. provenance = self.gather_provenance() - provenance.update(SACCFile.generate_provenance()) - for key, value in provenance.items(): - if isinstance(value, str) and "\n" in value: - values = value.split("\n") - for i, v in enumerate(values): - S.metadata[f"provenance/{key}_{i}"] = v - else: - S.metadata[f"provenance/{key}"] = value + SACCFile.add_metadata(S, provenance, meta) def call_treecorr(self, i, j, k): """ From fa9757341c626d6d1fb6d261da2112e9b4e5e1c0 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Thu, 30 Apr 2026 16:04:16 +0100 Subject: [PATCH 08/25] Add dsigma to install --- bin/environment-local.yml | 1 + bin/environment-perlmutter.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/bin/environment-local.yml b/bin/environment-local.yml index a20c5cf98..46ddae269 100644 --- a/bin/environment-local.yml +++ b/bin/environment-local.yml @@ -51,3 +51,4 @@ dependencies: # Numpy has to be here to make sure that pypi doesn't # try to install a different version - numpy==2.3.* + - dsigma=1.1.* diff --git a/bin/environment-perlmutter.yml b/bin/environment-perlmutter.yml index f2100d08b..e77f395e5 100644 --- a/bin/environment-perlmutter.yml +++ b/bin/environment-perlmutter.yml @@ -62,3 +62,4 @@ dependencies: - numpy==2.3.* # NERSC-specific - git+https://github.com/LSSTDESC/dataregistry.git + - dsigma=1.1.* \ No newline at end of file From 8f5cb461b3ae960583f20e947b104ee1f3a6172d Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Thu, 30 Apr 2026 16:58:27 +0100 Subject: [PATCH 09/25] fix spec --- bin/environment-local.yml | 2 +- bin/environment-perlmutter.yml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/environment-local.yml b/bin/environment-local.yml index 46ddae269..533c8b208 100644 --- a/bin/environment-local.yml +++ b/bin/environment-local.yml @@ -51,4 +51,4 @@ dependencies: # Numpy has to be here to make sure that pypi doesn't # try to install a different version - numpy==2.3.* - - dsigma=1.1.* + - dsigma==1.1.* diff --git a/bin/environment-perlmutter.yml b/bin/environment-perlmutter.yml index e77f395e5..cdc1a857c 100644 --- a/bin/environment-perlmutter.yml +++ b/bin/environment-perlmutter.yml @@ -62,4 +62,5 @@ dependencies: - numpy==2.3.* # NERSC-specific - git+https://github.com/LSSTDESC/dataregistry.git - - dsigma=1.1.* \ No newline at end of file + - dsigma==1.1.* + \ No newline at end of file From 6baf2e67754d3957c56e335c7cd10468d484c022 Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Thu, 30 Apr 2026 15:40:59 -0400 Subject: [PATCH 10/25] mask working and add a new stage name to init --- examples/desi_mocks/config.yml | 3 ++- examples/desi_mocks/pipeline.yml | 2 +- txpipe/ingest/__init__.py | 1 + txpipe/lens_selector.py | 5 +++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/desi_mocks/config.yml b/examples/desi_mocks/config.yml index 88e272983..1fee469bf 100644 --- a/examples/desi_mocks/config.yml +++ b/examples/desi_mocks/config.yml @@ -42,7 +42,8 @@ TXDESISelector: dec_col: dec z_col: redshift w_col: weight - apply_mask: false + apply_mask: true + zmin: 0.5 PZPDFMLZ: diff --git a/examples/desi_mocks/pipeline.yml b/examples/desi_mocks/pipeline.yml index df3eb8367..f8b1a88df 100644 --- a/examples/desi_mocks/pipeline.yml +++ b/examples/desi_mocks/pipeline.yml @@ -99,7 +99,7 @@ inputs: # star_catalog: data/example/inputs/star_catalog.hdf5 desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/output_select_lsst_obs_cond_dp2_DESI_ELG_flagship.pq # mask should be already a combination of source and lens. - mask: None # /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_eg.fits + mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/dp2_23p5_combined_mask_nmin=500_nside=512.hdf5 # /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_eg.fits # This file comes with the code # fiducial_cosmology: data/fiducial_cosmology.yml # spectroscopic_catalog: data/example/inputs/mock_spectroscopic_catalog.hdf5 diff --git a/txpipe/ingest/__init__.py b/txpipe/ingest/__init__.py index 38573dda4..abdb2892f 100644 --- a/txpipe/ingest/__init__.py +++ b/txpipe/ingest/__init__.py @@ -2,6 +2,7 @@ from .mocks import TXCosmoDC2Mock, TXGaussianSimsMock from .gcr import TXMetacalGCRInput, TXIngestStars from .redmagic import TXIngestRedmagic +from .desi import TXIngestDESI from .ssi import ( TXIngestSSIGCR, TXMatchSSI, diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py index 36fcb8ee2..e996fe8bb 100755 --- a/txpipe/lens_selector.py +++ b/txpipe/lens_selector.py @@ -860,7 +860,8 @@ def run(self): if self.config["apply_mask"]: with self.open_input("mask", wrapper=True) as f: - self.mask, self.mask_nside = f.read_healsparse("mask", return_all=True) + self.mask = f.read_mask("mask") + self.mask_nside = f.read_map_info("mask")["nside"] else: self.mask = self.mask_nside = None @@ -939,7 +940,7 @@ def _apply_cuts(self, data): # Survey mask if self.mask is not None: import healpy as hp - pix = hp.ang2pix(self.mask_nside, data[ra_col], data[dec_col], lonlat=True) + pix = hp.ang2pix(self.mask_nside, data[ra_col], data[dec_col], lonlat=True, nest=True) sel &= self.mask[pix] != hp.UNSEEN return sel From 312cee8bc5960d45a60ba9606be586a3b93ad3ca Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Fri, 1 May 2026 12:22:42 -0400 Subject: [PATCH 11/25] made subclass for desi mocks due to file format difference. Tested on real DESI catalog as well --- examples/desi_dr1_stage3/config.yml | 252 ++++++++++++++++++++++++++ examples/desi_dr1_stage3/pipeline.yml | 113 ++++++++++++ examples/desi_mocks/config.yml | 4 +- examples/desi_mocks/pipeline.yml | 2 +- txpipe/__init__.py | 2 +- txpipe/ingest/desi.py | 2 +- txpipe/lens_selector.py | 112 +++++++----- 7 files changed, 437 insertions(+), 50 deletions(-) create mode 100644 examples/desi_dr1_stage3/config.yml create mode 100644 examples/desi_dr1_stage3/pipeline.yml diff --git a/examples/desi_dr1_stage3/config.yml b/examples/desi_dr1_stage3/config.yml new file mode 100644 index 000000000..2bc130acd --- /dev/null +++ b/examples/desi_dr1_stage3/config.yml @@ -0,0 +1,252 @@ +# Values in this section are accessible to all the different stages. +# They can be overridden by individual stages though. +global: + # This is read by many stages that read complete + # catalog data, and tells them how many rows to read + # at once + chunk_rows: 100000 + # These mapping options are also read by a range of stages + pixelization: healpix + nside: 512 + sparse: true # Generate sparse maps - faster if using small areas + +TXGCRTwoCatalogInput: + metacal_dir: /global/cscratch1/sd/desc/DC2/data/Run2.2i/dpdd/Run2.2i-t3828/metacal_table_summary + photo_dir: /global/cscratch1/sd/desc/DC2/data/Run2.2i/dpdd/Run2.2i-t3828/object_table_summary + +TXMetacalGCRInput: + cat_name: dc2_object_run2.1i_dr1b_with_metacal_griz + +TXExposureInfo: + dc2_name: 1.2p + +TXShearCalibration: {} + +TXCosmoDC2Mock: + cat_name: cosmoDC2_v1.1.4_image + visits_per_band: 16 + extra_cols: redshift_true size_true shear_1 shear_2 + flip_g2: true # to match metacal + +TXIngestRedmagic: + lens_zbin_edges: [0.1, 0.3, 0.5] + +TXIngestDESI: + mock: false + lens_zbin_edges: [0.0, 3.0] + dz: 0.001 + +TXDESISelector: + ra_col: RA + dec_col: DEC + z_col: Z + w_col: WEIGHT + apply_mask: false + zmin: 0.0 + + +PZPDFMLZ: + nz: 301 + zmax: 3.0 + + +PZRailTrainSource: + class_name: BPZ_lite + zmin: 0.0 + zmax: 3.0 + dz: 0.01 + columns_file: ./data/bpz_riz.columns + data_path: ./data/example/rail-bpz-inputs + spectra_file: CWWSB4.list + prior_band: i + # Not sure about this + prior_file: hdfn_gen + p_min: 0.005 + gauss_kernel: 0.0 + mag_err_min: 0.005 + inform_options: {save_train: false, load_model: false, modelfile: BPZpriormodel.out} + madau_reddening: no + bands: riz + zp_errors: [0.01, 0.01, 0.01] + +PZRailEstimateSource: + convert_unseen: true # needed for BPZ + + +# Mock version of stacking: +TXSourceTrueNumberDensity: + nz: 301 + zmax: 3.0 + +# Mock version of stacking: +TXTrueNumberDensity: + nz: 301 + zmax: 3.0 + +TXSourceSelectorMetacal: + input_pz: false + true_z: true + bands: riz #used for selection + T_cut: 0.5 + s2n_cut: 10.0 + max_rows: 1000 + delta_gamma: 0.02 + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + +TXTruthLensSelector: + # Mag cuts + input_pz: false + true_z: true + lens_zbin_edges: [0.1, 0.3, 0.5] + cperp_cut: 0.2 + r_cpar_cut: 13.5 + r_lo_cut: 16.0 + r_hi_cut: 21.6 + i_lo_cut: 17.5 + i_hi_cut: 21.9 + r_i_cut: 2.0 + # may also need one for r_cpar_cut + +TXMeanLensSelector: + # Mag cuts + lens_zbin_edges: [0.0, 0.2, 0.4] + cperp_cut: 0.2 + r_cpar_cut: 13.5 + r_lo_cut: 16.0 + r_hi_cut: 21.6 + i_lo_cut: 17.5 + i_hi_cut: 21.9 + r_i_cut: 2.0 + +TXModeLensSelector: + # Mag cuts + lens_zbin_edges: [0.0, 0.2, 0.4] + cperp_cut: 0.2 + r_cpar_cut: 13.5 + r_lo_cut: 16.0 + r_hi_cut: 21.6 + i_lo_cut: 17.5 + i_hi_cut: 21.9 + r_i_cut: 2.0 + +TXRandomCat: + density: 10 # gals per sq arcmin + +TXTwoPoint: + bin_slop: 0.1 + delta_gamma: 0.02 + do_pos_pos: true + do_shear_shear: true + do_shear_pos: true + flip_g2: true # use true when using metacal shears + min_sep: 2.5 + max_sep: 60.0 + nbins: 10 + verbose: 0 + subtract_mean_shear: true + +TXGammaTBrightStars: {} + +TXGammaTDimStars: {} + +TXGammaTRandoms: {} + +TXGammaTFieldCenters: {} + +TXBlinding: + seed: 1972 ## seed uniquely specifies the shift in parameters + Omega_b: [0.0485, 0.001] ## fiducial_model_value, shift_sigma + Omega_c: [0.2545, 0.01] + w0: [-1.0, 0.1] + h: [0.682, 0.02] + sigma8: [0.801, 0.01] + n_s: [0.971, 0.03] + b0: 0.95 ## we use bias of the form b0/g + delete_unblinded: true + +TXSourceDiagnosticPlots: + nbins: 20 + g_min: -0.01 + g_max: 0.01 + psfT_min: 0.2 + psfT_max: 0.28 + T_min: 0.01 + T_max: 2.1 + s2n_min: 1.25 + s2n_max: 300 + +TXDiagnosticMaps: + sparse: true # Generate sparse maps - faster if using small areas + snr_threshold: 10.0 + snr_delta: 1.0 + # pixelization: gnomonic + pixel_size: 0.2 + ra_cent: 62. + dec_cent: -35. + npix_x: 60 + npix_y: 60 + depth_cut: 23.0 + +TXSourceMaps: + sparse: true # Generate sparse maps - faster if using small areas + +TXLensMaps: + sparse: true # Generate sparse maps - faster if using small areas + +# For redmagic mapping +TXExternalLensMaps: + chunk_rows: 100000 + sparse: true + pixelization: healpix + nside: 512 + + + +TXAuxiliarySourceMaps: + flag_exponent_max: 8 + +TXAuxiliaryLensMaps: + flag_exponent_max: 8 + bright_obj_threshold: 22.0 # The magnitude threshold for a object to be counted as bright + depth_band: i + snr_threshold: 10.0 # The S/N value to generate maps for (e.g. 5 for 5-sigma depth) + snr_delta: 1.0 # The range threshold +/- delta is used for finding objects at the boundary + +TXRealGaussianCovariance: + min_sep: 2.5 + max_sep: 60. + nbins: 10 + pickled_wigner_transform: data/example/inputs/wigner.pkl + + +TXJackknifeCenters: + npatch: 5 + + +TXTwoPointFourier: + flip_g2: true + bandwidth: 100 + +TXSimpleMask: + depth_cut: 23.5 + bright_object_max: 10.0 + +TXMapCorrelations: + supreme_path_root: data/example/inputs/supreme + outlier_fraction: 0.05 + nbin: 20 + +TXPhotozPlotSource: + name: TXPhotozPlotSource +TXPhotozPlotLens: + name: TXPhotozPlotLens +TXTruePhotozStackSource: + name: TXTruePhotozStackSource + weight_col: metacal/weight + redshift_group: metacal + zmax: 2.0 + nz: 201 diff --git a/examples/desi_dr1_stage3/pipeline.yml b/examples/desi_dr1_stage3/pipeline.yml new file mode 100644 index 000000000..cd89d6ddd --- /dev/null +++ b/examples/desi_dr1_stage3/pipeline.yml @@ -0,0 +1,113 @@ +# Stages to run +stages: + # - name: TXSourceSelectorMetacal + # - name: TXSourceTomography + # - name: TXShearCalibration + # - name: TXExternalLensCatalogSplitter + # - name: TXStarCatalogSplitter + # - name: TXTruePhotozStackSource + # classname: TXTruePhotozStack + # aliases: + # tomography_catalog: shear_tomography_catalog + # catalog: shear_catalog + # weights_catalog: shear_catalog + # photoz_stack: shear_photoz_stack + # - name: TXIngestRedmagic + - name: TXDESISelector + - name: TXIngestDESI + # - name: TXTracerMetadata + # - name: TXSourceMaps + # - name: TXExternalLensMaps + # - name: TXAuxiliarySourceMaps + # - name: TXAuxiliaryLensMaps + # - name: TXSimpleMask + # - name: TXLSSWeightsUnit + # - name: TXDensityMaps + # - name: TXMapPlots + # - name: TXRandomCat + # - name: TXJackknifeCenters + # - name: TXTwoPoint + # threads_per_process: 2 + # - name: TXBlinding # Blind the data following Muir et al + # threads_per_process: 2 + # - name: TXTwoPointTheoryReal # compute theory using CCL to save in sacc file and plot later + # - name: TXPhotozPlotSource # Plot the bin n(z) + # classname: TXPhotozPlot + # aliases: + # photoz_stack: shear_photoz_stack + # nz_plot: nz_source + - name: TXPhotozPlotLens # Plot the bin n(z) + classname: TXPhotozPlot + aliases: + photoz_stack: lens_photoz_stack + nz_plot: nz_lens + # - name: TXDiagnosticQuantiles + # - name: TXSourceDiagnosticPlots # Make a suite of diagnostic plots + # - name: TXGammaTFieldCenters # Compute and plot gamma_t around center points + # threads_per_process: 2 + # - name: TXGammaTRandoms # Compute and plot gamma_t around randoms + # threads_per_process: 2 + # - name: TXGammaTStars # Compute and plot gamma_t around dim stars + # threads_per_process: 2 + # - name: TXRoweStatistics # Compute and plot Rowe statistics + # threads_per_process: 2 + # - name: TXPSFDiagnostics # Compute and plots other PSF diagnostics + # - name: TXBrighterFatterPlot # Make plots tracking the brighter-fatter effect + # - name: TXSourceNoiseMaps + # - name: TXExternalLensNoiseMaps + # - name: TXConvergenceMaps # Make convergence kappa maps from g1, g2 maps + # - name: TXConvergenceMapPlots # Plot the convergence map + # - name: TXTwoPointPlots + # - name: TXTwoPointFourier + # threads_per_process: 2 + # - name: TXRealGaussianCovariance # Compute covariance of real-space correlations + +# Where to put outputs +output_dir: data/example/outputs_desi_dr1_stage3 + +# How to run the pipeline: mini, parsl, or cwl +launcher: + name: mini + interval: 1.0 + +# Where to run the pipeline: cori-interactive, cori-batch, or local +site: + name: local + max_threads: 2 + +# modules and packages to import that have pipeline +# stages defined in them +modules: txpipe + +# where to find any modules that are not in this repo, +# and any other code we need. +python_paths: + - submodules/WLMassMap/python/desc/ + +# configuration settings +config: examples/desi_dr1_stage3/config.yml + +# On NERSC, set this before running: +# export DATA=${LSST}/groups/WL/users/zuntz/data/metacal-testbed + +inputs: + # See README for paths to download these files + # shear_catalog: data/example/inputs/shear_catalog.hdf5 + # photometry_catalog: data/example/inputs/photometry_catalog.hdf5 + # photoz_trained_model: data/example/inputs/cosmoDC2_trees_i25.3.npy + # exposures: data/example/inputs/exposures.hdf5 + # star_catalog: data/example/inputs/star_catalog.hdf5 + desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/LRG_SGC_clustering.dat.fits + # mask should be already a combination of source and lens. + mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/dp2_23p5_combined_mask_nmin=500_nside=512.hdf5 # /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_eg.fits + # This file comes with the code + # fiducial_cosmology: data/fiducial_cosmology.yml + # spectroscopic_catalog: data/example/inputs/mock_spectroscopic_catalog.hdf5 + +# if supported by the launcher, restart the pipeline where it left off +# if interrupted +resume: true +# where to put output logs for individual stages +log_dir: data/example/logs_desi_dr1_stage3 +# where to put an overall parsl pipeline log +pipeline_log: data/example/log_desi_dr1_stage3.txt \ No newline at end of file diff --git a/examples/desi_mocks/config.yml b/examples/desi_mocks/config.yml index 1fee469bf..8cb142a0b 100644 --- a/examples/desi_mocks/config.yml +++ b/examples/desi_mocks/config.yml @@ -36,8 +36,8 @@ TXIngestDESI: lens_zbin_edges: [0.0, 3.0] dz: 0.001 -TXDESISelector: - mock: true +TXDESIMockSelector: + bands: ugrizy ra_col: ra dec_col: dec z_col: redshift diff --git a/examples/desi_mocks/pipeline.yml b/examples/desi_mocks/pipeline.yml index f8b1a88df..6e4422361 100644 --- a/examples/desi_mocks/pipeline.yml +++ b/examples/desi_mocks/pipeline.yml @@ -13,7 +13,7 @@ stages: # weights_catalog: shear_catalog # photoz_stack: shear_photoz_stack # - name: TXIngestRedmagic - - name: TXDESISelector + - name: TXDESIMockSelector - name: TXIngestDESI # - name: TXTracerMetadata # - name: TXSourceMaps diff --git a/txpipe/__init__.py b/txpipe/__init__.py index 404b9434f..91c090c4e 100644 --- a/txpipe/__init__.py +++ b/txpipe/__init__.py @@ -11,7 +11,7 @@ TXSourceSelectorLensfit, TXSourceSelectorMetadetect, ) -from .lens_selector import TXMeanLensSelector +from .lens_selector import TXMeanLensSelector, TXDESISelector, TXDESIMockSelector from .photoz_stack import TXPhotozStack, TXPhotozPlot, TXTruePhotozStack from .random_cats import TXRandomCat from .twopoint_fourier import TXTwoPointFourier diff --git a/txpipe/ingest/desi.py b/txpipe/ingest/desi.py index 54ff5590c..89ee9187c 100644 --- a/txpipe/ingest/desi.py +++ b/txpipe/ingest/desi.py @@ -85,7 +85,7 @@ def run(self): counts_2d = 0 # all cols that might be useful - cols = ["ra", "dec", "redshift"] + cols = ["ra", "dec", "redshift", "weight"] for s, e, data in self.iterate_fits("desi_catalog_selected", 1, cols, chunk_rows): n = data["ra"].size diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py index c83578ca7..32c242a2b 100755 --- a/txpipe/lens_selector.py +++ b/txpipe/lens_selector.py @@ -8,7 +8,8 @@ PhotometryCatalog, FitsFile, MapsFile, - ParquetFile + ParquetFile, + DataFile ) from .utils import LensNumberDensityStats, Splitter, rename_iterated from .binning import build_tomographic_classifier, apply_classifier, read_training_data @@ -809,29 +810,23 @@ def data_iterator(self): class TXDESISelector(PipelineStage): """ - Select lens galaxies from a DESI spectroscopic catalog. + Select lens galaxies from a DESI spectroscopic FITS catalog. Reads a raw DESI FITS catalog and applies: - TODO: ADJUST NECESSARY CUTS FOR MOCK AND REAL DATA - - ZWARN quality cut (good redshift fits only) + TODO: ADJUST NECESSARY CUTS FOR REAL DATA - Redshift range cut - - SPECTYPE filter (e.g. GALAXY only) - - Magnitude cuts (bright/faint limits in a chosen band) - Optional survey mask - Outputs a standardized HDF5 file with a photometry/ group containing - ra, dec, z. This file is consumed by TXIngestDESI. + Outputs a standardized FITS file consumed by TXIngestDESI. """ name = "TXDESISelector" parallel = False - outputs = [ ("desi_catalog_selected", FitsFile), ] - # PLACEHOLDERS: WILL UPDATE. config_options = { "chunk_rows": StageParameter(int, 100_000, msg="Rows per chunk."), "zmin": StageParameter(float, 0.0, msg="Minimum spectroscopic redshift."), @@ -842,18 +837,16 @@ class TXDESISelector(PipelineStage): "dec_col": StageParameter(str, "dec", msg="Dec column name in the DESI catalog."), "z_col": StageParameter(str, "redshift", msg="Spectroscopic redshift column name."), "w_col": StageParameter(str, "weight", msg="Weight column name."), - "bands": StageParameter(str, "ugrizy", msg="Band labels for output magnitudes (one char each)."), - "mock": StageParameter(bool, False, msg="Whether to use a mock catalog."), + "bands": StageParameter(str, "", msg="Band labels for output magnitudes (one char each)."), } - # ParquetFile for mock catalogs; swap to FitsFile here if working with real DESI FITS inputs = [ - ("desi_catalog", ParquetFile), + ("desi_catalog", FitsFile), ("mask", MapsFile), ] def run(self): - bands = self.config["bands"] + self.bands = self.config["bands"] ra_col = self.config["ra_col"] dec_col = self.config["dec_col"] z_col = self.config["z_col"] @@ -863,11 +856,15 @@ def run(self): with self.open_input("mask", wrapper=True) as f: self.mask = f.read_mask("mask") self.mask_nside = f.read_map_info("mask")["nside"] + # self.mask_fmt = f.read_map_info("mask")["nest"] + # print(self.mask_fmt) else: self.mask = self.mask_nside = None - # Accumulate selected rows across chunks; write once at the end - out_cols = {name: [] for name in ["ra", "dec", "redshift", "weight"] + [f"mag_{b}" for b in bands]} + if len(self.bands) == 0: + out_cols = {name: [] for name in ["ra", "dec", "redshift", "weight"]} + else: + out_cols = {name: [] for name in ["ra", "dec", "redshift", "weight"] + [f"mag_{b}" for b in self.bands]} n_total = 0 n_selected = 0 @@ -886,12 +883,11 @@ def run(self): out_cols["ra"].append(data[ra_col][sel]) out_cols["dec"].append(data[dec_col][sel]) out_cols["redshift"].append(data[z_col][sel]) - if self.config["mock"]: - out_cols["weight"].append(np.ones(n_chunk)) - else: - out_cols["weight"].append(data[w_col][sel]) - for b in bands: - out_cols[f"mag_{b}"].append(data[f"mag_{b}_lsst"][sel]) + out_cols["weight"].append(self._get_weights(data, sel, n_chunk, w_col)) + + if len(self.bands) > 0: + for b in self.bands: + out_cols[f"mag_{b}"].append(data[f"mag_{b}_lsst"][sel]) print(f"Total selected: {n_selected:,} / {n_total:,}") @@ -899,32 +895,17 @@ def run(self): table = Table({name: np.concatenate(chunks) for name, chunks in out_cols.items()}) table.write(self.get_output("desi_catalog_selected"), overwrite=True) + def _get_weights(self, data, sel, n_selected, w_col): + return data[w_col][sel] + def data_iterator(self): chunk_rows = self.config["chunk_rows"] ra_col = self.config["ra_col"] dec_col = self.config["dec_col"] z_col = self.config["z_col"] w_col = self.config["w_col"] - bands = self.config["bands"] - - if self.config["mock"]: - cols = [ra_col, dec_col, z_col] + [f"mag_{b}_lsst" for b in bands] - yield from self._iterate_parquet(cols, chunk_rows) - else: - cols = [ra_col, dec_col, z_col, w_col] + [f"mag_{b}_lsst" for b in bands] - yield from self.iterate_fits("desi_catalog", 1, cols, chunk_rows) - - def _iterate_parquet(self, cols, chunk_rows): - from pyarrow.parquet import ParquetFile as PQFile - fn = self.get_input("desi_catalog") - start = 0 - with PQFile(fn) as f: - for batch in f.iter_batches(columns=cols, batch_size=chunk_rows): - data = {name: batch[name].to_numpy(zero_copy_only=False) for name in cols} - n = len(data[cols[0]]) - end = start + n - yield start, end, data - start = end + cols = [ra_col, dec_col, z_col, w_col] + yield from self.iterate_fits("desi_catalog", 1, cols, chunk_rows) def _apply_cuts(self, data): ra_col = self.config["ra_col"] @@ -934,11 +915,9 @@ def _apply_cuts(self, data): n = data[ra_col].size sel = np.ones(n, dtype=bool) - # Redshift range z = data[z_col] sel &= (z >= self.config["zmin"]) & (z < self.config["zmax"]) - # Survey mask if self.mask is not None: import healpy as hp pix = hp.ang2pix(self.mask_nside, data[ra_col], data[dec_col], lonlat=True, nest=True) @@ -947,5 +926,48 @@ def _apply_cuts(self, data): return sel +class TXDESIMockSelector(TXDESISelector): + """ + Select lens galaxies from a DESI mock Parquet catalog. + + Identical cuts to TXDESISelector but reads a Parquet file and assigns + unit weights (mocks carry no per-object weight column). + """ + + name = "TXDESIMockSelector" + + inputs = [ + ("desi_catalog", ParquetFile), + ("mask", MapsFile), + ] + + config_options = { + **TXDESISelector.config_options, + } + # w_col is unused for mocks; keep it in config_options for compatibility + # but override _get_weights to return ones instead. + + def _get_weights(self, data, sel, n_selected, w_col): + return np.ones(n_selected) + + def data_iterator(self): + chunk_rows = self.config["chunk_rows"] + ra_col = self.config["ra_col"] + dec_col = self.config["dec_col"] + z_col = self.config["z_col"] + cols = [ra_col, dec_col, z_col] + [f"mag_{b}_lsst" for b in self.bands] + + from pyarrow.parquet import ParquetFile as PQFile + fn = self.get_input("desi_catalog") + start = 0 + with PQFile(fn) as f: + for batch in f.iter_batches(columns=cols, batch_size=chunk_rows): + data = {name: batch[name].to_numpy(zero_copy_only=False) for name in cols} + n = len(data[cols[0]]) + end = start + n + yield start, end, data + start = end + + if __name__ == "__main__": PipelineStage.main() From 818a68505d680bee15a74e3cf05e7f95b4e54e4b Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Mon, 4 May 2026 17:42:15 -0400 Subject: [PATCH 12/25] make outputs compatible with dsigma step --- examples/desi_dr1_stage3/config.yml | 17 ++++++- examples/desi_dr1_stage3/pipeline.yml | 38 ++++++++++----- txpipe/ingest/desi.py | 70 ++++++++++++++++++++------- 3 files changed, 95 insertions(+), 30 deletions(-) diff --git a/examples/desi_dr1_stage3/config.yml b/examples/desi_dr1_stage3/config.yml index 2bc130acd..3d9324768 100644 --- a/examples/desi_dr1_stage3/config.yml +++ b/examples/desi_dr1_stage3/config.yml @@ -20,7 +20,12 @@ TXMetacalGCRInput: TXExposureInfo: dc2_name: 1.2p -TXShearCalibration: {} +TXShearCalibration: + use_true_shear: false + chunk_rows: 100000 + subtract_mean_shear: true + extra_cols: [''] + shear_catalog_type: hsc TXCosmoDC2Mock: cat_name: cosmoDC2_v1.1.4_image @@ -44,6 +49,16 @@ TXDESISelector: apply_mask: false zmin: 0.0 +TXSourceSelectorHSC: + input_pz: true + true_z: false + bands: i #used for selection + T_cut: 0.0 + s2n_cut: 10.0 + max_rows: 10000000000 + source_zbin_edges: [0.5, 1.5, 2.5, 3.5, 4.5] + shear_catalog_type: hsc + PZPDFMLZ: nz: 301 diff --git a/examples/desi_dr1_stage3/pipeline.yml b/examples/desi_dr1_stage3/pipeline.yml index cd89d6ddd..33de9f82b 100644 --- a/examples/desi_dr1_stage3/pipeline.yml +++ b/examples/desi_dr1_stage3/pipeline.yml @@ -1,6 +1,7 @@ # Stages to run stages: # - name: TXSourceSelectorMetacal + # - name: TXSourceSelectorMetadetect # - name: TXSourceTomography # - name: TXShearCalibration # - name: TXExternalLensCatalogSplitter @@ -15,7 +16,7 @@ stages: # - name: TXIngestRedmagic - name: TXDESISelector - name: TXIngestDESI - # - name: TXTracerMetadata + - name: TXTracerMetadata # - name: TXSourceMaps # - name: TXExternalLensMaps # - name: TXAuxiliarySourceMaps @@ -25,17 +26,19 @@ stages: # - name: TXDensityMaps # - name: TXMapPlots # - name: TXRandomCat - # - name: TXJackknifeCenters - # - name: TXTwoPoint - # threads_per_process: 2 + - name: TXJackknifeCenters + - name: TXTwoPoint + threads_per_process: 2 + - name: TXDeltaSigma + - name: TXDeltaSigmaPlots # - name: TXBlinding # Blind the data following Muir et al # threads_per_process: 2 # - name: TXTwoPointTheoryReal # compute theory using CCL to save in sacc file and plot later - # - name: TXPhotozPlotSource # Plot the bin n(z) - # classname: TXPhotozPlot - # aliases: - # photoz_stack: shear_photoz_stack - # nz_plot: nz_source + - name: TXPhotozPlotSource # Plot the bin n(z) + classname: TXPhotozPlot + aliases: + photoz_stack: shear_photoz_stack + nz_plot: nz_source - name: TXPhotozPlotLens # Plot the bin n(z) classname: TXPhotozPlot aliases: @@ -92,16 +95,27 @@ config: examples/desi_dr1_stage3/config.yml inputs: # See README for paths to download these files - # shear_catalog: data/example/inputs/shear_catalog.hdf5 + # shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/txpipe_allfield_shear.h5 + shear_tomography_classifier: none + shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/metadetect_desy6.hdf5 + shear_tomography_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/tomography_metadetect_desy6.hdf5 + binned_shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_metadetect_desy6.hdf5 + shear_photoz_stack: /scratch/gpfs/AAMON/desc_analysis/catalogs/shear_photoz_stack_metadetect_desy6.hdf5 + # photometry_catalog: data/example/inputs/photometry_catalog.hdf5 # photoz_trained_model: data/example/inputs/cosmoDC2_trees_i25.3.npy # exposures: data/example/inputs/exposures.hdf5 # star_catalog: data/example/inputs/star_catalog.hdf5 + random_cats: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 + binned_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 + binned_random_catalog_sub: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 + + spectroscopic_catalog: None desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/LRG_SGC_clustering.dat.fits # mask should be already a combination of source and lens. - mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/dp2_23p5_combined_mask_nmin=500_nside=512.hdf5 # /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_eg.fits + mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/dp2_23p5_combined_mask_nmin=500_nside=512.hdf5 # This file comes with the code - # fiducial_cosmology: data/fiducial_cosmology.yml + fiducial_cosmology: data/fiducial_cosmology.yml # spectroscopic_catalog: data/example/inputs/mock_spectroscopic_catalog.hdf5 # if supported by the launcher, restart the pipeline where it left off diff --git a/txpipe/ingest/desi.py b/txpipe/ingest/desi.py index 89ee9187c..3a1682f27 100644 --- a/txpipe/ingest/desi.py +++ b/txpipe/ingest/desi.py @@ -20,7 +20,9 @@ class TXIngestDESI(PipelineStage): outputs = [ ("lens_catalog", HDFFile), + ("binned_lens_catalog", HDFFile), ("lens_tomography_catalog_unweighted", HDFFile), + ("lens_tomography_catalog", HDFFile), ("lens_photoz_stack", QPNOfZFile), ] @@ -50,7 +52,9 @@ def run(self): nbin_lens = len(zbin_edges) - 1 cat = self.open_output("lens_catalog") - tomo = self.open_output("lens_tomography_catalog_unweighted") + cat_binned = self.open_output("binned_lens_catalog") + tomo_uw = self.open_output("lens_tomography_catalog_unweighted") + tomo_w = self.open_output("lens_tomography_catalog") # redshift grid zmin = self.config["zmin"] @@ -65,20 +69,38 @@ def run(self): # g.attrs["bands"] = bands g.create_dataset("ra", (n,), dtype=np.float64) g.create_dataset("dec", (n,), dtype=np.float64) - g.create_dataset("redshift", (n,), dtype=np.float64) + g.create_dataset("z", (n,), dtype=np.float64) # for b in bands: # g.create_dataset(f"mag_{b}", (n,), dtype=np.float64) # g.create_dataset(f"mag_err_{b}", (n,), dtype=np.float64) # g.attrs["bands"] = bands - h = tomo.create_group("tomography") - h.create_dataset("bin", (n,), dtype=np.int32) - h.create_dataset("lens_weight", (n,), dtype=np.float64) - h.attrs["nbin"] = nbin_lens - h.attrs[f"zbin_edges"] = zbin_edges - h_counts = tomo.create_group("counts") - h_counts.create_dataset("counts", (nbin_lens,), dtype="i") - h_counts.create_dataset("counts_2d", (1,), dtype="i") + gb = cat_binned.create_group("lens") + gb.attrs["nbin_lens"] = nbin_lens + for bn in range(nbin_lens): + gb_ = gb.create_group(f"bin_{bn}") + gb_.create_dataset("ra", (n,), dtype=np.float64) + gb_.create_dataset("dec", (n,), dtype=np.float64) + gb_.create_dataset("z", (n,), dtype=np.float64) + gb_.create_dataset("w_sys", (n,), dtype=np.float64) + + h_uw = tomo_uw.create_group("tomography") + h_uw.create_dataset("bin", (n,), dtype=np.int32) + h_uw.create_dataset("lens_weight", (n,), dtype=np.float64) + h_uw.attrs["nbin"] = nbin_lens + h_uw.attrs["zbin_edges"] = zbin_edges + h_counts_uw = tomo_uw.create_group("counts") + h_counts_uw.create_dataset("counts", (nbin_lens,), dtype="i") + h_counts_uw.create_dataset("counts_2d", (1,), dtype="i") + + h_w = tomo_w.create_group("tomography") + h_w.create_dataset("bin", (n,), dtype=np.int32) + h_w.create_dataset("lens_weight", (n,), dtype=np.float64) + h_w.attrs["nbin"] = nbin_lens + h_w.attrs["zbin_edges"] = zbin_edges + h_counts_w = tomo_w.create_group("counts") + h_counts_w.create_dataset("counts", (nbin_lens,), dtype="i") + h_counts_w.create_dataset("counts_2d", (1,), dtype="i") # we keep track of the counts per-bin also counts = np.zeros(nbin_lens, dtype=np.int64) @@ -115,25 +137,39 @@ def run(self): counts += np.bincount(zbin[any_bin], minlength=nbin_lens) counts_2d += any_bin.sum() - # save data + # save data to tomography catalog g["ra"][s:e] = data["ra"] g["dec"][s:e] = data["dec"] - g["redshift"][s:e] = data["redshift"] + g["z"][s:e] = data["redshift"] # # including mags # for i, b in enumerate(bands): # g[f"mag_{b}"][s:e] = data["mag"][:, i] # g[f"mag_err_{b}"][s:e] = data["mag_err"][:, i] - h["bin"][s:e] = zbin + h_uw["bin"][s:e] = zbin + h_uw["lens_weight"][s:e] = 1.0 + + h_w["bin"][s:e] = zbin if self.config["mock"]: - h["lens_weight"][s:e] = 1.0 + h_w["lens_weight"][s:e] = 1.0 else: - h["lens_weight"][s:e] = data["weight"] + h_w["lens_weight"][s:e] = data["weight"] + + # save data to binned lens catalog + for bn in range(nbin_lens): + sel = h_w["bin"][:] == bn + gb_ = gb[f"bin_{bn}"] + gb_["ra"][:] = g["ra"][:][sel] + gb_["dec"][:] = g["dec"][:][sel] + gb_["z"][:] = g["z"][:][sel] + gb_["w_sys"][:] = h_w["lens_weight"][:][sel] # this is an overall count - h_counts["counts"][:] = counts - h_counts["counts_2d"][:] = counts_2d + h_counts_uw["counts"][:] = counts + h_counts_uw["counts_2d"][:] = counts_2d + h_counts_w["counts"][:] = counts + h_counts_w["counts_2d"][:] = counts_2d # Generate and save the 2D n(z) histogram also, just # by summing up all the individual values. From fadac551a0387610d42842ef429e1b33c6fe57e7 Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Tue, 5 May 2026 19:03:20 -0400 Subject: [PATCH 13/25] updates to config options and select sources behind lens --- examples/desi_dr1_stage3/config.yml | 8 ++++++++ examples/desi_dr1_stage3/pipeline.yml | 14 +++++++------- txpipe/delta_sigma.py | 22 ++++++++++++++++++---- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/examples/desi_dr1_stage3/config.yml b/examples/desi_dr1_stage3/config.yml index 3d9324768..c40d2d65a 100644 --- a/examples/desi_dr1_stage3/config.yml +++ b/examples/desi_dr1_stage3/config.yml @@ -49,6 +49,14 @@ TXDESISelector: apply_mask: false zmin: 0.0 +TXDeltaSigma: + lens_bins: [0] + lens_source_sep: 0.1 + photoz: True + lower_bin_edge: [0.0, 0.358, 0.631, 0.872] + source_cat_w_col: w + lens_cat_w_col: w_sys + TXSourceSelectorHSC: input_pz: true true_z: false diff --git a/examples/desi_dr1_stage3/pipeline.yml b/examples/desi_dr1_stage3/pipeline.yml index 33de9f82b..7f39d5e58 100644 --- a/examples/desi_dr1_stage3/pipeline.yml +++ b/examples/desi_dr1_stage3/pipeline.yml @@ -95,19 +95,19 @@ config: examples/desi_dr1_stage3/config.yml inputs: # See README for paths to download these files - # shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/txpipe_allfield_shear.h5 - shear_tomography_classifier: none - shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/metadetect_desy6.hdf5 - shear_tomography_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/tomography_metadetect_desy6.hdf5 - binned_shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_metadetect_desy6.hdf5 - shear_photoz_stack: /scratch/gpfs/AAMON/desc_analysis/catalogs/shear_photoz_stack_metadetect_desy6.hdf5 + # shear_catalog: /scratch/gpfs/AAMON/desc_analysis/TXPipe/data/shear_catalog_desy3_unmasked_withfakez_v2.h5 + # shear_tomography_classifier: none + shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/metacal_desy3.hdf5 + shear_tomography_catalog: none + binned_shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_metacal_desy3.hdf5 + shear_photoz_stack: /scratch/gpfs/AAMON/desc_analysis/catalogs/shear_photoz_stack_metacal_desy3.hdf5 # photometry_catalog: data/example/inputs/photometry_catalog.hdf5 # photoz_trained_model: data/example/inputs/cosmoDC2_trees_i25.3.npy # exposures: data/example/inputs/exposures.hdf5 # star_catalog: data/example/inputs/star_catalog.hdf5 random_cats: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 - binned_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 + binned_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_randoms_bin0.hdf5 binned_random_catalog_sub: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 spectroscopic_catalog: None diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 50d83d96d..bf1df1d95 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -34,6 +34,11 @@ class TXDeltaSigma(TXTwoPoint): "r_min": StageParameter(float, 0.1, msg="Minimum radius to use in Mpc"), "r_max": StageParameter(float, 10.0, msg="Maximum radius to use in Mpc"), "nbins": StageParameter(int, 10, msg="Number of radial bins"), + "photoz": StageParameter(bool, False, msg="Whether or not objects have point photo-z estimate"), + "lens_source_sep": StageParameter(float, 0.1, msg="Minimum redshift separation between lens and source bins to use for dSigma measurement"), + "lower_bin_edge": StageParameter(list, [0.0, 0.358, 0.631, 0.872], msg="Lower edge of the source redshift bins to use for dSigma measurement, only used if photoz=True"), + "source_cat_w_col": StageParameter(str, "weight", msg="Source catalog weight column name."), + "lens_cat_w_col": StageParameter(str, "weight", msg="Lens catalog weight column name."), } def run(self): @@ -47,6 +52,7 @@ def run(self): with self.open_input("fiducial_cosmology", wrapper=True) as cosmo_file: cosmo = cosmo_file.to_astropy() source_n_of_z = self.load_redshift_distribution("shear_photoz_stack") + print(source_n_of_z['z']) # The lens n_of_z is only used when saving at the end lens_n_of_z = self.load_redshift_distribution("lens_photoz_stack") @@ -71,6 +77,11 @@ def run(self): lens_table = self.load_lens_table(lens_bin) randoms_table = self.load_random_table(lens_bin) + if self.config["photoz"]: + # if we do not have point photo-z estimates for the sources then we use the lower edge of the tomographic bins to make z column + source_table['z'] = np.array(self.config["lower_bin_edge"])[source_table['z_bin']] + + # Add columns to two tables in-place to do most of the pre-computation work. # We should look at using the n_jobs multiprocessing option here # but I don't know if it will play well with MPI on NERSC that we are @@ -79,8 +90,9 @@ def run(self): f"Computing excess surface density for source = {source_bin}, lens = {lens_bin}, " f"with {len(source_table)} sources, {len(lens_table)} lenses, {len(randoms_table)} randoms" ) - dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo) - dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo) + print(np.amax(source_n_of_z['z'][source_n_of_z['n'][:, 0] > 0])) + dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, lens_source_cut=self.config["lens_source_sep"]) + dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, lens_source_cut=self.config["lens_source_sep"]) # stack to get the excess surface density Delta(Sigma) result = dsigma.stacking.excess_surface_density( @@ -164,12 +176,14 @@ def load_source_table(self, bin_index): "ra": "ra", "dec": "dec", "z": "z", - "w": "weight", + "w": self.config["source_cat_w_col"], "e_1": "g1", "e_2": "g2", } with self.open_input("binned_shear_catalog") as shear_file: group = shear_file[f"shear/bin_{bin_index}"] + if self.config["photoz"]: + del names["z"] table = self.load_table(group, names) nbin = shear_file["shear"].attrs["nbin_source"] @@ -240,7 +254,7 @@ def load_lens_table(self, bin_index): "ra": "ra", "dec": "dec", "z": "z", - "w_sys": "weight", + "w_sys": self.config["lens_cat_w_col"], } with self.open_input("binned_lens_catalog") as lens_file: group = lens_file[f"lens/bin_{bin_index}"] From b84ed413eec1ab00a40f2796621209ee7c686097 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 6 May 2026 16:36:29 +0100 Subject: [PATCH 14/25] Add placeholder stage classes --- examples/desi_dr1_stage3/pipeline.yml | 23 +++++++++++++++++++++++ txpipe/ingest/desi.py | 14 +++++++++++++- txpipe/ingest/dp1.py | 4 ++++ txpipe/masks.py | 24 +++++++++++++++++++++++- 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/examples/desi_dr1_stage3/pipeline.yml b/examples/desi_dr1_stage3/pipeline.yml index 7f39d5e58..bc311bc44 100644 --- a/examples/desi_dr1_stage3/pipeline.yml +++ b/examples/desi_dr1_stage3/pipeline.yml @@ -1,9 +1,25 @@ # Stages to run stages: + - name: TXIngestDataPreview2 # - name: TXSourceSelectorMetacal # - name: TXSourceSelectorMetadetect + # aliases: + # shear_catalog: cut_shear_catalog + # - name: TXSourceTomography # - name: TXSourceTomography # - name: TXShearCalibration + # aliases: + # shear_catalog: cut_shear_catalog + # - name: TXCustomMask + # - name: TXIngestDESIRandoms + # - name: PZRailSummarizeSource # Run the DIR method on the lens sample to find n(z) + # classname: PZRailSummarize + # aliases: + # tomography_catalog: shear_tomography_catalog + # binned_catalog: binned_shear_catalog + # model: source_photoz_model + # photoz_stack: shear_photoz_stack + # photoz_realizations: source_photoz_realizations # - name: TXExternalLensCatalogSplitter # - name: TXStarCatalogSplitter # - name: TXTruePhotozStackSource @@ -14,9 +30,16 @@ stages: # weights_catalog: shear_catalog # photoz_stack: shear_photoz_stack # - name: TXIngestRedmagic + # - name: TXDESIJointMask - name: TXDESISelector - name: TXIngestDESI - name: TXTracerMetadata + # aliases: + # shear_catalog: cut_shear_catalog + # - name: TXCutCatalog + # aliases: + # catalog: shear_catalog + # cut_catalog: cut_shear_catalog # - name: TXSourceMaps # - name: TXExternalLensMaps # - name: TXAuxiliarySourceMaps diff --git a/txpipe/ingest/desi.py b/txpipe/ingest/desi.py index 3a1682f27..b3783c398 100644 --- a/txpipe/ingest/desi.py +++ b/txpipe/ingest/desi.py @@ -1,8 +1,20 @@ from ..base_stage import PipelineStage -from ..data_types import HDFFile, FitsFile, QPNOfZFile +from ..data_types import HDFFile, FitsFile, QPNOfZFile, RandomsCatalog import numpy as np from ceci.config import StageParameter +class TXIngestDESIRandoms(PipelineStage): + name = "TXIngestDESIRandoms" + inputs = [ + ("desi_random_catalog", FitsFile,) + ] + outputs = [ + ("random_cats", RandomsCatalog), + ("binned_random_catalog", RandomsCatalog), + ("binned_random_catalog_sub", RandomsCatalog), + + ] + class TXIngestDESI(PipelineStage): """ diff --git a/txpipe/ingest/dp1.py b/txpipe/ingest/dp1.py index d0c11abfb..f929f5dde 100644 --- a/txpipe/ingest/dp1.py +++ b/txpipe/ingest/dp1.py @@ -334,6 +334,10 @@ def get_catalog_size(self, butler, dataset_type): return n +class TXIngestDataPreview2(TXIngestDataPreview1): + name = "TXIngestDataPreview2" + + def sanitize(data): """ Convert unicode arrays into types that h5py can save diff --git a/txpipe/masks.py b/txpipe/masks.py index 10e53d72e..d0ebfff78 100644 --- a/txpipe/masks.py +++ b/txpipe/masks.py @@ -1,7 +1,7 @@ import numpy as np from .utils import choose_pixelization from .base_stage import PipelineStage -from .data_types import MapsFile +from .data_types import MapsFile, HDFFile from ceci.config import StageParameter from .mapping import degrade_healsparse @@ -482,3 +482,25 @@ def degrade(self, mask, metadata_in, nside_out): assert np.round(area_in, 3) == np.round(area_out, 3) return mask_out, metadata_out + + +class TXDESIJointMask(PipelineStage): + name = "TXDESIJointMask" + inputs = [ + ("desi_mask", MapsFile), + ("shear_catalog", HDFFile), + ] + outputs = [ + ("mask", MapsFile), + ] + + +class TXCutCatalog(PipelineStage): + name = "TXCutCatalog" + inputs = [ + ("catalog", HDFFile), + ("mask", MapsFile), + ] + outputs = [ + ("cut_catalog", HDFFile), + ] \ No newline at end of file From c20dead6689027bc9521ceba42e55f2d8c2671c0 Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Wed, 6 May 2026 21:28:57 -0400 Subject: [PATCH 15/25] joint mask working --- txpipe/masks.py | 89 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 6 deletions(-) diff --git a/txpipe/masks.py b/txpipe/masks.py index d0ebfff78..11814e2f5 100644 --- a/txpipe/masks.py +++ b/txpipe/masks.py @@ -483,16 +483,87 @@ def degrade(self, mask, metadata_in, nside_out): return mask_out, metadata_out +class TXJointMask(TXBaseMask): + """ + Combine two binary masks into one using AND intersection. + + Currently only supports binary masks. + + Subclasses can override `mask_input_names` to rename the two input tags + without duplicating any logic. + """ + + name = "TXJointMask" + inputs = [ + ("mask1", MapsFile), + ("mask2", MapsFile), + ] + outputs = [("mask", MapsFile)] + + # Subclasses override this tuple to change which input tags are opened. + mask_input_names = ("mask1", "mask2") + + def make_binary_mask(self): + """ + Load the two input masks and return their AND intersection. + + TODO: make this more flexible to allow for different types of masks (e.g. fractional coverage maps) and different combination logic (e.g. OR, product, etc.). Currently, we assume shear mask is in healsparse format and lens mask is in HDF5. + + Returns + ------- + mask : hsp.HealSparseMap + Combined boolean mask. + pixel_scheme : PixelScheme + Pixelization object taken from the first mask's metadata. + metadata : dict + Metadata from the first input mask. + """ + import healsparse as hsp + + name1, name2 = self.mask_input_names + + with self.open_input(name1, wrapper=True) as f: + metadata = dict(f.file["maps"].attrs) + nside1 = metadata["nside"] + pixel_scheme1 = choose_pixelization(**metadata) + mask1 = f.read_mask("mask", returnbool=True) + + with self.open_input(name2, wrapper=True) as f: + metadata = dict(f.file["maps"].attrs) + nside2 = metadata["nside"] + pixel_scheme2 = choose_pixelization(**metadata) + mask2 = f.read_mask("mask", returnbool=True) + + if nside1 != nside2: + if nside1 < nside2: + print(f"Degrading {name2} from Nside {nside2} to {nside1}") + mask2 = mask2.degrade(nside1) + elif nside2 < nside1: + print(f"Degrading {name1} from Nside {nside1} to {nside2}") + mask1 = mask1.degrade(nside2) + mask = hsp.operations.product_intersection([mask1, mask2]) + else: + mask = hsp.operations.product_intersection([mask1, mask2]) + nside_out = mask.nside_sparse + metadata["nside"] = nside_out + pixel_scheme = choose_pixelization(pixelization="healpix", nside=nside_out, nest=True) + + return mask, pixel_scheme, metadata + + +class TXDESIJointMask(TXJointMask): + """ + Combine the DESI lens mask and the shear catalog mask using AND intersection. + """ -class TXDESIJointMask(PipelineStage): name = "TXDESIJointMask" inputs = [ ("desi_mask", MapsFile), - ("shear_catalog", HDFFile), - ] - outputs = [ - ("mask", MapsFile), + ("shear_mask", MapsFile), ] + outputs = [("mask", MapsFile)] + + mask_input_names = ("desi_mask", "shear_mask") class TXCutCatalog(PipelineStage): @@ -503,4 +574,10 @@ class TXCutCatalog(PipelineStage): ] outputs = [ ("cut_catalog", HDFFile), - ] \ No newline at end of file + ] + + def run(self): + + with self.open_input("mask", wrapper=True) as f: + self.mask = f.read_mask("mask") + self.mask_nside = f.read_map_info("mask")["nside"] \ No newline at end of file From 4a43be0d4de3a07fdad7645076b4775f873a00ef Mon Sep 17 00:00:00 2001 From: myamamoto26 Date: Mon, 11 May 2026 22:12:01 -0400 Subject: [PATCH 16/25] pipeline runs --- examples/desi_dr1_stage3/config.yml | 10 ++ examples/desi_dr1_stage3/pipeline.yml | 30 +++--- txpipe/delta_sigma.py | 5 +- txpipe/ingest/desi.py | 135 +++++++++++++++++++++++++- txpipe/masks.py | 76 ++++++++++++++- 5 files changed, 234 insertions(+), 22 deletions(-) diff --git a/examples/desi_dr1_stage3/config.yml b/examples/desi_dr1_stage3/config.yml index c40d2d65a..83095345c 100644 --- a/examples/desi_dr1_stage3/config.yml +++ b/examples/desi_dr1_stage3/config.yml @@ -41,6 +41,12 @@ TXIngestDESI: lens_zbin_edges: [0.0, 3.0] dz: 0.001 +TXIngestDESIRandoms: + lens_zbin_edges: [0.0, 3.0] + ra_col: RA + dec_col: DEC + z_col: Z + TXDESISelector: ra_col: RA dec_col: DEC @@ -50,9 +56,13 @@ TXDESISelector: zmin: 0.0 TXDeltaSigma: + source_bins: [0,1,2,3] # no non-tomograhpic redshift distribution. lens_bins: [0] lens_source_sep: 0.1 photoz: True + r_min: 0.35 + r_max: 70.0 + nbins: 15 lower_bin_edge: [0.0, 0.358, 0.631, 0.872] source_cat_w_col: w lens_cat_w_col: w_sys diff --git a/examples/desi_dr1_stage3/pipeline.yml b/examples/desi_dr1_stage3/pipeline.yml index bc311bc44..666f63d2e 100644 --- a/examples/desi_dr1_stage3/pipeline.yml +++ b/examples/desi_dr1_stage3/pipeline.yml @@ -1,6 +1,10 @@ # Stages to run stages: - - name: TXIngestDataPreview2 + # - name: TXIngestDataPreview2 + # - name: TXCutShearCatalog + # aliases: + # catalog: shear_catalog + # cut_catalog: cut_shear_catalog # - name: TXSourceSelectorMetacal # - name: TXSourceSelectorMetadetect # aliases: @@ -11,7 +15,7 @@ stages: # aliases: # shear_catalog: cut_shear_catalog # - name: TXCustomMask - # - name: TXIngestDESIRandoms + - name: TXIngestDESIRandoms # - name: PZRailSummarizeSource # Run the DIR method on the lens sample to find n(z) # classname: PZRailSummarize # aliases: @@ -30,7 +34,7 @@ stages: # weights_catalog: shear_catalog # photoz_stack: shear_photoz_stack # - name: TXIngestRedmagic - # - name: TXDESIJointMask + - name: TXDESIJointMask - name: TXDESISelector - name: TXIngestDESI - name: TXTracerMetadata @@ -50,8 +54,8 @@ stages: # - name: TXMapPlots # - name: TXRandomCat - name: TXJackknifeCenters - - name: TXTwoPoint - threads_per_process: 2 + # - name: TXTwoPoint + # threads_per_process: 2 - name: TXDeltaSigma - name: TXDeltaSigmaPlots # - name: TXBlinding # Blind the data following Muir et al @@ -120,23 +124,25 @@ inputs: # See README for paths to download these files # shear_catalog: /scratch/gpfs/AAMON/desc_analysis/TXPipe/data/shear_catalog_desy3_unmasked_withfakez_v2.h5 # shear_tomography_classifier: none - shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/metacal_desy3.hdf5 + shear_catalog: none #/scratch/gpfs/AAMON/desc_analysis/catalogs/metacal_desy3.hdf5 shear_tomography_catalog: none - binned_shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_metacal_desy3.hdf5 + binned_shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/cut_binned_metacal_desy3.hdf5 shear_photoz_stack: /scratch/gpfs/AAMON/desc_analysis/catalogs/shear_photoz_stack_metacal_desy3.hdf5 # photometry_catalog: data/example/inputs/photometry_catalog.hdf5 # photoz_trained_model: data/example/inputs/cosmoDC2_trees_i25.3.npy # exposures: data/example/inputs/exposures.hdf5 # star_catalog: data/example/inputs/star_catalog.hdf5 - random_cats: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 - binned_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_randoms_bin0.hdf5 - binned_random_catalog_sub: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 + desi_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/BGS_ANY_SGC_0_clustering.ran.fits + # random_cats: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 + # binned_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_randoms_bin0.hdf5 + # binned_random_catalog_sub: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 spectroscopic_catalog: None - desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/LRG_SGC_clustering.dat.fits + desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/BGS_ANY_SGC_clustering.dat.fits # mask should be already a combination of source and lens. - mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/dp2_23p5_combined_mask_nmin=500_nside=512.hdf5 + desi_mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_BGS_ANY_SGC_clustering.h5 + shear_mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/desy3_mask.h5 # This file comes with the code fiducial_cosmology: data/fiducial_cosmology.yml # spectroscopic_catalog: data/example/inputs/mock_spectroscopic_catalog.hdf5 diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index bf1df1d95..036fdd2be 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -46,13 +46,13 @@ def run(self): import sacc bin_pairs = self.get_bin_pairs() + print(bin_pairs) source_bin = 3 lens_bin = 0 with self.open_input("fiducial_cosmology", wrapper=True) as cosmo_file: cosmo = cosmo_file.to_astropy() source_n_of_z = self.load_redshift_distribution("shear_photoz_stack") - print(source_n_of_z['z']) # The lens n_of_z is only used when saving at the end lens_n_of_z = self.load_redshift_distribution("lens_photoz_stack") @@ -90,7 +90,7 @@ def run(self): f"Computing excess surface density for source = {source_bin}, lens = {lens_bin}, " f"with {len(source_table)} sources, {len(lens_table)} lenses, {len(randoms_table)} randoms" ) - print(np.amax(source_n_of_z['z'][source_n_of_z['n'][:, 0] > 0])) + dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, lens_source_cut=self.config["lens_source_sep"]) dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, lens_source_cut=self.config["lens_source_sep"]) @@ -320,6 +320,7 @@ def save_results(self, results, shear_photoz_stack, lens_photoz_stack): z = shear_photoz_stack["z"] Nz = shear_photoz_stack["n"] nbin_source = Nz.shape[1] + for i in range(nbin_source): if i == nbin_source - 1: i = "all" diff --git a/txpipe/ingest/desi.py b/txpipe/ingest/desi.py index b3783c398..0bc696447 100644 --- a/txpipe/ingest/desi.py +++ b/txpipe/ingest/desi.py @@ -1,19 +1,148 @@ from ..base_stage import PipelineStage -from ..data_types import HDFFile, FitsFile, QPNOfZFile, RandomsCatalog +from ..data_types import HDFFile, FitsFile, QPNOfZFile, RandomsCatalog, FiducialCosmology import numpy as np from ceci.config import StageParameter class TXIngestDESIRandoms(PipelineStage): + """ + Ingest a DESI random catalog from FITS and write random_cats, + binned_random_catalog, and a subsampled binned_random_catalog_sub. + + Output format matches TXRandomCat: group "randoms" with datasets + ra, dec, z, comoving_distance, bin at the top level, and per-bin + subgroups bin_0 ... bin_N in the binned outputs. + """ + name = "TXIngestDESIRandoms" + parallel = False inputs = [ - ("desi_random_catalog", FitsFile,) + ("desi_random_catalog", FitsFile), + ("fiducial_cosmology", FiducialCosmology), ] outputs = [ ("random_cats", RandomsCatalog), ("binned_random_catalog", RandomsCatalog), ("binned_random_catalog_sub", RandomsCatalog), - ] + config_options = { + "lens_zbin_edges": StageParameter(list, [float], msg="Edges of lens redshift bins."), + "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk."), + "sample_rate": StageParameter(float, 0.5, msg="Fraction of randoms retained in the sub-catalog."), + "z_col": StageParameter(str, "z", msg="Redshift column name in the FITS random catalog."), + "ra_col": StageParameter(str, "ra", msg="Right ascension column name in the FITS random catalog."), + "dec_col": StageParameter(str, "dec", msg="Declination column name in the FITS random catalog."), + } + + def run(self): + import pyccl + + with self.open_input("fiducial_cosmology", wrapper=True) as f: + cosmo = f.to_ccl() + + zbin_edges = self.config["lens_zbin_edges"] + nbin = len(zbin_edges) - 1 + chunk_rows = self.config["chunk_rows"] + z_col = self.config["z_col"] + ra_col = self.config["ra_col"] + dec_col = self.config["dec_col"] + + # Count total rows + f = self.open_input("desi_random_catalog") + n_total = f[1].get_nrows() + f.close() + print(f"Total randoms: {n_total}") + + # First pass: count objects per redshift bin + bin_counts = np.zeros(nbin, dtype=np.int64) + for s, e, data in self.iterate_fits("desi_random_catalog", 1, [z_col], chunk_rows): + zbin = np.digitize(data[z_col], zbin_edges) - 1 + zbin[zbin >= nbin] = -1 + valid = zbin >= 0 + bin_counts += np.bincount(zbin[valid], minlength=nbin) + for i, c in enumerate(bin_counts): + print(f" Bin {i}: {c} randoms") + + # Create output files and datasets + random_cats = self.open_output("random_cats") + binned_output = self.open_output("binned_random_catalog") + + g = random_cats.create_group("randoms") + g.create_dataset("ra", (n_total,), dtype=np.float64) + g.create_dataset("dec", (n_total,), dtype=np.float64) + g.create_dataset("z", (n_total,), dtype=np.float64) + g.create_dataset("comoving_distance", (n_total,), dtype=np.float64) + g.create_dataset("bin", (n_total,), dtype=np.int16) + + gb = binned_output.create_group("randoms") + gb.attrs["nbin"] = nbin + subgroups = [] + for i in range(nbin): + sg = gb.create_group(f"bin_{i}") + sg.create_dataset("ra", (bin_counts[i],), dtype=np.float64) + sg.create_dataset("dec", (bin_counts[i],), dtype=np.float64) + sg.create_dataset("z", (bin_counts[i],), dtype=np.float64) + sg.create_dataset("comoving_distance", (bin_counts[i],), dtype=np.float64) + subgroups.append(sg) + + # Second pass: fill data + bin_cursors = np.zeros(nbin, dtype=np.int64) + cols = [ra_col, dec_col, z_col] + for s, e, data in self.iterate_fits("desi_random_catalog", 1, cols, chunk_rows): + ra = data[ra_col] + dec = data[dec_col] + z = data[z_col] + zbin = np.digitize(z, zbin_edges) - 1 + zbin[zbin >= nbin] = -1 + chi = pyccl.comoving_radial_distance(cosmo, 1.0 / (1.0 + z)) + + g["ra"][s:e] = ra + g["dec"][s:e] = dec + g["z"][s:e] = z + g["comoving_distance"][s:e] = chi + g["bin"][s:e] = zbin + + for i in range(nbin): + sel = zbin == i + n_sel = int(sel.sum()) + if n_sel > 0: + c = bin_cursors[i] + subgroups[i]["ra"][c:c + n_sel] = ra[sel] + subgroups[i]["dec"][c:c + n_sel] = dec[sel] + subgroups[i]["z"][c:c + n_sel] = z[sel] + subgroups[i]["comoving_distance"][c:c + n_sel] = chi[sel] + bin_cursors[i] += n_sel + + self.subsample_randoms(binned_output, nbin) + + random_cats.close() + binned_output.close() + + def subsample_randoms(self, binned_output, nbin): + """Randomly subsample the binned random catalog and write binned_random_catalog_sub.""" + sample_rate = self.config["sample_rate"] + print(f"Sub-sampling randoms at rate {sample_rate}") + + binned_output_sub = self.open_output("binned_random_catalog_sub") + gb_sub = binned_output_sub.create_group("randoms") + gb_sub.attrs["nbin"] = nbin + + for j in range(nbin): + ra = binned_output[f"randoms/bin_{j}/ra"][:] + dec = binned_output[f"randoms/bin_{j}/dec"][:] + z = binned_output[f"randoms/bin_{j}/z"][:] + chi = binned_output[f"randoms/bin_{j}/comoving_distance"][:] + + ntotal = len(ra) + nsub = int(sample_rate * ntotal) + idx = np.random.choice(ntotal, size=nsub, replace=False) + + sg = gb_sub.create_group(f"bin_{j}") + sg.create_dataset("ra", data=ra[idx]) + sg.create_dataset("dec", data=dec[idx]) + sg.create_dataset("z", data=z[idx]) + sg.create_dataset("comoving_distance", data=chi[idx]) + + binned_output_sub.close() class TXIngestDESI(PipelineStage): diff --git a/txpipe/masks.py b/txpipe/masks.py index 11814e2f5..be8804f9c 100644 --- a/txpipe/masks.py +++ b/txpipe/masks.py @@ -567,17 +567,83 @@ class TXDESIJointMask(TXJointMask): class TXCutCatalog(PipelineStage): + """ + Cut a catalog to the footprint defined by an input mask, and write out the cut catalog. + + Subclasses can override `catalog_input_name`, `catalog_output_name`, and the + `config_options` defaults to handle different catalog types. + """ + name = "TXCutCatalog" inputs = [ ("catalog", HDFFile), ("mask", MapsFile), ] - outputs = [ - ("cut_catalog", HDFFile), - ] + outputs = [("cut_catalog", HDFFile)] + config_options = { + "chunk_rows": StageParameter(int, 100000, msg="Number of rows to read per chunk."), + "catalog_group": StageParameter(str, "catalog", msg="HDF5 group name in the input catalog."), + "ra_col": StageParameter(str, "ra", msg="RA column name."), + "dec_col": StageParameter(str, "dec", msg="Dec column name."), + } + + catalog_input_name = "catalog" + catalog_output_name = "cut_catalog" def run(self): + import healpy as hp + import h5py with self.open_input("mask", wrapper=True) as f: - self.mask = f.read_mask("mask") - self.mask_nside = f.read_map_info("mask")["nside"] \ No newline at end of file + mask = f.read_mask("mask", returnbool=True) + nside = f.read_map_info("mask")["nside"] + + group_name = self.config["catalog_group"] + ra_col = self.config["ra_col"] + dec_col = self.config["dec_col"] + chunk_rows = self.config["chunk_rows"] + + in_path = self.get_input(self.catalog_input_name) + out_path = self.get_output(self.catalog_output_name) + + with h5py.File(in_path, "r") as f_in, h5py.File(out_path, "w") as f_out: + g_in = f_in[group_name] + n_total = g_in[ra_col].size + + # First pass: collect indices of objects inside the mask footprint + selected = [] + for start in range(0, n_total, chunk_rows): + end = min(start + chunk_rows, n_total) + ra = g_in[ra_col][start:end] + dec = g_in[dec_col][start:end] + pix = hp.ang2pix(nside, ra, dec, lonlat=True, nest=True) + sel = mask[pix].astype(bool) + selected.append(np.where(sel)[0] + start) + + selected = np.concatenate(selected) + print(f"Selected {len(selected)} / {n_total} objects inside mask") + + # Write selected rows for every column in the group + g_out = f_out.create_group(group_name) + for col in g_in.keys(): + g_out.create_dataset(col, data=g_in[col][selected]) + + +class TXCutShearCatalog(TXCutCatalog): + """ + Cut a shear catalog to the footprint defined by an input mask. + """ + + name = "TXCutShearCatalog" + inputs = [ + ("shear_catalog", HDFFile), + ("mask", MapsFile), + ] + outputs = [("cut_shear_catalog", HDFFile)] + config_options = { + **TXCutCatalog.config_options, + "catalog_group": StageParameter(str, "shear", msg="HDF5 group name in the input shear catalog."), + } + + catalog_input_name = "shear_catalog" + catalog_output_name = "cut_shear_catalog" \ No newline at end of file From 88ee762fb9b7d803b0781e401e22e854593a8b2f Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 12 May 2026 15:31:58 +0100 Subject: [PATCH 17/25] remove apparently deleted lens_source_cut kwarg --- txpipe/delta_sigma.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 036fdd2be..94e1c863d 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -91,8 +91,8 @@ def run(self): f"with {len(source_table)} sources, {len(lens_table)} lenses, {len(randoms_table)} randoms" ) - dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, lens_source_cut=self.config["lens_source_sep"]) - dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, lens_source_cut=self.config["lens_source_sep"]) + dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo)#, lens_source_cut=self.config["lens_source_sep"]) + dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo)#, lens_source_cut=self.config["lens_source_sep"]) # stack to get the excess surface density Delta(Sigma) result = dsigma.stacking.excess_surface_density( From 90105b3afe4c46bb9fbaa9dd9df30d9e407751c2 Mon Sep 17 00:00:00 2001 From: joezuntz Date: Tue, 12 May 2026 08:30:44 -0700 Subject: [PATCH 18/25] expose n_jobs in delta_sigma --- txpipe/delta_sigma.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 94e1c863d..709e461ed 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -39,6 +39,7 @@ class TXDeltaSigma(TXTwoPoint): "lower_bin_edge": StageParameter(list, [0.0, 0.358, 0.631, 0.872], msg="Lower edge of the source redshift bins to use for dSigma measurement, only used if photoz=True"), "source_cat_w_col": StageParameter(str, "weight", msg="Source catalog weight column name."), "lens_cat_w_col": StageParameter(str, "weight", msg="Lens catalog weight column name."), + "n_jobs": StageParameter(int, 1, msg="Number of multiprocessing processes to use"), } def run(self): @@ -90,9 +91,11 @@ def run(self): f"Computing excess surface density for source = {source_bin}, lens = {lens_bin}, " f"with {len(source_table)} sources, {len(lens_table)} lenses, {len(randoms_table)} randoms" ) - - dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo)#, lens_source_cut=self.config["lens_source_sep"]) - dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo)#, lens_source_cut=self.config["lens_source_sep"]) + progress_bar = self.rank == 0 + n_jobs = self.config["n_jobs"] + + dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, progress_bar=progress_bar, n_jobs=n_jobs)#, lens_source_cut=self.config["lens_source_sep"]) + dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, progress_bar=progress_bar, n_jobs=n_jobs)#, lens_source_cut=self.config["lens_source_sep"]) # stack to get the excess surface density Delta(Sigma) result = dsigma.stacking.excess_surface_density( From 07dfc7c2589e44b088504d6068ecb8977cafd532 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 12 May 2026 16:31:08 +0100 Subject: [PATCH 19/25] fix collecting of results under MPI --- txpipe/delta_sigma.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 709e461ed..9bb1a685d 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -308,6 +308,8 @@ def save_results(self, results, shear_photoz_stack, lens_photoz_stack): if self.comm is not None: # Gather results from all ranks to the root rank results = self.comm.gather(results, root=0) + # flatten back into a single list. I always forget this + results = [r for sublist in results for r in sublist] # Only the root process saves the output file. if self.rank != 0: From 37aaa943ee0efe1d71da655fad7b4485d365b67c Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 12 May 2026 16:44:35 +0100 Subject: [PATCH 20/25] another fix to results packing --- txpipe/delta_sigma.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 9bb1a685d..c874e0642 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -309,7 +309,8 @@ def save_results(self, results, shear_photoz_stack, lens_photoz_stack): # Gather results from all ranks to the root rank results = self.comm.gather(results, root=0) # flatten back into a single list. I always forget this - results = [r for sublist in results for r in sublist] + if self.rank == 0: + results = [r for sublist in results for r in sublist] # Only the root process saves the output file. if self.rank != 0: From d2b18264a2da3c926c7ffa5b545fb06f0b6c4db1 Mon Sep 17 00:00:00 2001 From: joezuntz Date: Tue, 12 May 2026 09:12:55 -0700 Subject: [PATCH 21/25] Use dataregistry and parallelism at NERSC --- examples/desi_dr1_stage3/config.yml | 1 + examples/desi_dr1_stage3/pipeline.yml | 31 +++++++++++++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/examples/desi_dr1_stage3/config.yml b/examples/desi_dr1_stage3/config.yml index 83095345c..75eee5824 100644 --- a/examples/desi_dr1_stage3/config.yml +++ b/examples/desi_dr1_stage3/config.yml @@ -66,6 +66,7 @@ TXDeltaSigma: lower_bin_edge: [0.0, 0.358, 0.631, 0.872] source_cat_w_col: w lens_cat_w_col: w_sys + n_jobs: 128 TXSourceSelectorHSC: input_pz: true diff --git a/examples/desi_dr1_stage3/pipeline.yml b/examples/desi_dr1_stage3/pipeline.yml index 666f63d2e..27b0b4844 100644 --- a/examples/desi_dr1_stage3/pipeline.yml +++ b/examples/desi_dr1_stage3/pipeline.yml @@ -57,6 +57,8 @@ stages: # - name: TXTwoPoint # threads_per_process: 2 - name: TXDeltaSigma + nprocess: 4 + threads_per_process: 1 - name: TXDeltaSigmaPlots # - name: TXBlinding # Blind the data following Muir et al # threads_per_process: 2 @@ -126,23 +128,35 @@ inputs: # shear_tomography_classifier: none shear_catalog: none #/scratch/gpfs/AAMON/desc_analysis/catalogs/metacal_desy3.hdf5 shear_tomography_catalog: none - binned_shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/cut_binned_metacal_desy3.hdf5 - shear_photoz_stack: /scratch/gpfs/AAMON/desc_analysis/catalogs/shear_photoz_stack_metacal_desy3.hdf5 + #binned_shear_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/cut_binned_metacal_desy3.hdf5 + #shear_photoz_stack: /scratch/gpfs/AAMON/desc_analysis/catalogs/shear_photoz_stack_metacal_desy3.hdf5 + binned_shear_catalog: + name: desy3_binned_shear_catalog + shear_photoz_stack: + name: desy3_shear_photoz_stack + shear_mask: + name: desy3_shear_mask + desi_mask: + name: desi_dr1_mask + desi_catalog: + name: desi_dr1_catalog + desi_random_catalog: + name: desi_dr1_random_catalog # photometry_catalog: data/example/inputs/photometry_catalog.hdf5 # photoz_trained_model: data/example/inputs/cosmoDC2_trees_i25.3.npy # exposures: data/example/inputs/exposures.hdf5 # star_catalog: data/example/inputs/star_catalog.hdf5 - desi_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/BGS_ANY_SGC_0_clustering.ran.fits + # desi_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/BGS_ANY_SGC_0_clustering.ran.fits # random_cats: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 # binned_random_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/binned_randoms_bin0.hdf5 # binned_random_catalog_sub: /scratch/gpfs/AAMON/desc_analysis/catalogs/random_cats.hdf5 spectroscopic_catalog: None - desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/BGS_ANY_SGC_clustering.dat.fits + # desi_catalog: /scratch/gpfs/AAMON/desc_analysis/catalogs/BGS_ANY_SGC_clustering.dat.fits # mask should be already a combination of source and lens. - desi_mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_BGS_ANY_SGC_clustering.h5 - shear_mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/desy3_mask.h5 + # desi_mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/mask_BGS_ANY_SGC_clustering.h5 + # shear_mask: /scratch/gpfs/AAMON/desc_analysis/catalogs/desy3_mask.h5 # This file comes with the code fiducial_cosmology: data/fiducial_cosmology.yml # spectroscopic_catalog: data/example/inputs/mock_spectroscopic_catalog.hdf5 @@ -153,4 +167,7 @@ resume: true # where to put output logs for individual stages log_dir: data/example/logs_desi_dr1_stage3 # where to put an overall parsl pipeline log -pipeline_log: data/example/log_desi_dr1_stage3.txt \ No newline at end of file +pipeline_log: data/example/log_desi_dr1_stage3.txt +registry: + owner_type: project + owner: wlss-dp2-desi-delta-sigma \ No newline at end of file From 82a351f0d88eb9b2acad4dd2a2bca3f07014d757 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 13 May 2026 15:05:02 +0100 Subject: [PATCH 22/25] try getting covariance using jackknife feature of dsigma --- txpipe/delta_sigma.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index c874e0642..92553ea3d 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -40,6 +40,7 @@ class TXDeltaSigma(TXTwoPoint): "source_cat_w_col": StageParameter(str, "weight", msg="Source catalog weight column name."), "lens_cat_w_col": StageParameter(str, "weight", msg="Lens catalog weight column name."), "n_jobs": StageParameter(int, 1, msg="Number of multiprocessing processes to use"), + "n_jackknife": StageParameter(int, 0, msg="Number of jackknife regions to use for error estimation, 0 means no jackknife"), } def run(self): @@ -97,11 +98,31 @@ def run(self): dsigma.precompute.precompute(lens_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, progress_bar=progress_bar, n_jobs=n_jobs)#, lens_source_cut=self.config["lens_source_sep"]) dsigma.precompute.precompute(randoms_table, source_table, table_n=source_n_of_z, bins=bins, cosmology=cosmo, progress_bar=progress_bar, n_jobs=n_jobs)#, lens_source_cut=self.config["lens_source_sep"]) + # stack to get the excess surface density Delta(Sigma) result = dsigma.stacking.excess_surface_density( lens_table, table_r=randoms_table, random_subtraction=True, boost_correction=True, return_table=True ) - results.append((source_bin, lens_bin, result)) + + # Optionally get the jackknife covariance. + # dSigma does this for us too. The stacking is the relatively + # fast bit so hopefully this is quite fast. + njack = self.config["n_jackknife"] + if njack > 0: + centers = dsigma.jackknife.compute_jackknife_fields(lens_table, njack) + dsigma.jackknife.compute_jackknife_fields(randoms_table, centers) + + # Get the covariance + delta_sigma_cov = dsigma.jackknife.jackknife_resampling( + dsigma.stacking.excess_surface_density, + lens_table, + randoms_table + ) + else: + delta_sigma_cov = None + + + results.append((source_bin, lens_bin, result, delta_sigma_cov)) # restore numpy settings np.seterr(**numpy_error_settings) @@ -345,7 +366,8 @@ def save_results(self, results, shear_photoz_stack, lens_photoz_stack): # for each bin pair's results, add all the # measurements in the output data table - for source_bin, lens_bin, result in results: + cov_blocks = [] + for source_bin, lens_bin, result, cov_block in results: tracer1 = f"source_{source_bin}" tracer2 = f"lens_{lens_bin}" @@ -364,6 +386,12 @@ def save_results(self, results, shear_photoz_stack, lens_photoz_stack): n_pairs=row["n_pairs"], # the number of pairs in the bin ) + if cov_block is not None: + cov_blocks.append(cov_block) + + if cov_blocks: + s.add_covariance(cov_blocks) + # Add provenance and potentially other metadata stuff. provenance = self.gather_provenance() other_metadata = {"nbin_source": nbin_source - 1, "nbin_lens": nbin_lens - 1} @@ -412,5 +440,5 @@ def run(self): axes[s, l].grid() x = sacc_data.get_tag("rp", tracers=(f"source_{s}", f"lens_{l}")) y = sacc_data.get_mean(tracers=(f"source_{s}", f"lens_{l}")) - axes[s, l].plot(x, y * np.array(x)) + axes[s, l].plot(x, y * np.array(x),'.') plt.subplots_adjust(hspace=0.3, wspace=0.3) From 3c0997e4370c79c07b0d04edc8c36763bb7291f3 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 13 May 2026 15:19:11 +0100 Subject: [PATCH 23/25] Allow using a random subsample of ther data --- txpipe/delta_sigma.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index 92553ea3d..ae9b9ed4d 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -1,4 +1,3 @@ -from .twopoint import TXTwoPoint, TREECORR_CONFIG, SHEAR_POS from .base_stage import PipelineStage from .data_types import SACCFile, ShearCatalog, HDFFile, QPNOfZFile, FiducialCosmology, TextFile, PNGFile import numpy as np @@ -6,7 +5,7 @@ import os -class TXDeltaSigma(TXTwoPoint): +class TXDeltaSigma(PipelineStage): """Compute Delta-Sigma, the excess surface density around lenses. This version uses the dSigma code. """ @@ -41,6 +40,7 @@ class TXDeltaSigma(TXTwoPoint): "lens_cat_w_col": StageParameter(str, "weight", msg="Lens catalog weight column name."), "n_jobs": StageParameter(int, 1, msg="Number of multiprocessing processes to use"), "n_jackknife": StageParameter(int, 0, msg="Number of jackknife regions to use for error estimation, 0 means no jackknife"), + "subsample": StageParameter(float, 0.0, msg="If set to non-zero, randomly subsample this fraction of the all the data sets for testing."), } def run(self): @@ -184,6 +184,13 @@ def load_table(self, group, names): table = Table() for dsigma_name, txpipe_name in names.items(): table[dsigma_name] = group[txpipe_name][:] + + # Optionally cut down to a random subsample. + # Faster for testing + frac = self.config["subsample"] + if frac > 0: + p = np.random.binomial(1, frac, size=len(table)).astype(bool) + table = table[p] return table def load_source_table(self, bin_index): From 8ebb23eb58febc6e86a508c323006db49ef02037 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 13 May 2026 15:46:10 +0100 Subject: [PATCH 24/25] Add error bars to delta sigma plot if present --- txpipe/delta_sigma.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index ae9b9ed4d..aabe91e64 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -434,6 +434,12 @@ def run(self): nbin_source = sacc_data.metadata["nbin_source"] nbin_lens = sacc_data.metadata["nbin_lens"] + if sacc_data.has_covariance(): + cov = sacc_data.covariance.dense + else: + cov = None + + # Plot in r coordinates nbin_source = sacc_data.metadata["nbin_source"] nbin_lens = sacc_data.metadata["nbin_lens"] @@ -447,5 +453,13 @@ def run(self): axes[s, l].grid() x = sacc_data.get_tag("rp", tracers=(f"source_{s}", f"lens_{l}")) y = sacc_data.get_mean(tracers=(f"source_{s}", f"lens_{l}")) - axes[s, l].plot(x, y * np.array(x),'.') + if cov is not None: + index = sacc_data.indices(tracers=(f"source_{s}", f"lens_{l}")) + cov_block = cov[index][:, index] + error = np.sqrt(np.diag(cov_block)) + axes[s, l].errorbar(x, y * np.array(x), yerr=error * np.array(x), fmt='.') + else: + axes[s, l].plot(x, y * np.array(x), '.') + axes[s, l].set_ylim(0, None) + axes[s, l].set_xscale("log") plt.subplots_adjust(hspace=0.3, wspace=0.3) From bbcf6393e5cf5252fdb6562792117bcbab10343f Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Mon, 18 May 2026 07:40:54 +0100 Subject: [PATCH 25/25] add raw values to plot also --- txpipe/delta_sigma.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/txpipe/delta_sigma.py b/txpipe/delta_sigma.py index aabe91e64..61bbab265 100644 --- a/txpipe/delta_sigma.py +++ b/txpipe/delta_sigma.py @@ -452,14 +452,20 @@ def run(self): axes[s, l].set_ylabel(r"$R \cdot \Delta \Sigma [M_\odot / pc^2]$") axes[s, l].grid() x = sacc_data.get_tag("rp", tracers=(f"source_{s}", f"lens_{l}")) + x = np.array(x) y = sacc_data.get_mean(tracers=(f"source_{s}", f"lens_{l}")) + raw_y = sacc_data.get_tag("raw_value", tracers=(f"source_{s}", f"lens_{l}")) if cov is not None: index = sacc_data.indices(tracers=(f"source_{s}", f"lens_{l}")) cov_block = cov[index][:, index] error = np.sqrt(np.diag(cov_block)) - axes[s, l].errorbar(x, y * np.array(x), yerr=error * np.array(x), fmt='.') + axes[s, l].errorbar(x, y * x, yerr=error * x, fmt='+') + # Also plot the raw value with boost/randoms + axes[s, l].plot(x, raw_y * x, 'x') else: - axes[s, l].plot(x, y * np.array(x), '.') + axes[s, l].plot(x, y * x, '+') + axes[s, l].plot(x, raw_y * x, 'x') axes[s, l].set_ylim(0, None) axes[s, l].set_xscale("log") - plt.subplots_adjust(hspace=0.3, wspace=0.3) + plt.subplots_adjust(hspace=0.05, wspace=0.05) + plt.tight_layout()