From fd75f367cd65cf41222b7cf8d131757867a95424 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Tue, 21 Apr 2026 16:12:13 +0100 Subject: [PATCH 01/43] Added function for creating dask map of selection function using SSI outputs --- txpipe/mapping/__init__.py | 1 + txpipe/mapping/dr1.py | 50 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/txpipe/mapping/__init__.py b/txpipe/mapping/__init__.py index 9da660c42..48037166e 100755 --- a/txpipe/mapping/__init__.py +++ b/txpipe/mapping/__init__.py @@ -2,6 +2,7 @@ make_dask_bright_object_map, make_dask_depth_map, make_dask_depth_map_det_prob, + make_dask_selection_function ) from .basic_maps import ( make_dask_flag_maps, diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index c62479b42..46b97d4bd 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -235,3 +235,53 @@ def make_dask_depth_map_det_prob( "det_frac_by_mag_thres": det_frac_by_mag_thres, "mag_edges": mag_edges, } + + +def make_dask_selection_function( + ra, dec, det, pixel_scheme, cov_map + ): + """ + Generate map of selection function from SSI catalogues. + + Maps the selection function in regions containing synthetically injected + sources. In a given pixel, the selection function is estimated as the number + of detected injections versus the total number of injections. + TODO: Add functionality for more sophisticated estimates, e.g. including + removal of blended injections. + + Parameters + ---------- + ra : dask.array + Right Ascension coordinates in degrees for injected sources. + dec : dask.array + Declination coordinates in degrees for injected sources. + det : dask.array + Whether or not the source has been detected. + pixel_scheme : PixelScheme + An object that provides pixelization scheme with methods `npix` and `ang2pix`. + cov_map : HealSparseCoverage + coverage map corresponding to these sources (or a superset of them) + + Returns + ------- + dict + A dict containing: + - pix (dask.array): Unique pixel indices. + - sel_func_map (dask.array): Selection function measured from injected sources. + """ + _, da = import_dask() + pix = pixel_scheme.ang2pix(ra, dec) + + # get the sparse map index for each of these pixel (is dask aware) + sparse_index, npix_sparse = pix2sparseindex(pix, cov_map) + + # Count detected and total injections per pixel + ndet = da.bincount(sparse_index, weights=det, minlength=npix_sparse) + ntot = da.bincount(sparse_index, minlength=npix_sparse) + # Selection function (N_det / N_tot) + sel_func_map = da.where(ntot != 0, ndet / ntot, np.nan) + + return { + "pix": pix, + "sel_func_map": sel_func_map + } \ No newline at end of file From e4c7ac298406d9918d9bd98420ea2d2ec1329707 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Tue, 21 Apr 2026 16:12:48 +0100 Subject: [PATCH 02/43] Began writing stage for creating map of selection function --- txpipe/auxiliary_maps.py | 64 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index bc2f1550f..fc398812b 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -8,10 +8,10 @@ make_dask_bright_object_map, make_dask_depth_map, make_dask_depth_map_det_prob, + make_dask_selection_function, ) from .data_types import MapsFile, HDFFile, ShearCatalog from .utils import choose_pixelization, import_dask -from .maps import map_config_options from ceci.config import StageParameter @@ -459,3 +459,65 @@ def run(self): for map_name, m in hsp_maps.items(): out.write_map(map_name, m, metadata) out.file["maps"].attrs.update(metadata) + + +class TXSelectionFunctionSSIMaps(TXBaseMaps): + """ + Generate map of the selection function from SSI catalogs. + + TODO: More detailed description. + """ + + name = "TXSelectionFunctionSSIMaps" + dask_parallel = True # TODO: change if not actually necessary + inputs = [ + ("matched_ssi_photometry_catalog", HDFFile), # injected objects that were detected + ("injection_catalog", HDFFile), # injection locations + ("ssi_detection_catalog", HDFFile), # detection info on each injection + ] + outputs = [ + ("sel_func_ssi_maps", MapsFile), + ] + + config_options = { + "block_size": StageParameter(int, 0, msg="Block size for dask processing (0 means auto)."), + **map_config_options + } + + def run(self): + # Import dask and alias it as 'da' + _, da = import_dask() + import healsparse as hsp + + # Retrieve configuration parameters + block_size = self.config["block_size"] + if block_size == 0: + block_size = "auto" + + # Open the input catalog files + # We can't use "with" statements because we need to keep the file open + # while we're using dask. + f_matched = self.open_input("matched_ssi_photometry_catalog", wrapper=True) + f_inj = self.open_input("injection_catalog", wrapper=True) + f_det = self.open_input("ssi_detection_catalog", wrapper=True) + + # Load matched catalog data into dask arrays. + # This is lazy in dask, so we're not actually loading the data here. + ra = da.from_array(f_matched.file["photometry/ra"], block_size) + block_size = ra.chunksize + dec = da.from_array(f_matched.file["photometry/dec"], block_size) + + # Choose the pixelization scheme based on the configuration. + # Might need to review this to make sure we use the same scheme everywhere + pixel_scheme = choose_pixelization(**self.config) + + # Load detection catalog data into dask arrays. + # This is lazy in dask, so we're not actually loading the data here. + ra_inj = da.from_array(f_inj.file["photometry/ra"], block_size) + dec_inj = da.from_array(f_inj.file["photometry/dec"], block_size) + det = da.from_array(f_det.file[f"photometry/detected"], block_size) + + # Make coverage map for these ra,dec + cov_map = make_coverage_map(ra_inj, dec_inj, pixel_scheme) + + From 7f8dfa97014c36a5452224a2047cc5ef913ec71f Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 23 Apr 2026 12:00:03 +0100 Subject: [PATCH 03/43] Uncertainty on selection function now also output by make_dask_selection_function --- txpipe/mapping/dr1.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index 46b97d4bd..4e19d3ecf 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -248,6 +248,7 @@ def make_dask_selection_function( of detected injections versus the total number of injections. TODO: Add functionality for more sophisticated estimates, e.g. including removal of blended injections. + TODO: Avoid the normal approximation for estimating the uncertainties. Parameters ---------- @@ -268,6 +269,7 @@ def make_dask_selection_function( A dict containing: - pix (dask.array): Unique pixel indices. - sel_func_map (dask.array): Selection function measured from injected sources. + - err_sel_func_map (dask.array): Uncertainty on selection function. """ _, da = import_dask() pix = pixel_scheme.ang2pix(ra, dec) @@ -279,9 +281,16 @@ def make_dask_selection_function( ndet = da.bincount(sparse_index, weights=det, minlength=npix_sparse) ntot = da.bincount(sparse_index, minlength=npix_sparse) # Selection function (N_det / N_tot) - sel_func_map = da.where(ntot != 0, ndet / ntot, np.nan) + selfunc = da.where(ntot != 0, ndet / ntot, np.nan) + # Uncertainty on sel. func. (normal approximation) + err_selfunc = da.where( + ntot != 0, + da.sqrt(selfunc * (1 - selfunc) / ntot), + np.nan + ) return { "pix": pix, - "sel_func_map": sel_func_map + "sel_func_map": selfunc, + "err_sel_func_map": err_selfunc } \ No newline at end of file From f15302693db60b1ebe86a0c86c3c4be0297101cd Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 23 Apr 2026 13:23:45 +0100 Subject: [PATCH 04/43] Basic selection function and uncertainties computed and stored --- txpipe/auxiliary_maps.py | 44 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index fc398812b..7f70d6a86 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -465,11 +465,13 @@ class TXSelectionFunctionSSIMaps(TXBaseMaps): """ Generate map of the selection function from SSI catalogs. - TODO: More detailed description. + This class generates maps of: + - the selection function (in regions where SSI has been done) + - the uncertainty on the measured selection function """ name = "TXSelectionFunctionSSIMaps" - dask_parallel = True # TODO: change if not actually necessary + dask_parallel = True inputs = [ ("matched_ssi_photometry_catalog", HDFFile), # injected objects that were detected ("injection_catalog", HDFFile), # injection locations @@ -520,4 +522,40 @@ def run(self): # Make coverage map for these ra,dec cov_map = make_coverage_map(ra_inj, dec_inj, pixel_scheme) - + # Initialize a dictionary to store the maps. + # To start with this is all lazy too, until we call compute + maps = {} + + # Create selection function map using injection catalog + sel_func_results = make_dask_selection_function( + ra_inj, + dec_inj, + det, + pixel_scheme, + cov_map + ) + + maps["selection_function"] = sel_func_results["sel_func_map"] + maps["err_selection_function"] = sel_func_results["err_sel_func_map"] + + (maps,) = da.compute(maps) + + # convert sparse_map arrays into healsparse map objects + hsp_maps = {} + for name, map in maps.items(): + hsp_maps[name] = hsp.HealSparseMap( + cov_map=cov_map, + sparse_map=map, + nside_sparse=cov_map.nside_sparse, + ) + + # Prepare metadata for the maps. Copy the pixelization-related + # configuration options only here + metadata = {key: self.config[key] for key in map_config_options if key in self.config} + metadata.update(pixel_scheme.metadata) + + # Write the output maps to the output file + with self.open_output("sel_func_ssi_maps", wrapper=True) as out: + for map_name, m in hsp_maps.items(): + out.write_map(map_name, m, metadata) + out.file["maps"].attrs.update(metadata) From fac42a0adcbc048b73a22bfb787b132213858ce1 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Mon, 27 Apr 2026 21:15:31 +0100 Subject: [PATCH 05/43] Pixels without SSI now display hp.UNSEEN, not NaN --- txpipe/mapping/dr1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index 4e19d3ecf..96139eedc 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -281,7 +281,7 @@ def make_dask_selection_function( ndet = da.bincount(sparse_index, weights=det, minlength=npix_sparse) ntot = da.bincount(sparse_index, minlength=npix_sparse) # Selection function (N_det / N_tot) - selfunc = da.where(ntot != 0, ndet / ntot, np.nan) + selfunc = da.where(ntot != 0, ndet / ntot, hp.UNSEEN) # Uncertainty on sel. func. (normal approximation) err_selfunc = da.where( ntot != 0, From 413a53f94ce31fe70bb6fa43ac68e7af0caff522 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Tue, 28 Apr 2026 08:59:06 +0100 Subject: [PATCH 06/43] Pixels without SSI now also display hp.UNSEEN in the uncertainty map --- txpipe/mapping/dr1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index 96139eedc..382f535f3 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -286,7 +286,7 @@ def make_dask_selection_function( err_selfunc = da.where( ntot != 0, da.sqrt(selfunc * (1 - selfunc) / ntot), - np.nan + hp.UNSEEN ) return { From 2fbf40d660e1ae7bb4dc6e3f5ef240185afae6c4 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Tue, 28 Apr 2026 09:10:01 +0100 Subject: [PATCH 07/43] Prelimary working version of stage which models selection function across full footprint --- txpipe/auxiliary_maps.py | 208 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 7f70d6a86..b27ec1f4c 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -559,3 +559,211 @@ def run(self): for map_name, m in hsp_maps.items(): out.write_map(map_name, m, metadata) out.file["maps"].attrs.update(metadata) + + +class TXModelSelectionFunction(TXBaseMaps): + """ + Model selection function across footprint using survey property maps. + + Uses the selection measured in a subset of the footprint to model its dependence on + survey property maps, then uses that model to predict the selection function across + the remainder of the footprint. The model is fit via polynomial regression with + respect to the survey property maps, where the degree of the polynomial is specified + via the config. + TODO: Make dask-compatible + TODO: Make it possible to select the degree of the fit for each SP map individually. + TODO: Add other options for the form of the model. + """ + + name = "TXModelSelectionFunction" + inputs = [ + ("sel_func_ssi_maps", MapsFile), # Measured selection function and uncertainties + ("mask", MapsFile) # Mask defining survey geometry + + ] + outputs = [ + ("sel_func_pred_maps", MapsFile), # Model prediction of selection function and its uncertainties + ] + + config_options = { + "systmaps_dir": StageParameter(str, "", msg="Directory containing systematic maps."), + "degree": StageParameter(int, 1, msg="Degree of the polynomial fit."), + "mask_threshold": StageParameter(float, 0.0, msg="Threshold for masking pixels."), + **map_config_options + } + + def run(self): + import glob + import healsparse as hsp + + pixel_scheme = choose_pixelization(**self.config) + + # Load the map of the measured selection function and its uncertainties + with self.open_input("sel_func_ssi_maps", wrapper=True) as f: + sel_func_meas = f.read_map("selection_function") + err_sel_func_meas = f.read_map("err_selection_function") + # Ensure correct resolution + sel_func_meas = self.ud_grade(sel_func_meas, pixel_scheme) + err_sel_func_meas = self.ud_grade(err_sel_func_meas, pixel_scheme) + + with self.open_input("mask", wrapper=True) as f: + mask = f.read_mask( + thresh=self.config["mask_threshold"], + returnbool=True, + degrade_nside=pixel_scheme.nside + ) + mask_pix = mask.valid_pixels + + # For keeping track of valid pixels across all maps + goodpix = np.ones(len(mask_pix), dtype=bool) + + # Load survey property maps at the valid pixels + spmaps = [] + root = self.config["systmaps_dir"] + sysfiles = glob.glob(f"{root}*.hs") + nsys = len(sysfiles) + print(f"Found {nsys} total systematic maps") + for sm in sysfiles: + sm_values = self.read_systematics_map( + sm, mask_pix, pixel_scheme + ) + goodpix *= sm_values > -1e-30 + spmaps.append(sm_values) + spmaps = np.stack(spmaps) + + # Select 'training' pixels for computing fit + # Want to keep only valid pixels across all maps + train = np.zeros(len(mask_pix), dtype=bool) + # Pixels where selection function is 0 or 1 have zero uncertainty; + # these pixels are removed here + # TODO: Remove this step when uncertainty calculation is fixed in + # TXSelectionFunctionSSIMaps + valid_errs = err_sel_func_meas[err_sel_func_meas.valid_pixels] > 0 + _, id_train, _ = np.intersect1d(mask_pix, sel_func_meas.valid_pixels[valid_errs], return_indices=True) + train[id_train] = True + pix_train = goodpix * train + sel_func_train = sel_func_meas[mask_pix[pix_train]] + err_sel_func_train = err_sel_func_meas[mask_pix[pix_train]] + spmaps_train = spmaps[:, pix_train] + + # Get survey property values across remainder of footprint + pix_pred = goodpix * ~train + spmaps_pred = spmaps[:, pix_pred] + + # Perform polynomial regression and retrieve the following: + # - mean prediction on the selection function (in each pixel) + # - 1-sigma uncertainty on these predictions (in each pixel) + # - best-fit coefficients for each survey property (+ an intercept term) + # - covariance matrix for the best-fit parameters + # TODO: save alphas and cov mat + sel_func_pred, err_sel_func_pred, alphas, cov_alphas = self.polynomial_model( + spmaps_train, + sel_func_train, + err_sel_func_train, + spmaps_pred + ) + + # Combine training data and predictions into single HealSparse maps + pix_all = np.concatenate([mask_pix[pix_train], mask_pix[pix_pred]]) + sel_func_full = np.concatenate([sel_func_train, sel_func_pred]) + err_sel_func_full = np.concatenate([err_sel_func_train, err_sel_func_pred]) + + hsp_maps = { + "selection_function": self.make_hsp_map( + pix_all, sel_func_full, pixel_scheme + ), + "err_selection_function": self.make_hsp_map( + pix_all, err_sel_func_full, pixel_scheme + ) + } + + # Prepare metadata for the maps. Copy the pixelization-related + # configuration options only here + metadata = {key: self.config[key] for key in map_config_options if key in self.config} + # Then add the other configuration options + metadata.update(pixel_scheme.metadata) + + # Write the output maps to the output file + with self.open_output("sel_func_pred_maps", wrapper=True) as out: + for map_name, hsp_map in hsp_maps.items(): + out.write_map(map_name, hsp_map, metadata) + out.file["maps"].attrs.update(metadata) + + + def ud_grade(self, hsp_map, pixel_scheme): + """ + Upgrades or degrades a HealSparseMap according to whether its nside + is lower or greater than that of the pixel scheme. + """ + nside_now = hsp_map.nside_sparse + nside_target = pixel_scheme.nside + if nside_now < pixel_scheme.nside: + return hsp_map.upgrade(nside_target) + elif nside_now > pixel_scheme.nside: + return hsp_map.degrade(nside_target) + else: + return hsp_map + + def read_systematics_map(self, sysmap, vpix, pixel_scheme): + import healsparse as hsp + m = hsp.HealSparseMap.read( + sysmap, + degrade_nside=pixel_scheme.nside + ) + return m[vpix] + + def polynomial_model(self, X_train, y_train, yerr_train, X_pred): + """ + Performs a polynomial regression of y with respect to X_train, then + generates predictions at X_pred. In this context, X is an array + containing survey property maps in certain pixels, and y is + the selection function measured in those pixels. + """ + # Degree of polynomial fit + deg = self.config["degree"] + + # Construct inputs matrix for regression + A_T = [np.ones((1, len(X_train[0]))), X_train] + for n in range(2, deg): + A_T.append(np.power(X_train, n)) + A_T = np.concatenate(A_T, axis=0) + A = A_T.T + + # Inverse variance on y + W = 1 / (np.power(yerr_train, 2)) + + # Explicitly compute necessary array combinations + AtWA = (A_T @ (W[:, None] * A)) + AtWy = (A_T @ (W * y_train)) + + # Compute best-fit coeffs (alphas) and their covariance + # NOTE: this assumes a diagonal covariance matrix for y, + # i.e. cov_y = diag(W) + cov_alphas = np.linalg.inv(AtWA) + alphas = cov_alphas @ AtWy + + # Now make predictions at X_pred + # Construct inputs matrix for regression + A_T = [np.ones((1, len(X_pred[0]))), X_pred] + for n in range(2, deg): + A_T.append(np.power(X_pred, n)) + A_T = np.concatenate(A_T, axis=0) + A = A_T.T + y_pred = alphas @ A_T + err_y_pred = np.sqrt(np.diag(A @ cov_alphas @ A_T)) + + return y_pred, err_y_pred, alphas, cov_alphas + + def make_hsp_map(self, pixels, values, pixel_scheme): + """ + Creates an empty HealSparseMap and populates it at the specified + pixels with the corresponding values. + """ + import healsparse as hsp + hsp_map = hsp.HealSparseMap.make_empty( + nside_coverage=pixel_scheme.nside_coverage, + nside_sparse=pixel_scheme.nside, + dtype=float + ) + hsp_map[pixels] = values + return hsp_map From 5feaeb16244bafb65cdefbfb1578079c5dcfa68f Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Tue, 28 Apr 2026 09:11:32 +0100 Subject: [PATCH 08/43] Added potential TODO --- txpipe/auxiliary_maps.py | 1 + 1 file changed, 1 insertion(+) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index b27ec1f4c..ea790a00b 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -468,6 +468,7 @@ class TXSelectionFunctionSSIMaps(TXBaseMaps): This class generates maps of: - the selection function (in regions where SSI has been done) - the uncertainty on the measured selection function + TODO: Combine with TXAuxiliarySSIMaps? """ name = "TXSelectionFunctionSSIMaps" From 2163c4af3907c2a4d250735b9704394af99c034d Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 12:06:21 +0100 Subject: [PATCH 09/43] Changed variable names --- txpipe/auxiliary_maps.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index ea790a00b..96bcdc326 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -724,34 +724,34 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): deg = self.config["degree"] # Construct inputs matrix for regression - A_T = [np.ones((1, len(X_train[0]))), X_train] + X_T = [np.ones((1, len(X_train[0]))), X_train] for n in range(2, deg): - A_T.append(np.power(X_train, n)) - A_T = np.concatenate(A_T, axis=0) - A = A_T.T + X_T.append(np.power(X_train, n)) + X_T = np.concatenate(X_T, axis=0) + X = X_T.T # Inverse variance on y W = 1 / (np.power(yerr_train, 2)) # Explicitly compute necessary array combinations - AtWA = (A_T @ (W[:, None] * A)) - AtWy = (A_T @ (W * y_train)) + XtWX = (X_T @ (W[:, None] * X)) + XtWy = (X_T @ (W * y_train)) # Compute best-fit coeffs (alphas) and their covariance # NOTE: this assumes a diagonal covariance matrix for y, # i.e. cov_y = diag(W) - cov_alphas = np.linalg.inv(AtWA) - alphas = cov_alphas @ AtWy + cov_alphas = np.linalg.inv(XtWX) + alphas = cov_alphas @ XtWy # Now make predictions at X_pred # Construct inputs matrix for regression - A_T = [np.ones((1, len(X_pred[0]))), X_pred] + X_T = [np.ones((1, len(X_pred[0]))), X_pred] for n in range(2, deg): - A_T.append(np.power(X_pred, n)) - A_T = np.concatenate(A_T, axis=0) - A = A_T.T - y_pred = alphas @ A_T - err_y_pred = np.sqrt(np.diag(A @ cov_alphas @ A_T)) + X_T.append(np.power(X_pred, n)) + X_T = np.concatenate(X_T, axis=0) + X = X_T.T + y_pred = alphas @ X_T + err_y_pred = np.sqrt(np.diag(X @ cov_alphas @ X_T)) return y_pred, err_y_pred, alphas, cov_alphas From efe979149aedb10bf177d7ccc0d0dbae9eab6833 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 14:04:46 +0100 Subject: [PATCH 10/43] Detection and injection count maps are now saved in outputs --- txpipe/auxiliary_maps.py | 2 ++ txpipe/mapping/dr1.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 96bcdc326..4cb779f0a 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -538,6 +538,8 @@ def run(self): maps["selection_function"] = sel_func_results["sel_func_map"] maps["err_selection_function"] = sel_func_results["err_sel_func_map"] + maps["det_count"] = sel_func_results["det_count_map"] + maps["inj_count"] = sel_func_results["inj_count_map"] (maps,) = da.compute(maps) diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index 382f535f3..cb4849656 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -270,6 +270,8 @@ def make_dask_selection_function( - pix (dask.array): Unique pixel indices. - sel_func_map (dask.array): Selection function measured from injected sources. - err_sel_func_map (dask.array): Uncertainty on selection function. + - det_count_map (dask.array): Number of detections per pixel. + - inj_count_map (dask.array): Total number of injections per pixel. """ _, da = import_dask() pix = pixel_scheme.ang2pix(ra, dec) @@ -279,18 +281,20 @@ def make_dask_selection_function( # Count detected and total injections per pixel ndet = da.bincount(sparse_index, weights=det, minlength=npix_sparse) - ntot = da.bincount(sparse_index, minlength=npix_sparse) + ninj = da.bincount(sparse_index, minlength=npix_sparse) # Selection function (N_det / N_tot) - selfunc = da.where(ntot != 0, ndet / ntot, hp.UNSEEN) + selfunc = da.where(ninj != 0, ndet / ninj, hp.UNSEEN) # Uncertainty on sel. func. (normal approximation) err_selfunc = da.where( - ntot != 0, - da.sqrt(selfunc * (1 - selfunc) / ntot), + ninj != 0, + da.sqrt(selfunc * (1 - selfunc) / ninj), hp.UNSEEN ) return { "pix": pix, "sel_func_map": selfunc, - "err_sel_func_map": err_selfunc + "err_sel_func_map": err_selfunc, + "det_count_map": ndet, + "inj_count_map": ninj } \ No newline at end of file From 32fe3e08ecff1375614f904fa82ccf70682eba62 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 17:14:31 +0100 Subject: [PATCH 11/43] TXModelSelectionFunction is now dask-compatible --- txpipe/auxiliary_maps.py | 108 ++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 46 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 4cb779f0a..f3c53b5ff 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -579,6 +579,7 @@ class TXModelSelectionFunction(TXBaseMaps): """ name = "TXModelSelectionFunction" + dask_parallel = True inputs = [ ("sel_func_ssi_maps", MapsFile), # Measured selection function and uncertainties ("mask", MapsFile) # Mask defining survey geometry @@ -589,6 +590,7 @@ class TXModelSelectionFunction(TXBaseMaps): ] config_options = { + "block_size": StageParameter(int, 0, msg="Block size for dask processing (0 means auto)."), "systmaps_dir": StageParameter(str, "", msg="Directory containing systematic maps."), "degree": StageParameter(int, 1, msg="Degree of the polynomial fit."), "mask_threshold": StageParameter(float, 0.0, msg="Threshold for masking pixels."), @@ -598,6 +600,15 @@ class TXModelSelectionFunction(TXBaseMaps): def run(self): import glob import healsparse as hsp + from functools import reduce + # Import dask and alias it as 'da' + _, da = import_dask() + + # Retrieve configuration parameters + block_size = self.config["block_size"] + if block_size == 0: + block_size = "auto" + print(block_size) pixel_scheme = choose_pixelization(**self.config) @@ -615,43 +626,52 @@ def run(self): returnbool=True, degrade_nside=pixel_scheme.nside ) - mask_pix = mask.valid_pixels - - # For keeping track of valid pixels across all maps - goodpix = np.ones(len(mask_pix), dtype=bool) # Load survey property maps at the valid pixels - spmaps = [] root = self.config["systmaps_dir"] sysfiles = glob.glob(f"{root}*.hs") nsys = len(sysfiles) print(f"Found {nsys} total systematic maps") - for sm in sysfiles: - sm_values = self.read_systematics_map( - sm, mask_pix, pixel_scheme + spmaps = [ + hsp.HealSparseMap.read( + sf, + degrade_nside=pixel_scheme.nside ) - goodpix *= sm_values > -1e-30 - spmaps.append(sm_values) - spmaps = np.stack(spmaps) + for sf in sysfiles + ] + + # Identfy valid pixels across mask and all SP maps + goodpix = reduce( + np.intersect1d, + [m.valid_pixels for m in [mask, *spmaps]] + ) # Select 'training' pixels for computing fit # Want to keep only valid pixels across all maps - train = np.zeros(len(mask_pix), dtype=bool) + pix_train = np.intersect1d(sel_func_meas.valid_pixels, goodpix) # Pixels where selection function is 0 or 1 have zero uncertainty; # these pixels are removed here # TODO: Remove this step when uncertainty calculation is fixed in # TXSelectionFunctionSSIMaps - valid_errs = err_sel_func_meas[err_sel_func_meas.valid_pixels] > 0 - _, id_train, _ = np.intersect1d(mask_pix, sel_func_meas.valid_pixels[valid_errs], return_indices=True) - train[id_train] = True - pix_train = goodpix * train - sel_func_train = sel_func_meas[mask_pix[pix_train]] - err_sel_func_train = err_sel_func_meas[mask_pix[pix_train]] - spmaps_train = spmaps[:, pix_train] + valid_errs = err_sel_func_meas[pix_train] != 0. + pix_train = pix_train[valid_errs] + # Select training data + sel_func_train = da.from_array(sel_func_meas[pix_train], block_size) + block_size = sel_func_train.chunksize + err_sel_func_train = da.from_array(err_sel_func_meas[pix_train], block_size) + spmaps_train = da.stack( + [ + da.from_array(m[pix_train], block_size) for m in spmaps + ] + ).T # Get survey property values across remainder of footprint - pix_pred = goodpix * ~train - spmaps_pred = spmaps[:, pix_pred] + pix_pred = np.setdiff1d(goodpix, pix_train) + spmaps_pred = da.stack( + [ + da.from_array(m[pix_pred], block_size) for m in spmaps + ] + ).T # Perform polynomial regression and retrieve the following: # - mean prediction on the selection function (in each pixel) @@ -667,9 +687,9 @@ def run(self): ) # Combine training data and predictions into single HealSparse maps - pix_all = np.concatenate([mask_pix[pix_train], mask_pix[pix_pred]]) - sel_func_full = np.concatenate([sel_func_train, sel_func_pred]) - err_sel_func_full = np.concatenate([err_sel_func_train, err_sel_func_pred]) + pix_all = np.concatenate([pix_train, pix_pred]) + (sel_func_full,) = da.compute(da.concatenate([sel_func_train, sel_func_pred])) + (err_sel_func_full,) = da.compute(da.concatenate([err_sel_func_train, err_sel_func_pred])) hsp_maps = { "selection_function": self.make_hsp_map( @@ -707,14 +727,6 @@ def ud_grade(self, hsp_map, pixel_scheme): else: return hsp_map - def read_systematics_map(self, sysmap, vpix, pixel_scheme): - import healsparse as hsp - m = hsp.HealSparseMap.read( - sysmap, - degrade_nside=pixel_scheme.nside - ) - return m[vpix] - def polynomial_model(self, X_train, y_train, yerr_train, X_pred): """ Performs a polynomial regression of y with respect to X_train, then @@ -722,36 +734,40 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): containing survey property maps in certain pixels, and y is the selection function measured in those pixels. """ + _, da = import_dask() # Degree of polynomial fit deg = self.config["degree"] # Construct inputs matrix for regression - X_T = [np.ones((1, len(X_train[0]))), X_train] - for n in range(2, deg): - X_T.append(np.power(X_train, n)) - X_T = np.concatenate(X_T, axis=0) - X = X_T.T + m = len(X_train[:, 0]) + ones = da.ones((m, 1), chunks=(X_train.chunks[0], 1)) + X = da.hstack([ones, X_train]) + for n in range(2, deg + 1): + X = da.hstack([X, da.power(X_train, n)]) + X_T = X.T # Inverse variance on y - W = 1 / (np.power(yerr_train, 2)) + W = 1 / (da.power(yerr_train, 2)) # Explicitly compute necessary array combinations - XtWX = (X_T @ (W[:, None] * X)) - XtWy = (X_T @ (W * y_train)) + (XtWX,) = da.compute(X_T @ (W[:, None] * X)) + (XtWy,) = da.compute(X_T @ (W * y_train)) # Compute best-fit coeffs (alphas) and their covariance # NOTE: this assumes a diagonal covariance matrix for y, # i.e. cov_y = diag(W) + alphas = np.linalg.solve(XtWX, XtWy) cov_alphas = np.linalg.inv(XtWX) - alphas = cov_alphas @ XtWy + print(alphas) # Now make predictions at X_pred # Construct inputs matrix for regression - X_T = [np.ones((1, len(X_pred[0]))), X_pred] - for n in range(2, deg): - X_T.append(np.power(X_pred, n)) - X_T = np.concatenate(X_T, axis=0) - X = X_T.T + m = len(X_pred[:,0]) + ones = da.ones((m, 1), chunks=(X_pred.chunks[0], 1)) + X = da.hstack([ones, X_pred]) + for n in range(2, deg + 1): + X = da.hstack([X, da.power(X_pred, n)]) + X_T = X.T y_pred = alphas @ X_T err_y_pred = np.sqrt(np.diag(X @ cov_alphas @ X_T)) From e2f7788b743db0b1bda0af40c29c28a5a9659cd6 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 17:15:07 +0100 Subject: [PATCH 12/43] Removed print statements --- txpipe/auxiliary_maps.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index f3c53b5ff..f69114d18 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -608,7 +608,6 @@ def run(self): block_size = self.config["block_size"] if block_size == 0: block_size = "auto" - print(block_size) pixel_scheme = choose_pixelization(**self.config) @@ -758,7 +757,6 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): # i.e. cov_y = diag(W) alphas = np.linalg.solve(XtWX, XtWy) cov_alphas = np.linalg.inv(XtWX) - print(alphas) # Now make predictions at X_pred # Construct inputs matrix for regression From e3a0c92a8afa5e239dc75294e5f3d7ca535c2359 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 18:29:44 +0100 Subject: [PATCH 13/43] Model parameters and their covariances now saved to file --- txpipe/auxiliary_maps.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index f69114d18..a232e4e61 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -587,6 +587,7 @@ class TXModelSelectionFunction(TXBaseMaps): ] outputs = [ ("sel_func_pred_maps", MapsFile), # Model prediction of selection function and its uncertainties + ("sel_func_model_info", HDFFile), # Best-fit params and their covariance matrix ] config_options = { @@ -705,12 +706,27 @@ def run(self): # Then add the other configuration options metadata.update(pixel_scheme.metadata) + # Prepare metadata for model parameter info + params_metadata = { + "model": f"polynomial (deg = {self.config['degree']})", + "inputs": [ + sf.split('/')[-1] for sf in sysfiles + ] + } + # Write the output maps to the output file with self.open_output("sel_func_pred_maps", wrapper=True) as out: for map_name, hsp_map in hsp_maps.items(): out.write_map(map_name, hsp_map, metadata) out.file["maps"].attrs.update(metadata) + # Write the model parameters and covariances to the output file + with self.open_output("sel_func_model_info", wrapper=True) as out: + gp = out.file.create_group("model_info") + out.file["model_info"].attrs.update(params_metadata) + gp.create_dataset("alphas", data=alphas) + gp.create_dataset("cov_alphas", data=cov_alphas) + def ud_grade(self, hsp_map, pixel_scheme): """ From b090bbed21f2082c3f1a499255aa694b59206813 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 20:54:09 +0100 Subject: [PATCH 14/43] Fixed bug in which attempts to apply any non-zero mask threshold would create a mask with no valid pixels --- txpipe/data_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/txpipe/data_types.py b/txpipe/data_types.py index fb3fe261c..808a5a465 100755 --- a/txpipe/data_types.py +++ b/txpipe/data_types.py @@ -345,8 +345,8 @@ def read_mask( # Setting pixels to the sentinel can be buggy in healsparse # so we'll make a copy of the mask with the < thresh pixels removed mask = hsp.HealSparseMap.make_empty_like(mask_in) - pix_to_keep = mask.valid_pixels[mask[mask.valid_pixels] > thresh] - mask.update_values_pix(pix_to_keep, mask[pix_to_keep]) + pix_to_keep = mask_in.valid_pixels[mask_in[mask_in.valid_pixels] > thresh] + mask.update_values_pix(pix_to_keep, mask_in[pix_to_keep]) else: mask = mask_in From 519fbbfecff24826b57359fde692333e2984852f Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 20:55:13 +0100 Subject: [PATCH 15/43] Added option to apply mask threshold after degrading to desired resolution --- txpipe/auxiliary_maps.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index a232e4e61..1cf600523 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -594,12 +594,15 @@ class TXModelSelectionFunction(TXBaseMaps): "block_size": StageParameter(int, 0, msg="Block size for dask processing (0 means auto)."), "systmaps_dir": StageParameter(str, "", msg="Directory containing systematic maps."), "degree": StageParameter(int, 1, msg="Degree of the polynomial fit."), - "mask_threshold": StageParameter(float, 0.0, msg="Threshold for masking pixels."), + "mask_thresh": StageParameter(float, 0.0, msg="Threshold for masking pixels at native resolution of mask."), + "mask_thresh_coarse": StageParameter(float, 0.0, msg="Threshold for masking pixels after mask is degraded."), + 'inj_count_threshold': StageParameter(int, 1, msg="Exclude pixels containing fewer injections than this number."), **map_config_options } def run(self): import glob + import healpy as hp import healsparse as hsp from functools import reduce # Import dask and alias it as 'da' @@ -622,10 +625,12 @@ def run(self): with self.open_input("mask", wrapper=True) as f: mask = f.read_mask( - thresh=self.config["mask_threshold"], - returnbool=True, + thresh=self.config["mask_thresh"], degrade_nside=pixel_scheme.nside ) + mask_pix = mask.valid_pixels + # Remove pixels below specified coverage fraction after degrading + mask_pix = mask_pix[mask[mask_pix] >= self.config["mask_thresh_coarse"]] # Load survey property maps at the valid pixels root = self.config["systmaps_dir"] @@ -643,7 +648,10 @@ def run(self): # Identfy valid pixels across mask and all SP maps goodpix = reduce( np.intersect1d, - [m.valid_pixels for m in [mask, *spmaps]] + [ + mask_pix, + *[m.valid_pixels for m in spmaps] + ] ) # Select 'training' pixels for computing fit From 9901eff4b690c755822260aff6a6688a1dc4287c Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 21:16:49 +0100 Subject: [PATCH 16/43] Removed custom ud_grade function since upgrading resolution not necessary --- txpipe/auxiliary_maps.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 1cf600523..bc8f904ff 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -573,7 +573,6 @@ class TXModelSelectionFunction(TXBaseMaps): the remainder of the footprint. The model is fit via polynomial regression with respect to the survey property maps, where the degree of the polynomial is specified via the config. - TODO: Make dask-compatible TODO: Make it possible to select the degree of the fit for each SP map individually. TODO: Add other options for the form of the model. """ @@ -619,9 +618,10 @@ def run(self): with self.open_input("sel_func_ssi_maps", wrapper=True) as f: sel_func_meas = f.read_map("selection_function") err_sel_func_meas = f.read_map("err_selection_function") - # Ensure correct resolution - sel_func_meas = self.ud_grade(sel_func_meas, pixel_scheme) - err_sel_func_meas = self.ud_grade(err_sel_func_meas, pixel_scheme) + # Ensure correct resolution (desired resolution can only be lower than + # or equal to the native resolution of the selection function map) + sel_func_meas = sel_func_meas.degrade(pixel_scheme.nside) + err_sel_func_meas = err_sel_func_meas.degrade(pixel_scheme.nside) with self.open_input("mask", wrapper=True) as f: mask = f.read_mask( @@ -633,6 +633,7 @@ def run(self): mask_pix = mask_pix[mask[mask_pix] >= self.config["mask_thresh_coarse"]] # Load survey property maps at the valid pixels + # TODO: Define these as individual inputs rather than glob search? root = self.config["systmaps_dir"] sysfiles = glob.glob(f"{root}*.hs") nsys = len(sysfiles) @@ -645,7 +646,7 @@ def run(self): for sf in sysfiles ] - # Identfy valid pixels across mask and all SP maps + # Identify valid pixels across mask and all SP maps goodpix = reduce( np.intersect1d, [ @@ -735,21 +736,6 @@ def run(self): gp.create_dataset("alphas", data=alphas) gp.create_dataset("cov_alphas", data=cov_alphas) - - def ud_grade(self, hsp_map, pixel_scheme): - """ - Upgrades or degrades a HealSparseMap according to whether its nside - is lower or greater than that of the pixel scheme. - """ - nside_now = hsp_map.nside_sparse - nside_target = pixel_scheme.nside - if nside_now < pixel_scheme.nside: - return hsp_map.upgrade(nside_target) - elif nside_now > pixel_scheme.nside: - return hsp_map.degrade(nside_target) - else: - return hsp_map - def polynomial_model(self, X_train, y_train, yerr_train, X_pred): """ Performs a polynomial regression of y with respect to X_train, then From a75b00aefdaf6e54f1553f45ae5087aec76f7969 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 21:52:42 +0100 Subject: [PATCH 17/43] Moved where coarse mask cut is applied and added a cut in number of injections --- txpipe/auxiliary_maps.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index bc8f904ff..c9d50ca9c 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -595,7 +595,7 @@ class TXModelSelectionFunction(TXBaseMaps): "degree": StageParameter(int, 1, msg="Degree of the polynomial fit."), "mask_thresh": StageParameter(float, 0.0, msg="Threshold for masking pixels at native resolution of mask."), "mask_thresh_coarse": StageParameter(float, 0.0, msg="Threshold for masking pixels after mask is degraded."), - 'inj_count_threshold': StageParameter(int, 1, msg="Exclude pixels containing fewer injections than this number."), + 'inj_count_thresh': StageParameter(int, 1, msg="Exclude pixels containing fewer injections than this number."), **map_config_options } @@ -618,10 +618,13 @@ def run(self): with self.open_input("sel_func_ssi_maps", wrapper=True) as f: sel_func_meas = f.read_map("selection_function") err_sel_func_meas = f.read_map("err_selection_function") + ninj = f.read_map("inj_count") # Ensure correct resolution (desired resolution can only be lower than - # or equal to the native resolution of the selection function map) + # or equal to the native resolution of the selection function map). + # "sum" reduction used for ninj since this is a counts map sel_func_meas = sel_func_meas.degrade(pixel_scheme.nside) err_sel_func_meas = err_sel_func_meas.degrade(pixel_scheme.nside) + ninj = ninj.degrade(pixel_scheme.nside, reduction="sum") with self.open_input("mask", wrapper=True) as f: mask = f.read_mask( @@ -629,8 +632,6 @@ def run(self): degrade_nside=pixel_scheme.nside ) mask_pix = mask.valid_pixels - # Remove pixels below specified coverage fraction after degrading - mask_pix = mask_pix[mask[mask_pix] >= self.config["mask_thresh_coarse"]] # Load survey property maps at the valid pixels # TODO: Define these as individual inputs rather than glob search? @@ -658,6 +659,10 @@ def run(self): # Select 'training' pixels for computing fit # Want to keep only valid pixels across all maps pix_train = np.intersect1d(sel_func_meas.valid_pixels, goodpix) + # Remove any pixels below specified coverage fraction after degrading + pix_train = pix_train[mask[pix_train] >= self.config["mask_thresh_coarse"]] + # Also remove pixels with fewer than the specified no. of injections + pix_train = pix_train[ninj[pix_train] >= self.config["inj_count_thresh"]] # Pixels where selection function is 0 or 1 have zero uncertainty; # these pixels are removed here # TODO: Remove this step when uncertainty calculation is fixed in From 18fe420f075f49b904a91d84c9396eecbd336fe9 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Wed, 29 Apr 2026 21:53:08 +0100 Subject: [PATCH 18/43] Removed TODO --- txpipe/auxiliary_maps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index c9d50ca9c..922b4c03c 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -692,7 +692,6 @@ def run(self): # - 1-sigma uncertainty on these predictions (in each pixel) # - best-fit coefficients for each survey property (+ an intercept term) # - covariance matrix for the best-fit parameters - # TODO: save alphas and cov mat sel_func_pred, err_sel_func_pred, alphas, cov_alphas = self.polynomial_model( spmaps_train, sel_func_train, From d1cd7b5ad065af140c6bac82352de7dc780b73bb Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 09:37:59 +0100 Subject: [PATCH 19/43] Edited comments --- txpipe/auxiliary_maps.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 922b4c03c..77beffe0f 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -763,17 +763,16 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): W = 1 / (da.power(yerr_train, 2)) # Explicitly compute necessary array combinations + # NOTE: this assumes a diagonal covariance matrix for y, + # i.e. cov_y = diag(W) (XtWX,) = da.compute(X_T @ (W[:, None] * X)) (XtWy,) = da.compute(X_T @ (W * y_train)) # Compute best-fit coeffs (alphas) and their covariance - # NOTE: this assumes a diagonal covariance matrix for y, - # i.e. cov_y = diag(W) alphas = np.linalg.solve(XtWX, XtWy) cov_alphas = np.linalg.inv(XtWX) # Now make predictions at X_pred - # Construct inputs matrix for regression m = len(X_pred[:,0]) ones = da.ones((m, 1), chunks=(X_pred.chunks[0], 1)) X = da.hstack([ones, X_pred]) From 3e36da8db2b910fdf8ed44b6399f150ff77d1cd3 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 11:56:29 +0100 Subject: [PATCH 20/43] Removed uncertainty calculation from TXSelectionFunctionSSIMaps --- txpipe/mapping/dr1.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index cb4849656..b9e7333d3 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -269,7 +269,6 @@ def make_dask_selection_function( A dict containing: - pix (dask.array): Unique pixel indices. - sel_func_map (dask.array): Selection function measured from injected sources. - - err_sel_func_map (dask.array): Uncertainty on selection function. - det_count_map (dask.array): Number of detections per pixel. - inj_count_map (dask.array): Total number of injections per pixel. """ @@ -284,17 +283,10 @@ def make_dask_selection_function( ninj = da.bincount(sparse_index, minlength=npix_sparse) # Selection function (N_det / N_tot) selfunc = da.where(ninj != 0, ndet / ninj, hp.UNSEEN) - # Uncertainty on sel. func. (normal approximation) - err_selfunc = da.where( - ninj != 0, - da.sqrt(selfunc * (1 - selfunc) / ninj), - hp.UNSEEN - ) return { "pix": pix, "sel_func_map": selfunc, - "err_sel_func_map": err_selfunc, "det_count_map": ndet, "inj_count_map": ninj } \ No newline at end of file From bc55df75cb16b6f0811d129bb80e01e98feda01e Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 11:57:33 +0100 Subject: [PATCH 21/43] Uncertainties on measured selection function now calculated in situ by TXSelectionFunctionSSIMaps (configurable) --- txpipe/auxiliary_maps.py | 78 +++++++++++++++++++++++++++++----------- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 77beffe0f..28bd09dc0 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -467,7 +467,6 @@ class TXSelectionFunctionSSIMaps(TXBaseMaps): This class generates maps of: - the selection function (in regions where SSI has been done) - - the uncertainty on the measured selection function TODO: Combine with TXAuxiliarySSIMaps? """ @@ -537,7 +536,6 @@ def run(self): ) maps["selection_function"] = sel_func_results["sel_func_map"] - maps["err_selection_function"] = sel_func_results["err_sel_func_map"] maps["det_count"] = sel_func_results["det_count_map"] maps["inj_count"] = sel_func_results["inj_count_map"] @@ -595,7 +593,8 @@ class TXModelSelectionFunction(TXBaseMaps): "degree": StageParameter(int, 1, msg="Degree of the polynomial fit."), "mask_thresh": StageParameter(float, 0.0, msg="Threshold for masking pixels at native resolution of mask."), "mask_thresh_coarse": StageParameter(float, 0.0, msg="Threshold for masking pixels after mask is degraded."), - 'inj_count_thresh': StageParameter(int, 1, msg="Exclude pixels containing fewer injections than this number."), + "inj_count_thresh": StageParameter(int, 1, msg="Exclude pixels containing fewer injections than this number."), + "sel_func_err_type": StageParameter(str, "none", msg="Type of uncertainty attributed to measured selection function."), **map_config_options } @@ -617,13 +616,11 @@ def run(self): # Load the map of the measured selection function and its uncertainties with self.open_input("sel_func_ssi_maps", wrapper=True) as f: sel_func_meas = f.read_map("selection_function") - err_sel_func_meas = f.read_map("err_selection_function") ninj = f.read_map("inj_count") # Ensure correct resolution (desired resolution can only be lower than # or equal to the native resolution of the selection function map). # "sum" reduction used for ninj since this is a counts map sel_func_meas = sel_func_meas.degrade(pixel_scheme.nside) - err_sel_func_meas = err_sel_func_meas.degrade(pixel_scheme.nside) ninj = ninj.degrade(pixel_scheme.nside, reduction="sum") with self.open_input("mask", wrapper=True) as f: @@ -663,21 +660,22 @@ def run(self): pix_train = pix_train[mask[pix_train] >= self.config["mask_thresh_coarse"]] # Also remove pixels with fewer than the specified no. of injections pix_train = pix_train[ninj[pix_train] >= self.config["inj_count_thresh"]] - # Pixels where selection function is 0 or 1 have zero uncertainty; - # these pixels are removed here - # TODO: Remove this step when uncertainty calculation is fixed in - # TXSelectionFunctionSSIMaps - valid_errs = err_sel_func_meas[pix_train] != 0. - pix_train = pix_train[valid_errs] # Select training data sel_func_train = da.from_array(sel_func_meas[pix_train], block_size) block_size = sel_func_train.chunksize - err_sel_func_train = da.from_array(err_sel_func_meas[pix_train], block_size) + ninj_train = da.from_array(ninj[pix_train], block_size) spmaps_train = da.stack( [ da.from_array(m[pix_train], block_size) for m in spmaps ] ).T + # Get uncertainties on training data + err_type = self.config["sel_func_err_type"].lower() + err_sel_func_train = self.sel_func_uncertainties( + sel_func_train, + err_type=err_type, + ninj=(None if err_type == "none" else ninj_train) + ) # Get survey property values across remainder of footprint pix_pred = np.setdiff1d(goodpix, pix_train) @@ -740,8 +738,41 @@ def run(self): gp.create_dataset("alphas", data=alphas) gp.create_dataset("cov_alphas", data=cov_alphas) + def sel_func_uncertainties(self, sel_func, err_type="none", ninj=None): + """ + Estimates uncertainties on the measured selection function. + + Currently two values for err_type are accommodated (case-independent): + - "none": selection function is treated as exact; zero uncertainty + - "gaussian": uncertainties are calculated under a Gaussian approx. + TODO: add other options, e.g. Wilson score interval. + """ + _, da = import_dask() + if err_type == "none": + return da.zeros_like(sel_func) + elif err_type == "gaussian": + return da.sqrt(sel_func * (1 - sel_func) / ninj) + else: + raise ValueError( + "err_type must be either 'none' or 'gaussian'." + ) + + def sel_func_weights(self, err_sel_func): + """ + Converts selection function uncertainties into weights for modelling. + """ + _, da = import_dask() + if all(err_sel_func == 0): + # Return ones if uncertanties are all zero + return da.ones(len(err_sel_func)) + else: + # Return inverse of variance + return 1 / (da.power(err_sel_func, 2)) + def polynomial_model(self, X_train, y_train, yerr_train, X_pred): """ + Polynomial regression and model predictions for test data. + Performs a polynomial regression of y with respect to X_train, then generates predictions at X_pred. In this context, X is an array containing survey property maps in certain pixels, and y is @@ -751,22 +782,28 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): # Degree of polynomial fit deg = self.config["degree"] + # Weights for each training pixel + W = self.sel_func_weights(yerr_train) + # If Gaussian approx was used for uncertainty estimation, pixels in + # which the selection function is 0 or 1 will have zero uncertainty + # (and thus infinite weight); remove these pixels here. + (keep,) = da.compute(da.isfinite(W)) + X_train = X_train[keep, :] + y_train = y_train[keep] + W = W[keep] + # Construct inputs matrix for regression m = len(X_train[:, 0]) ones = da.ones((m, 1), chunks=(X_train.chunks[0], 1)) X = da.hstack([ones, X_train]) for n in range(2, deg + 1): X = da.hstack([X, da.power(X_train, n)]) - X_T = X.T - - # Inverse variance on y - W = 1 / (da.power(yerr_train, 2)) # Explicitly compute necessary array combinations # NOTE: this assumes a diagonal covariance matrix for y, # i.e. cov_y = diag(W) - (XtWX,) = da.compute(X_T @ (W[:, None] * X)) - (XtWy,) = da.compute(X_T @ (W * y_train)) + (XtWX,) = da.compute(X.T @ (W[:, None] * X)) + (XtWy,) = da.compute(X.T @ (W * y_train)) # Compute best-fit coeffs (alphas) and their covariance alphas = np.linalg.solve(XtWX, XtWy) @@ -778,9 +815,8 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): X = da.hstack([ones, X_pred]) for n in range(2, deg + 1): X = da.hstack([X, da.power(X_pred, n)]) - X_T = X.T - y_pred = alphas @ X_T - err_y_pred = np.sqrt(np.diag(X @ cov_alphas @ X_T)) + y_pred = alphas @ X.T + err_y_pred = np.sqrt(np.diag(X @ cov_alphas @ X.T)) return y_pred, err_y_pred, alphas, cov_alphas From c2e5f84f8a0924edb4aa9471bffabd22bae0e1ae Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 14:42:55 +0100 Subject: [PATCH 22/43] Added total injection counts as function og mag threshold to outputs of TXAuxiliarySSIMaps --- txpipe/auxiliary_maps.py | 3 +++ txpipe/mapping/dr1.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 28bd09dc0..ccdeeca12 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -421,6 +421,9 @@ def run(self): maps["depth_det_prob/det_frac_by_mag_thres"] = depth_map_results[ "det_frac_by_mag_thres" ] + maps["depth_det_prob/inj_count_by_mag_thres"] = depth_map_results[ + "inj_count_by_mag_thres" + ] (maps,) = da.compute(maps) diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index b9e7333d3..b37b00f99 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -187,6 +187,7 @@ def make_dask_depth_map_det_prob( # loop over mag bins # TODO: add option to compute fraction *at* each magnitude, rather than below frac_list = [] + ntot_list = [] # Needed for error estimation on det frac (e.g. as done in TXModelSelectionFunction) for mag_thresh in mag_edges: above_thresh = mag < mag_thresh ntot = da.bincount(sparse_index, weights=above_thresh, minlength=npix_sparse) @@ -195,7 +196,9 @@ def make_dask_depth_map_det_prob( ) frac_det = da.where(ntot != 0, ndet / ntot, np.nan) frac_list.append(frac_det) + ntot_list.append(ntot) det_frac_by_mag_thres = da.stack(frac_list) + inj_count_by_mag_thres = da.stack(ntot_list) # Optional smoothing of the stacked detection fractions if smooth_det_frac: @@ -233,6 +236,7 @@ def make_dask_depth_map_det_prob( "inj_count_map": inj_count_map, "depth_map": depth_map, "det_frac_by_mag_thres": det_frac_by_mag_thres, + "inj_count_by_mag_thres": inj_count_by_mag_thres, "mag_edges": mag_edges, } From 4eaae976ff57c535f94de1166a89f45587e2c9fc Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:11:27 +0100 Subject: [PATCH 23/43] TXModelSelectionFunction now models a selection function for each det_frac map produced by TXAuxiliarySSIMaps --- txpipe/auxiliary_maps.py | 210 ++++++++++++++++++++++++++------------- 1 file changed, 139 insertions(+), 71 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index ccdeeca12..aa1c6a6ed 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -581,7 +581,7 @@ class TXModelSelectionFunction(TXBaseMaps): name = "TXModelSelectionFunction" dask_parallel = True inputs = [ - ("sel_func_ssi_maps", MapsFile), # Measured selection function and uncertainties + ("aux_ssi_maps", MapsFile), # Measured selection function and uncertainties ("mask", MapsFile) # Mask defining survey geometry ] @@ -608,18 +608,24 @@ def run(self): from functools import reduce # Import dask and alias it as 'da' _, da = import_dask() + # Assign imports to self to avoid repeated imports later + self.da = da + self.hsp = hsp # Retrieve configuration parameters block_size = self.config["block_size"] if block_size == 0: block_size = "auto" + # Type of uncertainty estimate to use + err_type = self.config["sel_func_err_type"].lower() + pixel_scheme = choose_pixelization(**self.config) # Load the map of the measured selection function and its uncertainties - with self.open_input("sel_func_ssi_maps", wrapper=True) as f: - sel_func_meas = f.read_map("selection_function") - ninj = f.read_map("inj_count") + with self.open_input("aux_ssi_maps", wrapper=True) as f: + sel_func_meas = f.read_map("depth_det_prob/det_frac_by_mag_thres") + ninj = f.read_map("depth_det_prob/inj_count_by_mag_thres") # Ensure correct resolution (desired resolution can only be lower than # or equal to the native resolution of the selection function map). # "sum" reduction used for ninj since this is a counts map @@ -656,63 +662,99 @@ def run(self): ] ) - # Select 'training' pixels for computing fit - # Want to keep only valid pixels across all maps + # Training pixels must also be valid in the selection function pix_train = np.intersect1d(sel_func_meas.valid_pixels, goodpix) # Remove any pixels below specified coverage fraction after degrading pix_train = pix_train[mask[pix_train] >= self.config["mask_thresh_coarse"]] - # Also remove pixels with fewer than the specified no. of injections - pix_train = pix_train[ninj[pix_train] >= self.config["inj_count_thresh"]] - # Select training data - sel_func_train = da.from_array(sel_func_meas[pix_train], block_size) - block_size = sel_func_train.chunksize - ninj_train = da.from_array(ninj[pix_train], block_size) - spmaps_train = da.stack( - [ - da.from_array(m[pix_train], block_size) for m in spmaps - ] - ).T - # Get uncertainties on training data - err_type = self.config["sel_func_err_type"].lower() - err_sel_func_train = self.sel_func_uncertainties( - sel_func_train, - err_type=err_type, - ninj=(None if err_type == "none" else ninj_train) - ) - # Get survey property values across remainder of footprint - pix_pred = np.setdiff1d(goodpix, pix_train) - spmaps_pred = da.stack( - [ - da.from_array(m[pix_pred], block_size) for m in spmaps - ] - ).T - - # Perform polynomial regression and retrieve the following: - # - mean prediction on the selection function (in each pixel) - # - 1-sigma uncertainty on these predictions (in each pixel) - # - best-fit coefficients for each survey property (+ an intercept term) - # - covariance matrix for the best-fit parameters - sel_func_pred, err_sel_func_pred, alphas, cov_alphas = self.polynomial_model( - spmaps_train, - sel_func_train, - err_sel_func_train, - spmaps_pred - ) + # Cycle through the samples for which selection functions have been measured + samples = sel_func_meas.dtype.names + maps = { + "sel_func": [], + "err_sel_func": [] + } + model_info = { + "alphas": [], + "cov_alphas": [] + } + for k in samples: + print(f"Modelling selection function for sample {k}") + + # Remove pixels with fewer than the specified no. of injections + pix_train_k = pix_train[ninj[k][pix_train] >= self.config["inj_count_thresh"]] + print(pix_train_k) + # Select training data + sel_func_train = da.from_array(sel_func_meas[k][pix_train_k], block_size) + block_size = sel_func_train.chunksize + ninj_train = da.from_array(ninj[k][pix_train_k], block_size) + spmaps_train = da.stack( + [ + da.from_array(m[pix_train_k], block_size) for m in spmaps + ] + ).T + # Get uncertainties on training data + err_sel_func_train = self.sel_func_uncertainties( + sel_func_train, + err_type=err_type, + ninj=(None if err_type == "none" else ninj_train) + ) + print(err_sel_func_train) + + # Get survey property values across remainder of footprint + pix_pred = np.setdiff1d(goodpix, pix_train_k) + spmaps_pred = da.stack( + [ + da.from_array(m[pix_pred], block_size) for m in spmaps + ] + ).T + + # Perform polynomial regression and retrieve the following: + # - mean prediction on the selection function (in each pixel) + # - 1-sigma uncertainty on these predictions (in each pixel) + # - best-fit coefficients for each survey property (+ an intercept term) + # - covariance matrix for the best-fit parameters + sel_func_pred, err_sel_func_pred, alphas, cov_alphas = self.polynomial_model( + spmaps_train, + sel_func_train, + err_sel_func_train, + spmaps_pred + ) + + # Combine training data and predictions into single HealSparse maps + pix_all = np.concatenate([pix_train_k, pix_pred]) + sel_func_full = da.concatenate([sel_func_train, sel_func_pred]) + err_sel_func_full = da.concatenate([err_sel_func_train, err_sel_func_pred]) + # Sort by pixel number (will make saving to HealSparse format easier) + inds = pix_all.argsort() + chunk_sizes = sel_func_full.chunks[0] + chunk_bounds = np.cumsum([0] + list(chunk_sizes)) + maps["sel_func"].append(self.dask_sort(sel_func_full, inds, chunk_bounds)) + maps["err_sel_func"].append(self.dask_sort(err_sel_func_full, inds, chunk_bounds)) + # Store model info + model_info["alphas"].append(alphas) + model_info["cov_alphas"].append(cov_alphas) - # Combine training data and predictions into single HealSparse maps - pix_all = np.concatenate([pix_train, pix_pred]) - (sel_func_full,) = da.compute(da.concatenate([sel_func_train, sel_func_pred])) - (err_sel_func_full,) = da.compute(da.concatenate([err_sel_func_train, err_sel_func_pred])) - - hsp_maps = { - "selection_function": self.make_hsp_map( - pix_all, sel_func_full, pixel_scheme - ), - "err_selection_function": self.make_hsp_map( - pix_all, err_sel_func_full, pixel_scheme + (maps,) = da.compute(maps) + + # Convert maps to healsparse map objects + hsp_maps = {} + dtypes = [(k, float) for k in samples] + for n, m in maps.items(): + # Both sel_func and err_sel_func are 2D; save as recarrays + primary = "bin0" + m_hsp = hsp.HealSparseMap.make_empty( + pixel_scheme.nside_coverage, + pixel_scheme.nside, + dtypes, + primary=primary ) - } + m_hsp.update_values_pix( + goodpix, + np.zeros(len(goodpix), dtype=dtypes) + ) + for i,k in enumerate(samples): + m_hsp[k][goodpix] = m[i] + hsp_maps[n] = m_hsp # Prepare metadata for the maps. Copy the pixelization-related # configuration options only here @@ -720,6 +762,12 @@ def run(self): # Then add the other configuration options metadata.update(pixel_scheme.metadata) + # Write the output maps to the output file + with self.open_output("sel_func_pred_maps", wrapper=True) as out: + for map_name, hsp_map in hsp_maps.items(): + out.write_map(map_name, hsp_map, metadata) + out.file["maps"].attrs.update(metadata) + # Prepare metadata for model parameter info params_metadata = { "model": f"polynomial (deg = {self.config['degree']})", @@ -728,18 +776,14 @@ def run(self): ] } - # Write the output maps to the output file - with self.open_output("sel_func_pred_maps", wrapper=True) as out: - for map_name, hsp_map in hsp_maps.items(): - out.write_map(map_name, hsp_map, metadata) - out.file["maps"].attrs.update(metadata) - # Write the model parameters and covariances to the output file with self.open_output("sel_func_model_info", wrapper=True) as out: - gp = out.file.create_group("model_info") + mi = out.file.create_group("model_info") out.file["model_info"].attrs.update(params_metadata) - gp.create_dataset("alphas", data=alphas) - gp.create_dataset("cov_alphas", data=cov_alphas) + for i,k in enumerate(samples): + gp = mi.create_group(k) + gp.create_dataset("alphas", data=model_info["alphas"][i]) + gp.create_dataset("cov_alphas", data=model_info["cov_alphas"][i]) def sel_func_uncertainties(self, sel_func, err_type="none", ninj=None): """ @@ -750,11 +794,10 @@ def sel_func_uncertainties(self, sel_func, err_type="none", ninj=None): - "gaussian": uncertainties are calculated under a Gaussian approx. TODO: add other options, e.g. Wilson score interval. """ - _, da = import_dask() if err_type == "none": - return da.zeros_like(sel_func) + return self.da.zeros_like(sel_func) elif err_type == "gaussian": - return da.sqrt(sel_func * (1 - sel_func) / ninj) + return self.da.sqrt(sel_func * (1 - sel_func) / ninj) else: raise ValueError( "err_type must be either 'none' or 'gaussian'." @@ -764,13 +807,12 @@ def sel_func_weights(self, err_sel_func): """ Converts selection function uncertainties into weights for modelling. """ - _, da = import_dask() if all(err_sel_func == 0): # Return ones if uncertanties are all zero - return da.ones(len(err_sel_func)) + return self.da.ones(len(err_sel_func)) else: # Return inverse of variance - return 1 / (da.power(err_sel_func, 2)) + return 1 / (self.da.power(err_sel_func, 2)) def polynomial_model(self, X_train, y_train, yerr_train, X_pred): """ @@ -781,7 +823,7 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): containing survey property maps in certain pixels, and y is the selection function measured in those pixels. """ - _, da = import_dask() + da = self.da # Degree of polynomial fit deg = self.config["degree"] @@ -823,6 +865,32 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): return y_pred, err_y_pred, alphas, cov_alphas + def dask_sort(self, X, inds, chunk_boundaries): + """ + Reorders a dask array using `inds`, without creating more chunks. + """ + n_chunks = len(chunk_boundaries) - 1 + + # Assign each index in `inds` to a chunk + chunk_ids = np.searchsorted(chunk_boundaries[1:], inds, side='right') + + # Sort inds by chunk, preserving the mapping back to output positions + order = np.argsort(chunk_ids, kind='stable') + inds_sorted = inds[order] + + # Gather from D one chunk at a time + X_sorted = self.da.empty(len(inds), dtype=X.dtype, chunks=X.chunks) + + for c in range(n_chunks): + mask = chunk_ids[order] == c + if not mask.any(): + continue + local_inds = inds_sorted[mask] - chunk_boundaries[c] + chunk_data = X[chunk_boundaries[c]:chunk_boundaries[c+1]] + X_sorted[order[mask]] = chunk_data[local_inds] + + return X_sorted + def make_hsp_map(self, pixels, values, pixel_scheme): """ Creates an empty HealSparseMap and populates it at the specified From 6b75ad556d28abc9a4b8f052c318c7893a862555 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:16:22 +0100 Subject: [PATCH 24/43] Deleted TXSelectionFunctionSSIMaps since now using outputs from TXAuxiliarySSIMaps --- txpipe/auxiliary_maps.py | 101 --------------------------------------- 1 file changed, 101 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index aa1c6a6ed..390415c2d 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -464,107 +464,6 @@ def run(self): out.file["maps"].attrs.update(metadata) -class TXSelectionFunctionSSIMaps(TXBaseMaps): - """ - Generate map of the selection function from SSI catalogs. - - This class generates maps of: - - the selection function (in regions where SSI has been done) - TODO: Combine with TXAuxiliarySSIMaps? - """ - - name = "TXSelectionFunctionSSIMaps" - dask_parallel = True - inputs = [ - ("matched_ssi_photometry_catalog", HDFFile), # injected objects that were detected - ("injection_catalog", HDFFile), # injection locations - ("ssi_detection_catalog", HDFFile), # detection info on each injection - ] - outputs = [ - ("sel_func_ssi_maps", MapsFile), - ] - - config_options = { - "block_size": StageParameter(int, 0, msg="Block size for dask processing (0 means auto)."), - **map_config_options - } - - def run(self): - # Import dask and alias it as 'da' - _, da = import_dask() - import healsparse as hsp - - # Retrieve configuration parameters - block_size = self.config["block_size"] - if block_size == 0: - block_size = "auto" - - # Open the input catalog files - # We can't use "with" statements because we need to keep the file open - # while we're using dask. - f_matched = self.open_input("matched_ssi_photometry_catalog", wrapper=True) - f_inj = self.open_input("injection_catalog", wrapper=True) - f_det = self.open_input("ssi_detection_catalog", wrapper=True) - - # Load matched catalog data into dask arrays. - # This is lazy in dask, so we're not actually loading the data here. - ra = da.from_array(f_matched.file["photometry/ra"], block_size) - block_size = ra.chunksize - dec = da.from_array(f_matched.file["photometry/dec"], block_size) - - # Choose the pixelization scheme based on the configuration. - # Might need to review this to make sure we use the same scheme everywhere - pixel_scheme = choose_pixelization(**self.config) - - # Load detection catalog data into dask arrays. - # This is lazy in dask, so we're not actually loading the data here. - ra_inj = da.from_array(f_inj.file["photometry/ra"], block_size) - dec_inj = da.from_array(f_inj.file["photometry/dec"], block_size) - det = da.from_array(f_det.file[f"photometry/detected"], block_size) - - # Make coverage map for these ra,dec - cov_map = make_coverage_map(ra_inj, dec_inj, pixel_scheme) - - # Initialize a dictionary to store the maps. - # To start with this is all lazy too, until we call compute - maps = {} - - # Create selection function map using injection catalog - sel_func_results = make_dask_selection_function( - ra_inj, - dec_inj, - det, - pixel_scheme, - cov_map - ) - - maps["selection_function"] = sel_func_results["sel_func_map"] - maps["det_count"] = sel_func_results["det_count_map"] - maps["inj_count"] = sel_func_results["inj_count_map"] - - (maps,) = da.compute(maps) - - # convert sparse_map arrays into healsparse map objects - hsp_maps = {} - for name, map in maps.items(): - hsp_maps[name] = hsp.HealSparseMap( - cov_map=cov_map, - sparse_map=map, - nside_sparse=cov_map.nside_sparse, - ) - - # Prepare metadata for the maps. Copy the pixelization-related - # configuration options only here - metadata = {key: self.config[key] for key in map_config_options if key in self.config} - metadata.update(pixel_scheme.metadata) - - # Write the output maps to the output file - with self.open_output("sel_func_ssi_maps", wrapper=True) as out: - for map_name, m in hsp_maps.items(): - out.write_map(map_name, m, metadata) - out.file["maps"].attrs.update(metadata) - - class TXModelSelectionFunction(TXBaseMaps): """ Model selection function across footprint using survey property maps. From 09af75e3c5fc6b630f3f80ceaf58747dba55ede2 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:17:04 +0100 Subject: [PATCH 25/43] Removed unused function --- txpipe/auxiliary_maps.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 390415c2d..6e527e087 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -789,17 +789,3 @@ def dask_sort(self, X, inds, chunk_boundaries): X_sorted[order[mask]] = chunk_data[local_inds] return X_sorted - - def make_hsp_map(self, pixels, values, pixel_scheme): - """ - Creates an empty HealSparseMap and populates it at the specified - pixels with the corresponding values. - """ - import healsparse as hsp - hsp_map = hsp.HealSparseMap.make_empty( - nside_coverage=pixel_scheme.nside_coverage, - nside_sparse=pixel_scheme.nside, - dtype=float - ) - hsp_map[pixels] = values - return hsp_map From c85708d1eb9c9eb7ddc4f72bc3d6f80f1457a69b Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:20:23 +0100 Subject: [PATCH 26/43] Changed ordering of groups/datasets in model_info output file --- txpipe/auxiliary_maps.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 6e527e087..c08bb8191 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -679,10 +679,11 @@ def run(self): with self.open_output("sel_func_model_info", wrapper=True) as out: mi = out.file.create_group("model_info") out.file["model_info"].attrs.update(params_metadata) + gp_alphas = mi.create_group("alphas") + gp_cov = mi.create_group("cov_alphas") for i,k in enumerate(samples): - gp = mi.create_group(k) - gp.create_dataset("alphas", data=model_info["alphas"][i]) - gp.create_dataset("cov_alphas", data=model_info["cov_alphas"][i]) + gp_alphas.create_dataset(k, data=model_info["alphas"][i]) + gp_cov.create_dataset(k, data=model_info["cov_alphas"][i]) def sel_func_uncertainties(self, sel_func, err_type="none", ninj=None): """ From 90728a04ed53f7a090f8a099684676fe4a051681 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:28:53 +0100 Subject: [PATCH 27/43] Tidying up --- txpipe/auxiliary_maps.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index c08bb8191..9660c26f3 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -507,9 +507,8 @@ def run(self): from functools import reduce # Import dask and alias it as 'da' _, da = import_dask() - # Assign imports to self to avoid repeated imports later + # Assign imports to self to avoid repeated imports in other functions self.da = da - self.hsp = hsp # Retrieve configuration parameters block_size = self.config["block_size"] @@ -581,7 +580,6 @@ def run(self): # Remove pixels with fewer than the specified no. of injections pix_train_k = pix_train[ninj[k][pix_train] >= self.config["inj_count_thresh"]] - print(pix_train_k) # Select training data sel_func_train = da.from_array(sel_func_meas[k][pix_train_k], block_size) block_size = sel_func_train.chunksize @@ -597,7 +595,6 @@ def run(self): err_type=err_type, ninj=(None if err_type == "none" else ninj_train) ) - print(err_sel_func_train) # Get survey property values across remainder of footprint pix_pred = np.setdiff1d(goodpix, pix_train_k) @@ -627,8 +624,10 @@ def run(self): inds = pix_all.argsort() chunk_sizes = sel_func_full.chunks[0] chunk_bounds = np.cumsum([0] + list(chunk_sizes)) - maps["sel_func"].append(self.dask_sort(sel_func_full, inds, chunk_bounds)) - maps["err_sel_func"].append(self.dask_sort(err_sel_func_full, inds, chunk_bounds)) + sel_func_full = self.dask_sort(sel_func_full, inds, chunk_bounds) + err_sel_func_full = self.dask_sort(err_sel_func_full, inds, chunk_bounds) + maps["sel_func"].append(sel_func_full) + maps["err_sel_func"].append(err_sel_func_full) # Store model info model_info["alphas"].append(alphas) model_info["cov_alphas"].append(cov_alphas) From e5a084dd67ac0c3bfb33fdb59443cf1e94295968 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:44:44 +0100 Subject: [PATCH 28/43] Removed unused function --- txpipe/mapping/dr1.py | 55 ------------------------------------------- 1 file changed, 55 deletions(-) diff --git a/txpipe/mapping/dr1.py b/txpipe/mapping/dr1.py index b37b00f99..ccd4672b4 100755 --- a/txpipe/mapping/dr1.py +++ b/txpipe/mapping/dr1.py @@ -239,58 +239,3 @@ def make_dask_depth_map_det_prob( "inj_count_by_mag_thres": inj_count_by_mag_thres, "mag_edges": mag_edges, } - - -def make_dask_selection_function( - ra, dec, det, pixel_scheme, cov_map - ): - """ - Generate map of selection function from SSI catalogues. - - Maps the selection function in regions containing synthetically injected - sources. In a given pixel, the selection function is estimated as the number - of detected injections versus the total number of injections. - TODO: Add functionality for more sophisticated estimates, e.g. including - removal of blended injections. - TODO: Avoid the normal approximation for estimating the uncertainties. - - Parameters - ---------- - ra : dask.array - Right Ascension coordinates in degrees for injected sources. - dec : dask.array - Declination coordinates in degrees for injected sources. - det : dask.array - Whether or not the source has been detected. - pixel_scheme : PixelScheme - An object that provides pixelization scheme with methods `npix` and `ang2pix`. - cov_map : HealSparseCoverage - coverage map corresponding to these sources (or a superset of them) - - Returns - ------- - dict - A dict containing: - - pix (dask.array): Unique pixel indices. - - sel_func_map (dask.array): Selection function measured from injected sources. - - det_count_map (dask.array): Number of detections per pixel. - - inj_count_map (dask.array): Total number of injections per pixel. - """ - _, da = import_dask() - pix = pixel_scheme.ang2pix(ra, dec) - - # get the sparse map index for each of these pixel (is dask aware) - sparse_index, npix_sparse = pix2sparseindex(pix, cov_map) - - # Count detected and total injections per pixel - ndet = da.bincount(sparse_index, weights=det, minlength=npix_sparse) - ninj = da.bincount(sparse_index, minlength=npix_sparse) - # Selection function (N_det / N_tot) - selfunc = da.where(ninj != 0, ndet / ninj, hp.UNSEEN) - - return { - "pix": pix, - "sel_func_map": selfunc, - "det_count_map": ndet, - "inj_count_map": ninj - } \ No newline at end of file From 0c36714a98216cf207331cbe748254bde4e86fb9 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:45:43 +0100 Subject: [PATCH 29/43] Removed import of deleted function --- txpipe/auxiliary_maps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 9660c26f3..d6c3fad3b 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -8,7 +8,6 @@ make_dask_bright_object_map, make_dask_depth_map, make_dask_depth_map_det_prob, - make_dask_selection_function, ) from .data_types import MapsFile, HDFFile, ShearCatalog from .utils import choose_pixelization, import_dask From 2097b26f7a4079563a3b3dfdec7d88072685cf25 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 30 Apr 2026 18:51:20 +0100 Subject: [PATCH 30/43] Removed deleted function from mapping/__init__.py --- txpipe/mapping/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/txpipe/mapping/__init__.py b/txpipe/mapping/__init__.py index 48037166e..9da660c42 100755 --- a/txpipe/mapping/__init__.py +++ b/txpipe/mapping/__init__.py @@ -2,7 +2,6 @@ make_dask_bright_object_map, make_dask_depth_map, make_dask_depth_map_det_prob, - make_dask_selection_function ) from .basic_maps import ( make_dask_flag_maps, From 11af4f7ce67f9ef56d112482cdbec1bd4b783198 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 7 May 2026 10:56:34 +0100 Subject: [PATCH 31/43] Weighted mean now used when degrading selection function --- txpipe/auxiliary_maps.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index d6c3fad3b..7d27c6cd5 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -526,7 +526,11 @@ def run(self): # Ensure correct resolution (desired resolution can only be lower than # or equal to the native resolution of the selection function map). # "sum" reduction used for ninj since this is a counts map - sel_func_meas = sel_func_meas.degrade(pixel_scheme.nside) + sel_func_meas = sel_func_meas.degrade( + pixel_scheme.nside, + reduction="wmean", + weights=ninj + ) ninj = ninj.degrade(pixel_scheme.nside, reduction="sum") with self.open_input("mask", wrapper=True) as f: From 27bb13b6a4a3e63efc31c4337e7feca905566a97 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 7 May 2026 11:56:33 +0100 Subject: [PATCH 32/43] Now uses hsp.operations.product_intersection when determining valid pixels across maps --- txpipe/auxiliary_maps.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 7d27c6cd5..f15c15270 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -555,13 +555,8 @@ def run(self): ] # Identify valid pixels across mask and all SP maps - goodpix = reduce( - np.intersect1d, - [ - mask_pix, - *[m.valid_pixels for m in spmaps] - ] - ) + goodmap = hsp.operations.product_intersection([mask] + spmaps) + goodpix = goodmap.valid_pixels # Training pixels must also be valid in the selection function pix_train = np.intersect1d(sel_func_meas.valid_pixels, goodpix) From 149ba557d7375cebeb54f364c2f727e7378e3110 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 7 May 2026 11:57:15 +0100 Subject: [PATCH 33/43] Removed now unused mask_pix --- txpipe/auxiliary_maps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index f15c15270..989a26402 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -538,7 +538,6 @@ def run(self): thresh=self.config["mask_thresh"], degrade_nside=pixel_scheme.nside ) - mask_pix = mask.valid_pixels # Load survey property maps at the valid pixels # TODO: Define these as individual inputs rather than glob search? From da16179c86638063ca2b97fd0cb37dd3dc52d65d Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 7 May 2026 12:00:02 +0100 Subject: [PATCH 34/43] Added possible argument values to sel_func_err_type description --- txpipe/auxiliary_maps.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 989a26402..b8b5b3e60 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -495,7 +495,11 @@ class TXModelSelectionFunction(TXBaseMaps): "mask_thresh": StageParameter(float, 0.0, msg="Threshold for masking pixels at native resolution of mask."), "mask_thresh_coarse": StageParameter(float, 0.0, msg="Threshold for masking pixels after mask is degraded."), "inj_count_thresh": StageParameter(int, 1, msg="Exclude pixels containing fewer injections than this number."), - "sel_func_err_type": StageParameter(str, "none", msg="Type of uncertainty attributed to measured selection function."), + "sel_func_err_type": StageParameter( + str, + "none", + msg="Type of uncertainty attributed to measured selection function. Can be either 'none' (assumes no " + "uncertainty) or 'gaussian' (uses a Gaussian approximation to estimate uncertainties)."), **map_config_options } From fde4dd7be5e28e799c80e7408168de4e9f04b7ba Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Thu, 7 May 2026 18:00:55 +0100 Subject: [PATCH 35/43] Added more detailed docstrings for all methods --- txpipe/auxiliary_maps.py | 110 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 103 insertions(+), 7 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index b8b5b3e60..f7af6d4e6 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -689,10 +689,28 @@ def sel_func_uncertainties(self, sel_func, err_type="none", ninj=None): """ Estimates uncertainties on the measured selection function. - Currently two values for err_type are accommodated (case-independent): - - "none": selection function is treated as exact; zero uncertainty - - "gaussian": uncertainties are calculated under a Gaussian approx. - TODO: add other options, e.g. Wilson score interval. + Parameters + ---------- + sel_func: dask.array, shape=(N,) + Selection function computed in TXAuxiliarySSIMaps, evaluated at pixels + which are valid in the N pixels which have at least the requisite + number of injections and are valid in all survey property maps. + + err_type: str, optional (default="none") + Determines the treatment of uncertainties on the measured selection + function. Currently two values for err_type are accommodated (case-independent): + - "none": selection function is treated as exact; zero uncertainty + - "gaussian": uncertainties are calculated under a Gaussian approx. + TODO: add other options, e.g. Wilson score interval. + + ninj: dask.array (shape=N,) or None; optional (default=None) + Total injected counts in each of the pixels covered by sel_func. Only required + if err_type=="gaussian". + + Returns + ------- + dask.array, shape=(N,) + Estimates of the uncertainty on the measured selection function in each pixel. """ if err_type == "none": return self.da.zeros_like(sel_func) @@ -706,6 +724,19 @@ def sel_func_uncertainties(self, sel_func, err_type="none", ninj=None): def sel_func_weights(self, err_sel_func): """ Converts selection function uncertainties into weights for modelling. + + Parameters + ---------- + err_sel_func: dask.array, shape=(N,) + Uncertainty on the measured selection function in the N pixels which have at + least the requisite number of injections and are valid in all survey property + maps. + + Returns + ------- + dask.array, shape=(N,) + Weights corresponding to the uncertainty on the measured selection function in + each pixel. """ if all(err_sel_func == 0): # Return ones if uncertanties are all zero @@ -722,6 +753,49 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): generates predictions at X_pred. In this context, X is an array containing survey property maps in certain pixels, and y is the selection function measured in those pixels. + + Parameters + ---------- + X_train: dask.array, shape=(N_train, N_sys) + Survey property maps evaluated at the N_train pixels that have been selected + for use as training data for the model. + + y_train: dask.array, shape=(N_train,) + Measured selection function in the N_train training data pixels. + + yerr_train: dask.array, shape=(N_train,) + Uncertainties on the measured selection function in the N_train training data + pixels. + + X_pred: dask.array, shape=(N_pred, N_sys) + Survey property maps evaluated at all pixels in the survey footprint (as defined + by the mask) not included in the training set, which are also valid across all + survey property maps. I.e. if the footprint comprises N_tot of these valid pixels, + then N_pred = N_tot - N_train. + + Returns + ------- + y_pred: dask.array, shape=(N_pred,) + Model predictions for the selection function in the N_pred pixels at which the + survey property maps were evaluated to construct X_pred. + + yerr_pred: dask.array, shape=(N_pred,) + Uncertainties on the model preditions for the selection function. + + alphas: numpy.array, shape=(deg*N_sys+1,) + Best-fit parameters from the model. Shape will depend on the degree of the + polynomial (deg) specified in the config file. The first parameter is the bias + term; the next N_sys parameters correspond to each survey property map. If + deg >= 2, then the following N_sys parameters correspond to the squares of the + survey property maps, etc. The indices of the parameters corresponding to + the i'th survey property, its square, its cube, etc., can be obtained via: + `inds = [1 + i + d * N_sys for d in range(deg)].` + The names of the survey property maps are saved in the correct order as part of + the outputs of this stage so that they can be associated with their corresponding + parameters. + + cov_alphas: numpy.array, shape=(deg*N_sys+1, deg*N_sys+1) + Covariance matrix of the best-fit parameters. """ da = self.da # Degree of polynomial fit @@ -761,13 +835,35 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): for n in range(2, deg + 1): X = da.hstack([X, da.power(X_pred, n)]) y_pred = alphas @ X.T - err_y_pred = np.sqrt(np.diag(X @ cov_alphas @ X.T)) + yerr_pred = np.sqrt(np.diag(X @ cov_alphas @ X.T)) - return y_pred, err_y_pred, alphas, cov_alphas + return y_pred, yerr_pred, alphas, cov_alphas def dask_sort(self, X, inds, chunk_boundaries): """ Reorders a dask array using `inds`, without creating more chunks. + + This is motivated by wanting to order certain dask arrays by their corresponding + HEALPix pixel indices, which can result in dask creating many more chunks and + slowing down subsequent computations. This function sorts the pixel indices by chunk, + and then gathers from the target array one chunk at a time. + + Parameters + ---------- + X: dask.array, shape=(N,) + Dask array to be sorted according to `inds`. + + inds: numpy.array, shape=(N,) + Array containing the indices of X in order of ascending HEALPix pixel ID. + + chunk_boundaries: numpy.array, shape=(ceil(N/m)+1,) + Boundaries of each chunk of size m, including the left edge and excluding the right. + E.g. The first chunk will have boundaries [0, m), the next will have [m, 2*m), etc. + + Returns + ------- + X_sorted: dask.array, shape=(N,) + Sorted version of the input array """ n_chunks = len(chunk_boundaries) - 1 @@ -778,7 +874,7 @@ def dask_sort(self, X, inds, chunk_boundaries): order = np.argsort(chunk_ids, kind='stable') inds_sorted = inds[order] - # Gather from D one chunk at a time + # Gather from X one chunk at a time X_sorted = self.da.empty(len(inds), dtype=X.dtype, chunks=X.chunks) for c in range(n_chunks): From 82943432194fb25c06721443dca1f122addb473e Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Fri, 8 May 2026 14:38:06 +0100 Subject: [PATCH 36/43] Changed built-in all() to dask.array.all() --- txpipe/auxiliary_maps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index f7af6d4e6..1782fbc09 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -738,7 +738,7 @@ def sel_func_weights(self, err_sel_func): Weights corresponding to the uncertainty on the measured selection function in each pixel. """ - if all(err_sel_func == 0): + if (err_sel_func == 0).all(): # Return ones if uncertanties are all zero return self.da.ones(len(err_sel_func)) else: From 5075994e82461c2d27179c4e2dd98ab040b15b83 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Fri, 8 May 2026 14:57:53 +0100 Subject: [PATCH 37/43] Added option to specify for which bins the selection function will be modelled --- txpipe/auxiliary_maps.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 1782fbc09..ee926c5e5 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -499,7 +499,14 @@ class TXModelSelectionFunction(TXBaseMaps): str, "none", msg="Type of uncertainty attributed to measured selection function. Can be either 'none' (assumes no " - "uncertainty) or 'gaussian' (uses a Gaussian approximation to estimate uncertainties)."), + "uncertainty) or 'gaussian' (uses a Gaussian approximation to estimate uncertainties)." + ), + "bins_to_model": StageParameter( + list, + [-1], + msg="List of depth bins for which the selection function is to be modelled. -1 means selection functions will be " + "modelled for every bin for which fracdet maps were created in TXAuxiliarySSIMaps." + ), **map_config_options } @@ -566,8 +573,13 @@ def run(self): # Remove any pixels below specified coverage fraction after degrading pix_train = pix_train[mask[pix_train] >= self.config["mask_thresh_coarse"]] - # Cycle through the samples for which selection functions have been measured - samples = sel_func_meas.dtype.names + # Determine which samples are to have their selection functions modelled + if self.config["bins_to_model"] == [-1]: + samples = sel_func_meas.dtype.names + else: + samples = [f"bin{k}" for k in self.config["bins_to_model"]] + print(samples) + # Cycle through the samples for which selection functions are to be modelled maps = { "sel_func": [], "err_sel_func": [] From c6449d64afdf454b8e7a7a348c15f60ddbc7270e Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Fri, 8 May 2026 15:32:00 +0100 Subject: [PATCH 38/43] Added error handling for case where there are zero valid pixels in the training data --- txpipe/auxiliary_maps.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index ee926c5e5..255bbfd78 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -593,6 +593,9 @@ def run(self): # Remove pixels with fewer than the specified no. of injections pix_train_k = pix_train[ninj[k][pix_train] >= self.config["inj_count_thresh"]] + # Check there is at least one pixel which has survived all cuts + errmsg = f"Cannot model selection function for sample {k}; no valid pixels." + assert len(pix_train_k) > 0, errmsg # Select training data sel_func_train = da.from_array(sel_func_meas[k][pix_train_k], block_size) block_size = sel_func_train.chunksize From de74cca004c8d70b97c5fbb407f1b3877bae0089 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Fri, 8 May 2026 15:32:39 +0100 Subject: [PATCH 39/43] Removed print statement --- txpipe/auxiliary_maps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 255bbfd78..6847b59f5 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -578,7 +578,7 @@ def run(self): samples = sel_func_meas.dtype.names else: samples = [f"bin{k}" for k in self.config["bins_to_model"]] - print(samples) + # Cycle through the samples for which selection functions are to be modelled maps = { "sel_func": [], From c73ddb82063a9f13a62829fec39e86abd8ef4084 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Mon, 11 May 2026 11:10:43 +0100 Subject: [PATCH 40/43] err_type now included as an argument in most methods (avoids need to expicitly compute uncertainties earleir than needed) --- txpipe/auxiliary_maps.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 6847b59f5..715457875 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -629,7 +629,8 @@ def run(self): spmaps_train, sel_func_train, err_sel_func_train, - spmaps_pred + spmaps_pred, + err_type ) # Combine training data and predictions into single HealSparse maps @@ -736,16 +737,22 @@ def sel_func_uncertainties(self, sel_func, err_type="none", ninj=None): "err_type must be either 'none' or 'gaussian'." ) - def sel_func_weights(self, err_sel_func): + def sel_func_weights(self, err_type="none", err_sel_func=None): """ Converts selection function uncertainties into weights for modelling. Parameters ---------- - err_sel_func: dask.array, shape=(N,) + err_type: str, optional (default="none") + Determines the treatment of uncertainties on the measured selection + function. Currently two values for err_type are accommodated (case-independent): + - "none": selection function is treated as exact; zero uncertainty + - "gaussian": uncertainties are calculated under a Gaussian approx. + + err_sel_func: dask.array, shape=(N,) or None; optional (default=None) Uncertainty on the measured selection function in the N pixels which have at least the requisite number of injections and are valid in all survey property - maps. + maps. Only required if err_type=="gaussian". Returns ------- @@ -753,14 +760,14 @@ def sel_func_weights(self, err_sel_func): Weights corresponding to the uncertainty on the measured selection function in each pixel. """ - if (err_sel_func == 0).all(): - # Return ones if uncertanties are all zero + if err_type == "none": + # Return ones if uncertanties are treated as being zero return self.da.ones(len(err_sel_func)) else: # Return inverse of variance return 1 / (self.da.power(err_sel_func, 2)) - def polynomial_model(self, X_train, y_train, yerr_train, X_pred): + def polynomial_model(self, X_train, y_train, yerr_train, X_pred, err_type="none"): """ Polynomial regression and model predictions for test data. @@ -788,6 +795,12 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): survey property maps. I.e. if the footprint comprises N_tot of these valid pixels, then N_pred = N_tot - N_train. + err_type: str, optional (default="none") + Determines the treatment of uncertainties on the measured selection + function. Currently two values for err_type are accommodated (case-independent): + - "none": selection function is treated as exact; zero uncertainty + - "gaussian": uncertainties are calculated under a Gaussian approx. + Returns ------- y_pred: dask.array, shape=(N_pred,) @@ -817,7 +830,7 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred): deg = self.config["degree"] # Weights for each training pixel - W = self.sel_func_weights(yerr_train) + W = self.sel_func_weights(err_type, yerr_train) # If Gaussian approx was used for uncertainty estimation, pixels in # which the selection function is 0 or 1 will have zero uncertainty # (and thus infinite weight); remove these pixels here. From 703cef6ea60d6334551d59b1061be2439a7154ad Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Mon, 11 May 2026 11:26:53 +0100 Subject: [PATCH 41/43] Circumvents costly matrix multiplication when computing uncertainties on model predictions --- txpipe/auxiliary_maps.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 715457875..0594d0b12 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -863,7 +863,9 @@ def polynomial_model(self, X_train, y_train, yerr_train, X_pred, err_type="none" for n in range(2, deg + 1): X = da.hstack([X, da.power(X_pred, n)]) y_pred = alphas @ X.T - yerr_pred = np.sqrt(np.diag(X @ cov_alphas @ X.T)) + # The following is a cheaper equivalent of doing + # `sqrt(diag(X @ cov_alphas @ X.T))` + yerr_pred = (X @ cov_alphas * X).sum(axis=1) return y_pred, yerr_pred, alphas, cov_alphas From 4e440f6d39bcd64f5192ac86932a69b63008bde1 Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Mon, 11 May 2026 15:15:49 +0100 Subject: [PATCH 42/43] Changed name of class to TXPredictDensitySelectionFunction --- txpipe/auxiliary_maps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/txpipe/auxiliary_maps.py b/txpipe/auxiliary_maps.py index 0594d0b12..202047562 100644 --- a/txpipe/auxiliary_maps.py +++ b/txpipe/auxiliary_maps.py @@ -463,7 +463,7 @@ def run(self): out.file["maps"].attrs.update(metadata) -class TXModelSelectionFunction(TXBaseMaps): +class TXPredictDensitySelectionFunction(TXBaseMaps): """ Model selection function across footprint using survey property maps. @@ -476,7 +476,7 @@ class TXModelSelectionFunction(TXBaseMaps): TODO: Add other options for the form of the model. """ - name = "TXModelSelectionFunction" + name = "TXPredictDensitySelectionFunction" dask_parallel = True inputs = [ ("aux_ssi_maps", MapsFile), # Measured selection function and uncertainties From b2093d1c554d6d17ea84b781d663d72f863de71a Mon Sep 17 00:00:00 2001 From: Thomas Cornish Date: Mon, 11 May 2026 15:52:01 +0100 Subject: [PATCH 43/43] Added example pipeline and config --- examples/ssi/config_ssi_sel_func.yml | 39 +++++++++++++++++++++++ examples/ssi/pipeline_ssi_sel_func.yml | 43 ++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 examples/ssi/config_ssi_sel_func.yml create mode 100644 examples/ssi/pipeline_ssi_sel_func.yml diff --git a/examples/ssi/config_ssi_sel_func.yml b/examples/ssi/config_ssi_sel_func.yml new file mode 100644 index 000000000..d6471a7ae --- /dev/null +++ b/examples/ssi/config_ssi_sel_func.yml @@ -0,0 +1,39 @@ +# Values in this section are accessible to all the different stages. +# They can be overridden by individual stages though. +global: + # This is read by many stages that read complete + # catalog data, and tells them how many rows to read + # at once + chunk_rows: 100_000 + # These mapping options are also read by a range of stages + pixelization: healpix + nside: 256 + sparse: true # Generate sparse maps - faster if using small areas + + +TXIngestSSIDetectionDESBalrog: + name: TXIngestSSIDetectionDESBalrog + +TXIngestSSIMatchedDESBalrog: + name: TXIngestSSIMatchedDESBalrog + +TXAuxiliarySSIMaps: + name: TXAuxiliarySSIMaps + mag_delta: 0.5 + smooth_det_frac: false + min_depth: 23 + max_depth: 24 + smooth_window: 2.0 + + +TXPredictDensitySelectionFunction: + name: TXPredictDensitySelectionFunction + systmaps_dir: /global/cfs/cdirs/lsst/groups/WLSS/users/tcornish/balrog_txpipe_example/survey_properties/ + degree: 2 + mask_thresh: 0. + mask_thresh_coarse: 0. + inj_count_thresh: 1 + sel_func_err_type: gaussian + block_size: 2_000 + bins_to_model: [0,1] + diff --git a/examples/ssi/pipeline_ssi_sel_func.yml b/examples/ssi/pipeline_ssi_sel_func.yml new file mode 100644 index 000000000..c7bd15184 --- /dev/null +++ b/examples/ssi/pipeline_ssi_sel_func.yml @@ -0,0 +1,43 @@ +# Stages to run +stages: + - name: TXIngestSSIDetectionDESBalrog + - name: TXIngestSSIMatchedDESBalrog + - name: TXAuxiliarySSIMaps + - name: TXPredictDensitySelectionFunction + +modules: txpipe + +# Where to put outputs +output_dir: /pscratch/sd/t/tcornish/txpipe_out/ + +# How to run the pipeline: mini, parsl, or cwl +launcher: + name: mini + interval: 1.0 + +# Where to run the pipeline: cori-interactive, cori-batch, or local +site: + name: local + max_threads: 2 + +# configuration settings +config: examples/ssi/config_ssi_sel_func.yml + +inputs: + # See README for paths to download these files + + #NERSC locations of public DES balrog data + balrog_detection_catalog: /global/cfs/cdirs/lsst/groups/WLSS/users/tcornish/balrog_txpipe_example/balrog_detection_catalog_sof_y3-merged_v1.2.fits + balrog_matched_catalog: /global/cfs/cdirs/lsst/groups/WLSS/users/tcornish/balrog_txpipe_example/balrog_matched_catalog_sof_y3-merged_v1.2.fits + mask: /global/cfs/cdirs/lsst/groups/WLSS/users/tcornish/balrog_txpipe_example/des_y3_mask.hdf5 + #aux_ssi_maps: /home/cornisht/cloned_repos/TXPipe/data/example/outputs_ssi_des_depth/aux_ssi_maps.hdf5 + + fiducial_cosmology: data/fiducial_cosmology.yml + +# if supported by the launcher, restart the pipeline where it left off +# if interrupted +resume: false +# where to put output logs for individual stages +log_dir: data/example/outputs_ssi_des_sel_func/logs/ +# where to put an overall parsl pipeline log +pipeline_log: data/example/outputs_ssi_des_sel_func/log.txt