From 03c7b0165b946821e559352aa78193ddaaef4486 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 29 Apr 2026 17:01:39 +0100 Subject: [PATCH 1/8] Reapply "Work in progress on desy3 config" This reverts commit e1dca2634e1a1547965773e98fb3cf45ec8260dd. --- examples/desy3/config.yml | 397 +++++++++++++++++------------------- examples/desy3/pipeline.yml | 130 ++++++++++-- 2 files changed, 298 insertions(+), 229 deletions(-) diff --git a/examples/desy3/config.yml b/examples/desy3/config.yml index 1094244cd..3b45235c4 100644 --- a/examples/desy3/config.yml +++ b/examples/desy3/config.yml @@ -1,225 +1,196 @@ -# Values in this section are accessible to all the different stages. -# They can be overridden by individual stages though. global: - # This is read by many stages that read complete - # catalog data, and tells them how many rows to read - # at once - chunk_rows: 1000000000 - # These mapping options are also read by a range of stages pixelization: healpix - nside: 128 - sparse: True # Generate sparse maps - faster if using small areas - -# Here's the list of stages we are using: -# -# TXSourceSelectorMetacal -# TXShearCalibration -# TXTruthLensCatalogSplitter -# TXStarCatalogSplitter -# TXPhotozPlots -# TXSourceMaps -# TXLensMaps -# TXAuxiliarySourceMaps -# TXAuxiliaryLensMaps -# TXSourceNoiseMaps -# TXDensityMaps -# TXMapPlots -# TXTracerMetadata -# TXJackknifeCenters -# TXTwoPoint -# TXBlinding -# TXTwoPointTheoryReal -# TXTwoPointPlots -# TXSourceDiagnosticPlots -# TXLensDiagnosticPlots -# TXGammaTFieldCenters # Compute and plot gamma_t around center points -# TXGammaTStars # Compute and plot gamma_t around bright stars -# TXGammaTRandoms # Compute and plot gamma_t around randoms -# TXRoweStatistics # Compute and plot Rowe statistics -# TXGalaxyStarDensity # Compute and plot the star-galaxy density cross-correlation -# TXGalaxyStarShear # Compute and plot the star-galaxy shear cross-correlation -# TXPSFDiagnostics # Compute and plots other PSF diagnostics -# TXBrighterFatterPlot # Make plots tracking the brighter-fatter effect -# TXConvergenceMaps # Make convergence kappa maps from g1, g2 maps -# TXConvergenceMapPlots # Plot the convergence map -# TXMapCorrelations # plot the correlations between systematics and data -# TXApertureMass # Compute aperture-mass statistics - -##- name: TXRealGaussianCovariance # Compute covariance of real-space correlations -##- name: TXTwoPointFourier # Compute power spectra C_ell -##- name: TXFourierNamasterCovariance # Compute the C_ell covariance -##- name: TXFourierTJPCovariance # Compute the C_ell covariance with TJPCov + chunk_rows: 1_000_000 #default for TXPipe stages (can be overwritten in individual stages) + chunk_size: 100_000 #default for RAIL stages (can be overwritten in individual stages) + sparse: true + nside: 4096 + + +TXIngestDESY3Gold: + name: TXIngestDESY3Gold + input_group_name: catalog/gold/ + chunk_rows: 10_000_000 + +TXIngestDESY3Footprint: + name: TXIngestDESY3Footprint + input_nside: 4096 + input_filepaths: + - data/desy3/inputs/patch_maps/y3a2_footprint_griz_1exp_v2.0_healsparse.hs + - data/desy3/inputs/patch_maps/y3a2_griz_o.4096_t.32768_coverfoot_EQU_healsparse.hs + - data/desy3/inputs/patch_maps/y3a2_foreground_mask_v2.1_healsparse.hs + - data/desy3/inputs/patch_maps/y3a2_badregions_mask_v2.1_healsparse.hs + - data/desy3/inputs/patch_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highdens_vl05_vlim_zmask_ZMAX.hs + - data/desy3/inputs/patch_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highlum_vl10_vlim_zmask_ZMAX.hs + - data/desy3/inputs/patch_maps/y3a2_gold_2_2_1_sof_nside4096_nest_i_depth.hs + + input_labels: + - footprint/footprint + - footprint/fracdet_griz + - foreground/foreground + - foreground/badregions + - zmax/highdens + - zmax/highlum + - depth/depth_i + +TXIngestDESY3SpeczCat: + name: TXIngestDESY3SpeczCat + + +TXCustomMask: + name: TXCustomMask + fracdet_name: footprint/fracdet_griz + nside: 4096 + #cuts from Rodriguez-Monroy et al https://arxiv.org/abs/2105.13540 + cuts: + - "footprint/footprint == 1" + - "footprint/fracdet_griz > 0.8" + - "foreground/foreground == 0" + - "foreground/badregions <= 1" + - "zmax/highdens >= 0.65" + - "zmax/highlum >= 0.95" + - "depth/depth_i > 22.2" + degrade: True + + + +PZPrepareEstimatorLens: + name: PZPrepareEstimatorLens + classname: DNFInformer + hdf5_groupname: photometry + redshift_col: redshift + bands: ['mag_g', 'mag_r', 'mag_i', 'mag_z'] + err_bands: ['mag_err_g', 'mag_err_r', 'mag_err_i', 'mag_err_z' ] + mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6} + +PZEstimatorLens: + name: PZEstimatorLens + classname: DNFEstimator + hdf5_groupname: photometry + redshift_col: redshift + bands: ['mag_g', 'mag_r', 'mag_i', 'mag_z'] + err_bands: ['mag_err_g', 'mag_err_r', 'mag_err_i', 'mag_err_z' ] + mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6} + selection_mode: 2 #0: ENF, 1: ANF, 2: DNF see https://github.com/LSSTDESC/rail_dnf + nondetect_val: .inf + +PZRailSummarizeLens: + name: PZRailSummarizeLens + classname: PZRailPZSummarize + catalog_group: lens + tomography_name: lens + nsamples: 0 + zmin: 0.0 + zmax: 2.0 + nzbins: 150 + point_estimate_key: DNF_ZN + hdf5_groupname: tomography + summarizer: PointEstHistMaskedSummarizer + module: "rail.estimation.algos.point_est_hist" + chunk_size: 100_000 + +PZRailSummarizeSource: + name: PZRailSummarizeSource + classname: PZRailPZSummarize + catalog_group: source + tomography_name: source + nsamples: 0 + zmin: 0.0 + zmax: 2.0 + nzbins: 150 + point_estimate_key: DNF_ZN + hdf5_groupname: tomography + summarizer: PointEstHistMaskedSummarizer + module: "rail.estimation.algos.point_est_hist" + chunk_size: 100_000 + +TXPhotozPlotSource: + name: TXPhotozPlotSource + +TXPhotozPlotLens: + name: TXPhotozPlotLens + +PZRealizationsPlotSource: + name: PZRealizationsPlotSource + + +TXCustomLensSelector: + name: TXCustomLensSelector + lens_zbin_edges: [0.20, 0.40, 0.55, 0.70, 0.85, 0.95, 1.05] + zcol: DNF_Z + selection_type: DESmaglim + maglim_band: i + bright_limit: 17.5 + a: 4 + b: 18 + extra_cols: [EXTENDED_CLASS_SOF, FLAGS_GOLD] #extra columns that are used in the sample selection #TO DO: make this automatic + apply_mask: True + + +TXLSSWeightsLinBinned: + name: TXLSSWeightsLinBinned + supreme_path_root: data/desy3/inputs/patch_survey_property_maps/ + nbin: 10 + outlier_fraction: 0.05 + pvalue_threshold: 0.05 # max p-value for maps to be included in the corrected (a very simple form of regularization) + allow_weighted_input: False + nside_coverage: 32 + +TXLSSDensityNullTests: + name: TXLSSDensityNullTests + supreme_path_root: data/desy3/inputs/patch_survey_property_maps/ + nbin: 10 + outlier_fraction: 0.05 + pvalue_threshold: 0.05 # max p-value for maps to be included in the corrected (a very simple form of regularization) + equal_area_bins: True # if you are using binned 1d correlations, should the bins have equal area (set False for equal spacing) + simple_cov: True # if True will use a diagonal shot noise only covariance for the 1d relations + diag_blocks_only: True # If True, will compute only the diagonal blocks of the 1D covariance matrix (no correlation between SP maps) + b0: [1.5, 1.8, 1.8, 1.9, 2.3, 2.3] + allow_weighted_input: False + nside_coverage: 32 + +TXLensCatalogSplitter: + name: TXLensCatalogSplitter + redshift_column: DNF_Z + +PZPrepareEstimatorSource: + name: PZPrepareEstimatorSource + classname: DNFInformer + hdf5_groupname: photometry + redshift_col: redshift + bands: ['mag_r', 'mag_i', 'mag_z'] + err_bands: ['mag_err_r', 'mag_err_i', 'mag_err_z' ] + mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6} + +PZEstimatorSource: + name: PZEstimatorSource + classname: DNFEstimator + hdf5_groupname: shear + redshift_col: redshift + bands: ['mag_r', 'mag_i', 'mag_z'] + err_bands: ['mag_err_r', 'mag_err_i', 'mag_err_z' ] + mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6} + selection_mode: 2 #0: ENF, 1: ANF, 2: DNF see https://github.com/LSSTDESC/rail_dnf + nondetect_val: .inf + +TXSourceTomography: + bands: ["r", "i", "z"] + source_zbin_edges: [0.0, 0.2, 0.4, 0.6, 0.8] + seed: 6548756 + spec_mag_column_format: "photometry/mag_{band}" + TXSourceSelectorMetacal: - input_pz: True + input_pz: False true_z: False - bands: riz #used for selection + bands: ["r", "i", "z"] T_cut: 0.5 s2n_cut: 10.0 max_rows: 10000000000 delta_gamma: 0.02 + use_diagonal_response : False source_zbin_edges: [0.0, 0.2, 0.4, 0.6, 0.8] - resp_mean_diag : True - -TXSourceSelectorLensfit: - bands: ri #used for selection - T_cut: 0.0 - s2n_cut: 0.0 - max_rows: 1000 - delta_gamma: 0.02 - source_zbin_edges: [0.1, 0.3, 0.5, 0.7, 0.9, 1.2] - nsrc: 5 - nlens: 0 - shear_catalog_type: 'lensfit' - input_pz: True - # may also need one for r_cpar_cut - input_m_is_weighted: True - -TXShearCalibration: {} -TXTruthLensCatalogSplitter: {} -TXStarCatalogSplitter: {} -TXPhotozPlots: {} - -TXSourceMaps: - sparse: True # Generate sparse maps - faster if using small areas - -TXLensMaps: - sparse: True # Generate sparse maps - faster if using small areas - -TXAuxiliarySourceMaps: - flag_exponent_max: 8 - nside: 128 - -TXAuxiliaryLensMaps: - flag_exponent_max: 8 - bright_obj_threshold: 10.0 # The magnitude threshold for a object to be counted as bright - depth_band : i - snr_threshold: 20.0 # The S/N value to generate maps for (e.g. 5 for 5-sigma depth) - snr_delta: 1.0 # The range threshold +/- delta is used for finding objects at the boundary - -TXSourceNoiseMaps: {} -TXDensityMaps: {} -TXMapPlots: {} -TXTracerMetadata: {} - -TXJackknifeCenters: - npatch: 100 - every_nth: 100 - -TXTwoPoint: - calcs: [0] - bin_slop: 0.5 - delta_gamma: 0.02 - do_pos_pos: False - do_shear_shear: False - do_shear_pos: False - flip_g2: False - min_sep: 2.5 - max_sep: 250 - nbins: 20 - verbose: 0 - subtract_mean_shear: True - source_bins: [0] - lens_bins: [0] - -TXBlinding: - seed: 1972 ## seed uniquely specifies the shift in parameters - Omega_b: [0.0485, 0.001] ## fiducial_model_value, shift_sigma - Omega_c: [0.2545, 0.01] - w0: [-1.0, 0.1] - h: [0.682, 0.02] - sigma8: [0.801, 0.01] - n_s: [0.971, 0.03] - b0: 0.95 ## we use bias of the form b0/g - delete_unblinded: True - -TXTwoPointTheoryReal: {} -TXTwoPointPlots: {} - -TXSourceDiagnosticPlots: - psfT_min: 0.04 - psfT_max: 0.36 - T_min: 0.04 - T_max: 4.0 - nbins: 20 - bands: ri - -TXLensDiagnosticPlots: {} - -TXGammaTFieldCenters: {} - -TXGammaTStars: {} - -TXGammaTRandoms: {} - -TXRoweStatistics: - psf_size_units: 'T' - -TXTauStatistics: - flip_g2: False - -TXFocalPlanePlot: {} - -TXGalaxyStarDensity: {} - - - - - -TXExposureInfo: - dc2_name: '1.2p' - - -# Mock version of stacking: -TXSourceTrueNumberDensity: - nz: 301 - zmax: 3.0 - -# Mock version of stacking: -TXTrueNumberDensity: - nz: 301 - zmax: 3.0 - - - - -TXGammaTBrightStars: {} - -TXGammaTDimStars: {} - - - - -TXDiagnosticMaps: - sparse: True # Generate sparse maps - faster if using small areas - snr_threshold: 10.0 - snr_delta: 1.0 - # pixelization: gnomonic - pixel_size: 0.2 - ra_cent: 62. - dec_cent: -35. - npix_x: 60 - npix_y: 60 - depth_cut: 23.0 - - -TXRealGaussianCovariance: - min_sep: 2.5 - max_sep: 60. - nbins: 10 - pickled_wigner_transform: data/example/inputs/wigner.pkl - - - -TXTwoPointFourier: - flip_g2: True - bandwidth: 100 - - - -TXMapCorrelations: - supreme_path_root: data/example/inputs/supreme - outlier_fraction: 0.05 - nbin: 20 +TXRandomCat: + depth_band: i + density: 10 # gals per sq arcmin diff --git a/examples/desy3/pipeline.yml b/examples/desy3/pipeline.yml index cdc7eb949..d71d13959 100644 --- a/examples/desy3/pipeline.yml +++ b/examples/desy3/pipeline.yml @@ -2,8 +2,93 @@ # Stages to run stages: - - name: TXSourceSelectorMetacal # select and split objects into source bins - - name: TXShearCalibration # Calibrate and split the source sample tomographically + - name: TXIngestDESY3Gold + - name: TXIngestDESY3Footprint + - name: TXIngestDESY3SpeczCat + - name: TXIngestDESY3Shear + - name: TXCustomMask + - name: PZPrepareEstimatorLens + classname: DNFInformer + aliases: + input: spectroscopic_catalog + model: lens_photoz_model + - name: PZEstimatorLens + classname: DNFEstimator + nprocess: 4 + threads_per_process: 2 + aliases: + model: lens_photoz_model + input: photometry_catalog + output: lens_photoz_pdfs + - name: TXCustomLensSelector + - name: TXLensCatalogSplitter + - name: PZRailSummarizeLens #Run the RAIL PZ summarizer on each tomo bin + classname: PZRailPZSummarize + nprocess: 4 + threads_per_process: 2 + aliases: + tomography_catalog: lens_tomography_catalog_unweighted + photoz_pdfs: lens_photoz_pdfs + model: lens_photoz_model + photoz_stack: lens_photoz_stack + photoz_realizations: lens_photoz_realizations + + - name: PZPrepareEstimatorSource + classname: DNFInformer + aliases: + input: spectroscopic_catalog + model: source_photoz_model + + - name: PZEstimatorSource + classname: DNFEstimator + aliases: + model: source_photoz_model + input: shear_catalog + output: source_photoz_pdfs + - name: TXLSSDensityNullTests + - name: TXLSSWeightsLinBinned + - name: TXSourceTomography + - name: TXSourceSelectorMetacal + - name: TXShearCalibration + - name: TXAuxiliarySourceMaps + - name: TXRandomCat + - name: TXJackknifeCenters + - name: TXLensMaps + - name: TXDensityMaps + - name: TXTracerMetadata + - name: TXSourceMaps + - name: TXTwoPoint + + - name: PZRailSummarizeSource #Run the RAIL PZ summarizer on each tomo bin + classname: PZRailPZSummarize + nprocess: 4 + threads_per_process: 2 + aliases: + tomography_catalog: shear_tomography_catalog + photoz_pdfs: source_photoz_pdfs + model: source_photoz_model + photoz_stack: shear_photoz_stack + photoz_realizations: source_photoz_realizations + + - name: TXPhotozPlotSource # Plot the bin n(z) + classname: TXPhotozPlot + aliases: + photoz_stack: shear_photoz_stack + nz_plot: nz_source + + - name: TXPhotozPlotLens # Plot the bin n(z) + classname: TXPhotozPlot + aliases: + photoz_stack: lens_photoz_stack + nz_plot: nz_lens + + - name: PZRealizationsPlotSource # Plot n(z) realizations + classname: PZRealizationsPlot + aliases: + photoz_realizations: source_photoz_realizations + photoz_realizations_plot: source_photoz_realizations_plot + + #- name: TXPhotozPlots #- name: TXPhotozSourceStack #- name: TXTruthLensCatalogSplitter @@ -70,9 +155,15 @@ launcher: site: name: local - max_threads: 128 + max_threads: 12 + +modules: > + txpipe + rail.estimation.algos.dnf + rail.estimation.algos.naive_stack + rail.estimation.algos.point_est_hist + rail.estimation.algos.uniform_binning -modules: txpipe # where to find any modules that are not in this repo, # and any other code we need. @@ -88,22 +179,29 @@ config: examples/desy3/config.yml # These are overall inputs to the whole pipeline, not generated within it inputs: - shear_catalog: //global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/shear_catalog_desy3_unmasked_withfakez_v2.h5 - photometry_catalog: - #shear_tomography_catalog: /global/cfs/cdirs/desc-wl/projects/txpipe-sys-tests/des-y3/shear_tomography_desy3_unmasked_test.h5 - lens_tomography_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/lens_tomography_catalog_desy1_RM.h5 - lens_photoz_stack: - mask: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/mask_desy1.h5 - star_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/DES_psf_y3_catalog.h5 - calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat + + des_photometry_catalog: data/desy3/inputs/DESY3_GOLD_2_2.1_testpatch.h5 + des_specz_catalog: data/desy3/inputs/y6gold22_trainnov20_matchedtoy3.fits + des_shear_catalog: data/desy3/inputs/DESY3_metacal_v03-004_testpatch.h5 fiducial_cosmology: data/fiducial_cosmology.yml - random_cats_source: Null - shear_tomography_classifier: none - flow: data/example/inputs/example_flow.pkl + + + # shear_catalog: //global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/shear_catalog_desy3_unmasked_withfakez_v2.h5 + # photometry_catalog: + # #shear_tomography_catalog: /global/cfs/cdirs/desc-wl/projects/txpipe-sys-tests/des-y3/shear_tomography_desy3_unmasked_test.h5 + # lens_tomography_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/lens_tomography_catalog_desy1_RM.h5 + # lens_photoz_stack: + # mask: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/mask_desy1.h5 + # star_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/DES_psf_y3_catalog.h5 + # calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat + # fiducial_cosmology: data/fiducial_cosmology.yml + # random_cats_source: Null + # shear_tomography_classifier: none + # flow: data/example/inputs/example_flow.pkl # if supported by the launcher, restart the pipeline where it left off # if interrupted -resume: False +resume: resume # where to put output logs for individual stages log_dir: data/desy3/logs From 2666636cc0c45a205394027400fd52671ac40529 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 29 Apr 2026 17:01:45 +0100 Subject: [PATCH 2/8] Reapply "set desy3 pipeline for nersc" This reverts commit 8f014b91b6a968c6d12ab366bc27193a004bc1c9. --- examples/desy3/pipeline.yml | 210 ++++++++++++++++++------------------ 1 file changed, 107 insertions(+), 103 deletions(-) diff --git a/examples/desy3/pipeline.yml b/examples/desy3/pipeline.yml index d71d13959..78a01a015 100644 --- a/examples/desy3/pipeline.yml +++ b/examples/desy3/pipeline.yml @@ -1,68 +1,101 @@ # Stages to run stages: - + # INGESTION - name: TXIngestDESY3Gold - name: TXIngestDESY3Footprint - name: TXIngestDESY3SpeczCat - name: TXIngestDESY3Shear - name: TXCustomMask + + # PER-OBJECT PHOTO-Z + + # Train the DNF model on the spectroscopic sample - name: PZPrepareEstimatorLens classname: DNFInformer aliases: input: spectroscopic_catalog model: lens_photoz_model + + # Apply the trained DNF model to the photometric sample to get lens photo-z - name: PZEstimatorLens classname: DNFEstimator - nprocess: 4 - threads_per_process: 2 + nprocess: 32 + threads_per_process: 4 aliases: model: lens_photoz_model input: photometry_catalog output: lens_photoz_pdfs - - name: TXCustomLensSelector - - name: TXLensCatalogSplitter - - name: PZRailSummarizeLens #Run the RAIL PZ summarizer on each tomo bin - classname: PZRailPZSummarize - nprocess: 4 - threads_per_process: 2 - aliases: - tomography_catalog: lens_tomography_catalog_unweighted - photoz_pdfs: lens_photoz_pdfs - model: lens_photoz_model - photoz_stack: lens_photoz_stack - photoz_realizations: lens_photoz_realizations + # Train the DNF model on the spectroscopic sample. This uses a restricted set of bands + # measured under metacalibration - name: PZPrepareEstimatorSource classname: DNFInformer aliases: input: spectroscopic_catalog model: source_photoz_model + # Apply the trained DNF model to the metacalibration photometric sample to get source photo-z - name: PZEstimatorSource + nprocess: 32 + threads_per_process: 4 classname: DNFEstimator aliases: model: source_photoz_model input: shear_catalog output: source_photoz_pdfs + + # SELECTION & CALIBRATION + + # DES-Y3 maglim selection + - name: TXCustomLensSelector + nprocess: 32 + + # Split the lens catalog into tomographic bins + - name: TXLensCatalogSplitter + nprocess: 8 + + # Compute correlations between survey properties and lens density - name: TXLSSDensityNullTests + threads_per_process: 8 + + # Compute weights that mitigate correlations between survey properties and lens density - name: TXLSSWeightsLinBinned + threads_per_process: 8 + + # Train a tomography model from the spectroscopic sample for the sources - name: TXSourceTomography + threads_per_process: 8 + + # Compute metacalibration shear response and selection bias corrections - name: TXSourceSelectorMetacal + nprocess: 32 + threads_per_process: 4 + + # Generate calibrated tomographic source catalogs - name: TXShearCalibration - - name: TXAuxiliarySourceMaps - - name: TXRandomCat - - name: TXJackknifeCenters - - name: TXLensMaps - - name: TXDensityMaps - - name: TXTracerMetadata - - name: TXSourceMaps - - name: TXTwoPoint + nproces: 8 + threads_per_process: 4 + + # PHOTO-Z SUMMARIZATION + + # Compute the ensemble n(z) for the lens bins using point estimate histograms + - name: PZRailSummarizeLens + classname: PZRailPZSummarize + nprocess: 32 + threads_per_process: 4 + aliases: + tomography_catalog: lens_tomography_catalog_unweighted + photoz_pdfs: lens_photoz_pdfs + model: lens_photoz_model + photoz_stack: lens_photoz_stack + photoz_realizations: lens_photoz_realizations - - name: PZRailSummarizeSource #Run the RAIL PZ summarizer on each tomo bin + # Compute the ensemble n(z) for the source bins using point estimate histograms + - name: PZRailSummarizeSource classname: PZRailPZSummarize - nprocess: 4 - threads_per_process: 2 + nprocess: 32 + threads_per_process: 4 aliases: tomography_catalog: shear_tomography_catalog photoz_pdfs: source_photoz_pdfs @@ -70,84 +103,69 @@ stages: photoz_stack: shear_photoz_stack photoz_realizations: source_photoz_realizations - - name: TXPhotozPlotSource # Plot the bin n(z) + # Plot the source n(z) distribution + - name: TXPhotozPlotSource classname: TXPhotozPlot aliases: photoz_stack: shear_photoz_stack nz_plot: nz_source - - name: TXPhotozPlotLens # Plot the bin n(z) + # Plot the lens n(z) distribution + - name: TXPhotozPlotLens classname: TXPhotozPlot aliases: photoz_stack: lens_photoz_stack nz_plot: nz_lens - - name: PZRealizationsPlotSource # Plot n(z) realizations + # Plot realizations of the source n(z) distribution + - name: PZRealizationsPlotSource classname: PZRealizationsPlot aliases: photoz_realizations: source_photoz_realizations photoz_realizations_plot: source_photoz_realizations_plot + + # TWO-POINT ESTIMATION + + # Generate random catalogs for the lens sample, for use in two-point estimation + - name: TXRandomCat + nprocess: 32 + threads_per_process: 4 + + # Collate metadata for the samples + - name: TXTracerMetadata + + # Choose jackknife centers for two-point estimation speed from randoms + - name: TXJackknifeCenters + + # Measure the real-space two-point correlation functions + - name: TXTwoPoint + threads_per_process: 128 + + # Plot the correlations functions + - name: TXTwoPointPlots + + # Pretend to blind the two-point functions + - name: TXNullBlinding + # - name: TXRealGaussianCovariance + + # MAPPING - #- name: TXPhotozPlots - #- name: TXPhotozSourceStack - #- name: TXTruthLensCatalogSplitter - #- name: TXStarCatalogSplitter # Split the star catalog into separate bins (psf/non-psf) - #- name: TXSourceMaps - #nprocess: 6 - #threads_per_process: 1 - #nodes: 1 - #- name: TXLensMaps - #- name: TXAuxiliarySourceMaps # make PSF and flag maps - #- name: TXAuxiliaryLensMaps # make depth and bright object maps - #- name: TXSourceNoiseMaps # noise maps for sources using random rotations - # stage below is not working because TXAuxiliaryLensMaps is broken - #- name: TXLensNoiseMaps # noise maps for lenses using density splits - #- name: TXDensityMaps # turn mask and ngal maps into overdensity maps - #- name: TXMapPlots # make pictures of all the maps - #- name: TXTracerMetadata # collate metadata - #- name: TXRandomCat - #- name: TXJackknifeCenters # Split the area into jackknife regions - #- name: TXTwoPoint # Compute real-space 2-point correlations - # threads_per_process: 128 - #- name: TXBlinding # Blind the data following Muir et al - #threads_per_process: 2 - #- name: TXTwoPointTheoryReal # compute theory using CCL to save in sacc file and plot later - #- name: TXTwoPointPlots # Make plots of 2pt correlations - #- name: TXSourceDiagnosticPlots # Make a suite of diagnostic plots - #nprocess: 8 - #nodes: 2 - #- name: TXLensDiagnosticPlots # Make a suite of diagnostic plots - #- name: TXGammaTFieldCenters # Compute and plot gamma_t around center points # ERROR: EXPOSURE FILE - ##threads_per_process: 2 - #- name: TXGammaTStars # Compute and plot gamma_t around bright stars - #threads_per_process: 2 - #- name: TXGammaTRandoms # Compute and plot gamma_t around randoms - #threads_per_process: 2 - #- name: TXRoweStatistics # Compute and plot Rowe statistics - # threads_per_process: 128 - #- name: TXFocalPlanePlot # mean PSF ellipticity and mean residual on the focal plane - #- name: TXGalaxyStarDensity # Compute and plot the star-galaxy density cross-correlation - #- name: TXGalaxyStarShear # Compute and plot the star-galaxy shear cross-correlation - #- name: TXPSFDiagnostics # Compute and plots other PSF diagnostics - #- name: TXBrighterFatterPlot # Make plots tracking the brighter-fatter effect - #- name: TXConvergenceMaps # Make convergence kappa maps from g1, g2 maps #ERROR: WLMASSMAP - #- name: TXConvergenceMapPlots # Plot the convergence map - #- name: TXMapCorrelations # plot the correlations between systematics and data # ERROR: NEEDS CONVERGENCE MAP - #- name: TXApertureMass # Compute aperture-mass statistics - ##threads_per_process: 2 - # Disabled since not yet synchronised with new Treecorr MPI - # - name: TXSelfCalibrationIA # Self-calibration intrinsic alignments of galaxies - # - # Disabling these as they takes too long for a quick test. - # The latter three need NaMaster - - ##- name: TXRealGaussianCovariance # Compute covariance of real-space correlations - ##- name: TXTwoPointFourier # Compute power spectra C_ell - ##- name: TXFourierNamasterCovariance # Compute the C_ell covariance - ##- name: TXFourierTJPCovariance # Compute the C_ell covariance with TJPCov + # Make PSF and similar maps for the sources + - name: TXAuxiliarySourceMaps + + # Make maps of the lens counts + - name: TXLensMaps + + # Make maps of the lens over-density + - name: TXDensityMaps + + # Make maps of the shear field + - name: TXSourceMaps + + # Plot all the maps + - name: TXMapPlots -#=========== launcher: name: mini @@ -179,25 +197,11 @@ config: examples/desy3/config.yml # These are overall inputs to the whole pipeline, not generated within it inputs: - des_photometry_catalog: data/desy3/inputs/DESY3_GOLD_2_2.1_testpatch.h5 - des_specz_catalog: data/desy3/inputs/y6gold22_trainnov20_matchedtoy3.fits des_shear_catalog: data/desy3/inputs/DESY3_metacal_v03-004_testpatch.h5 + des_specz_catalog: data/desy3/inputs/y6gold22_trainnov20_matchedtoy3.fits fiducial_cosmology: data/fiducial_cosmology.yml - - - # shear_catalog: //global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/shear_catalog_desy3_unmasked_withfakez_v2.h5 - # photometry_catalog: - # #shear_tomography_catalog: /global/cfs/cdirs/desc-wl/projects/txpipe-sys-tests/des-y3/shear_tomography_desy3_unmasked_test.h5 - # lens_tomography_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/lens_tomography_catalog_desy1_RM.h5 - # lens_photoz_stack: - # mask: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/mask_desy1.h5 - # star_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/DES_psf_y3_catalog.h5 - # calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat - # fiducial_cosmology: data/fiducial_cosmology.yml - # random_cats_source: Null - # shear_tomography_classifier: none - # flow: data/example/inputs/example_flow.pkl + star_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/DES_psf_y3_catalog.h5 # if supported by the launcher, restart the pipeline where it left off # if interrupted From 8fccdccb571df95a080a77242f2105e23425ba8d Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 29 Apr 2026 17:01:49 +0100 Subject: [PATCH 3/8] Reapply "set desy3 pipeline for nersc" This reverts commit a205466a55f3a48d6ebaa071ce27836779ca8f3a. --- examples/desy3/pipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/desy3/pipeline.yml b/examples/desy3/pipeline.yml index 78a01a015..9b056aa91 100644 --- a/examples/desy3/pipeline.yml +++ b/examples/desy3/pipeline.yml @@ -172,8 +172,8 @@ launcher: interval: 1.0 site: - name: local - max_threads: 12 + name: nersc-interactive + max_threads: 128 modules: > txpipe From ecf8c90959450ba6f8f8e3cce5f99dfcef248e54 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Wed, 29 Apr 2026 17:03:23 +0100 Subject: [PATCH 4/8] Reapply "Updates to fix DES-Y3 runs after other changes" This reverts commit 64e2c1ffa3bdbe327b5fa8eeed4ab830307caa2e. --- txpipe/ingest/base.py | 2 +- txpipe/ingest/stage3/desy3.py | 121 ++++++++++++++++++++++++++++- txpipe/lens_selector.py | 8 +- txpipe/lssweights.py | 6 ++ txpipe/masks.py | 4 +- txpipe/metadata.py | 2 - txpipe/rail/summarize.py | 11 ++- txpipe/random_cats.py | 17 +++- txpipe/source_selection/base.py | 2 + txpipe/source_selection/metacal.py | 24 ++++-- txpipe/utils/pixel_schemes.py | 2 +- 11 files changed, 178 insertions(+), 21 deletions(-) diff --git a/txpipe/ingest/base.py b/txpipe/ingest/base.py index 5dbc19519..4a6f4d2a1 100644 --- a/txpipe/ingest/base.py +++ b/txpipe/ingest/base.py @@ -282,7 +282,7 @@ def process_maps(self, input_filepaths, input_labels, output_name): metadata = { "pixelization": "healpix", "nside": self.config["input_nside"], - "nest": False, # currently TXPipe defaults to ring format + "nest": True, # currently TXPipe defaults to ring format } print(f"Input nside {self.config['input_nside']}") diff --git a/txpipe/ingest/stage3/desy3.py b/txpipe/ingest/stage3/desy3.py index 4f58c22ea..8cad28cd3 100644 --- a/txpipe/ingest/stage3/desy3.py +++ b/txpipe/ingest/stage3/desy3.py @@ -1,6 +1,20 @@ -from ...data_types import HDFFile, MapsFile, FitsFile -from ..base import TXIngestCatalogH5, TXIngestMapsHsp, TXIngestCatalogFits +from ...data_types import HDFFile, MapsFile, FitsFile, ShearCatalog +from ..base import TXIngestCatalogH5, TXIngestMapsHsp, TXIngestCatalogFits, PipelineStage from ceci.config import StageParameter +import numpy as np + +def to_native_endian(arr: np.ndarray) -> np.ndarray: + """ + Convert a NumPy array to the machine's native byte order. + + Various downstream stages use NUMBA, which doesn't like + non-native endianness, and some of the DES Y3 data files seem + to be big-endian. + """ + native_dtype = arr.dtype.newbyteorder('=') + # copy=False avoids copying if already correct + return arr.astype(native_dtype, copy=False) + class TXIngestDESY3Gold(TXIngestCatalogH5): @@ -146,3 +160,106 @@ def run(self): column_names, dummy_columns, ) + + +class TXIngestDESY3Shear(PipelineStage): + name = "TXIngestDESY3Shear" + parallel = False + inputs = [ + ("des_shear_catalog", HDFFile), + ] + + outputs = [ + ("shear_catalog", ShearCatalog), + ] + + config_options = { + "input_group_name": StageParameter(str, "catalog/", msg="Input group name in the HDF5 file."), + "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk."), + } + + def des_metacal_flux_to_mag(self, flux, flux_err): + # Seems to use a zero-point of 30 based on comparison + # to the gold catalog. + mag = -2.5 * np.log10(flux) + 30 + mag_err = 2.5 / np.log(10) * (flux_err / flux) + return mag, mag_err + + + def run(self): + """ + Run the analysis for this stage. + """ + print("Ingesting DES Y3 Metacal catalog from") + + + subgroups_to_suffixes = { + "unsheared": "", + "sheared_1p": "_1p", + "sheared_1m": "_1m", + "sheared_2p": "_2p", + "sheared_2m": "_2m", + } + + single_columns = { + "unsheared/coadd_object_id": "id", + "unsheared/ra": "ra", + "unsheared/dec": "dec", + "unsheared/psf_T": "psf_T_mean", + "unsheared/psf_e1": "psf_e1", + "unsheared/psf_e2": "psf_e2", + "unsheared/flags": "flags", + } + + sheared_columns = { + "T": "T", + "T_err": "T_err", + "e_1": "g1", + "e_2": "g2", + "snr": "s2n", # we originally called the TXPipe version "s2n" instead of "snr" + # precisely to be more consistent with metacal, but now apparently + # metacal uses "snr" again. After this PR we should update to use "snr" everywhere. + "weight": "weight", + } + + fluxes = ["r", "i", "z"] + + # The input and output sections in the files + fin = self.open_input("des_shear_catalog") + fout = self.open_output("shear_catalog") + gin = fin.file["catalog/"] + gout = fout.file.create_group("shear") + gout.attrs['catalog_type'] = 'metacal' + + # save all the columns that are only measured once, not on all the + # sheared versions. In metacal this includes positions and PSF properties. + for input_col, output_col in single_columns.items(): + d = to_native_endian(gin[input_col][:]) + gout.create_dataset(output_col, data=d, chunks=True, compression="gzip") + + # Now save the stuff that is different for each sheared version + for variant, suffix in subgroups_to_suffixes.items(): + gin_sub = gin[variant] + + # First handle the ones that are just renamed, including the shears + # themselves and the SNR and weight etc. + for input_col, output_col in sheared_columns.items(): + d = to_native_endian(gin_sub[input_col][:]) + gout.create_dataset(output_col + suffix, data=d, chunks=True, compression="gzip") + + # Now deal with fluxes, which we have to turn into magnitudes. + # Note that we might change our mind on this and at some point + # just start storing fluxes. + for band in fluxes: + # convert fluxes to mags + flux = to_native_endian(gin_sub[f"flux_{band}"][:]) + flux_err = to_native_endian(gin_sub[f"flux_err_{band}"][:]) + + flux[flux < 0] = 0 + + mag, mag_err = self.des_metacal_flux_to_mag(flux, flux_err) + + # save mags and mag_errs + gout.create_dataset(f"mag_{band}{suffix}", data=mag, chunks=True) + gout.create_dataset(f"mag_err_{band}{suffix}", data=mag_err, chunks=True) + diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py index 0c264ec92..4b2ded81d 100755 --- a/txpipe/lens_selector.py +++ b/txpipe/lens_selector.py @@ -88,7 +88,11 @@ def run(self): # If we are going to remove lens galaxies that sit outside the mask, load the mask if self.config["apply_mask"]: with self.open_input("mask", wrapper=True) as f: - self.mask, self.mask_nside = f.read_healsparse("mask", return_all=True) + info = f.read_map_info("mask") + print(info) + self.mask_nside = info['nside'] + self.mask_nest = info['nest'] + self.mask = f.read_mask_healpix("mask") # We will collect the selection biases for each bin # as a matrix. We will collect together the different @@ -334,7 +338,7 @@ def select_in_footprint(self, phot_data): # with self.open_input('mask', wrapper=True) as f: # mask, nside= f.read_healsparse('mask', return_all=True) - pix = hp.ang2pix(self.mask_nside, phot_data["ra"], phot_data["dec"], lonlat=True) + pix = hp.ang2pix(self.mask_nside, phot_data["ra"], phot_data["dec"], lonlat=True, nest=self.mask_nest) s = np.where(self.mask[pix] == hp.UNSEEN, 0, 1) print(f"{len(s) - np.sum(s)}/{len(s)} objects removed because they are outside the mask") return s diff --git a/txpipe/lssweights.py b/txpipe/lssweights.py index a2bfa6d55..9eb591f20 100644 --- a/txpipe/lssweights.py +++ b/txpipe/lssweights.py @@ -156,6 +156,11 @@ def load_and_mask_sysmaps(self): nsys = len(sys_files) print(f"Found {nsys} total systematic maps") + if nsys == 0: + raise ValueError(f"No systematic maps found in {root} with extension .hs. " + "Please check your config supreme_path_root setting and " + "make sure it points to the right location") + with self.open_input("mask", wrapper=True) as map_file: # load *boolean* mask mask = map_file.read_mask( @@ -414,6 +419,7 @@ def run(self): self.sys_maps, self.sys_names, self.sys_meta = self.prepare_sys_maps() for ibin in range(self.Ntomo): + print("Computed density correlations for lens bin {0}/{1}".format(ibin + 1, self.Ntomo)) # compute density vs SP map data vector density_corrs = self.calculate_1d_density_correlations(ibin) diff --git a/txpipe/masks.py b/txpipe/masks.py index 10e53d72e..3d2077722 100644 --- a/txpipe/masks.py +++ b/txpipe/masks.py @@ -367,10 +367,10 @@ def make_binary_mask(self): import operator OP_MAP = { - ">": operator.gt, ">=": operator.ge, - "<": operator.lt, "<=": operator.le, + ">": operator.gt, + "<": operator.lt, "==": operator.eq, "!=": operator.ne, } diff --git a/txpipe/metadata.py b/txpipe/metadata.py index 3aff38b8a..c0f648d5b 100644 --- a/txpipe/metadata.py +++ b/txpipe/metadata.py @@ -60,10 +60,8 @@ def copy_source_metadata(self, meta_file, metadata, area, area_sq_arcmin): if shear_catalog_type == "metacal": copy(shear_tomo_file, "response", "tracers", "R_gamma_mean", meta_file, metadata) copy(shear_tomo_file, "response", "tracers", "R_S", meta_file, metadata) - copy(shear_tomo_file, "response", "tracers", "R_total", meta_file, metadata) copy(shear_tomo_file, "response", "tracers", "R_gamma_mean_2d", meta_file, metadata) copy(shear_tomo_file, "response", "tracers", "R_S_2d", meta_file, metadata) - copy(shear_tomo_file, "response", "tracers", "R_total_2d", meta_file, metadata) elif shear_catalog_type == "metadetect": copy(shear_tomo_file, "response", "tracers", "R", meta_file, metadata) copy(shear_tomo_file, "response", "tracers", "R_2d", meta_file, metadata) diff --git a/txpipe/rail/summarize.py b/txpipe/rail/summarize.py index f3e41d258..803d52b54 100644 --- a/txpipe/rail/summarize.py +++ b/txpipe/rail/summarize.py @@ -35,6 +35,7 @@ def run(self): # But there is also often a special bin_all # that is non-tomographic bins = self.get_bin_list() + print("Summarizing the following bins: ", bins) # These parameters are common to all the bins runs if "binned_catalog" in self.input_tags(): @@ -210,15 +211,19 @@ def get_bin_list(self): bins = [ f"bin_{bin_index}" for bin_index in unique_bins[unique_bins != -1] ] # do not include the "unselected" bin, -1 + bins.append("bin_all") # add the non-tomographic bin at the end return bins def get_extra_sub_config(self, bin): """ Additional config options needed by the PZSummarizers """ - with self.open_input("tomography_catalog", wrapper=True) as f: - tomo_path = f.path - bin_index = int(bin.split("_")[-1]) + from rail.core.common_params import TOMOGRAPHY_ALL + tomo_path = self.get_input("tomography_catalog") + if bin == "bin_all": + bin_index = TOMOGRAPHY_ALL + else: + bin_index = int(bin.split("_")[-1]) extra_sub_config = { "selected_bin": bin_index, diff --git a/txpipe/random_cats.py b/txpipe/random_cats.py index 4e8d21fe8..360b9efd8 100644 --- a/txpipe/random_cats.py +++ b/txpipe/random_cats.py @@ -44,6 +44,7 @@ class TXRandomCat(PipelineStage): "sample_rate": StageParameter( float, 0.5, msg="Fraction of random catalog to be retained in the sub-sampled catalog." ), + "depth_band": StageParameter(str, "", msg="Band to use for depth map when calculating density. If not set just use 'depth'"), } def run(self): @@ -56,11 +57,15 @@ def run(self): import healpix # Load the input depth map + depth_band = self.config["depth_band"] + if depth_band: + depth_band = "_" + depth_band with self.open_input("aux_lens_maps", wrapper=True) as maps_file: - depth = maps_file.read_map("depth/depth") - info = maps_file.read_map_info("depth/depth") + depth = maps_file.read_map(f"depth/depth{depth_band}") + info = maps_file.read_map_info(f"depth/depth{depth_band}") nside = info["nside"] scheme = choose_pixelization(**info) + print(scheme.nest, scheme.nside) # Load the input mask with self.open_input("mask", wrapper=True) as maps_file: @@ -70,6 +75,7 @@ def run(self): # This is a QP object n_of_z_object = f.read_ensemble() Ntomo = n_of_z_object.npdf - 1 # ensemble includes the non-tomo 2D n(z) + print("Generating randoms for {0} tomographic bins".format(Ntomo)) # We also generate comoving distances under a fiducial cosmology # for each random, for use in Rlens type metrics @@ -81,6 +87,10 @@ def run(self): depth = depth[pixel] frac = mask[pixel] npix = depth.size + print("npix = ", npix) + print("frac = ", frac) + print("depth = ", depth) + print("pixel = ", pixel) if len(pixel) == 1: raise ValueError("Only one pixel in depth map!") @@ -263,7 +273,8 @@ def run(self): pix_catalog = np.repeat(pixel[start_vertex:end_vertex], numbers[j, :][start_vertex:end_vertex]) # generate a random location within the pixel using healpix - ra, dec = healpix.randang(nside, pix_catalog, lonlat=True) + ra, dec = healpix.randang(nside, pix_catalog, lonlat=True, nest=scheme.nest) + print(ra.min(), ra.max(), dec.min(), dec.max()) N = len(pix_catalog) bin_index = np.repeat(j, N) diff --git a/txpipe/source_selection/base.py b/txpipe/source_selection/base.py index 86136fa86..7e82eb949 100644 --- a/txpipe/source_selection/base.py +++ b/txpipe/source_selection/base.py @@ -289,6 +289,8 @@ def write_global_values(self, outfile, calculators): calculators: list of Calculator objects, one for each tomographic bin and one for the 2D sample, that have been fed all the data and are ready to have their results collected and written out. """ + # hard link to bin column. needed for RAIL masked summarizer stages + outfile["tomography/class_id"] = outfile["tomography/bin"] for i, calculator in enumerate(calculators): stats = calculator.collect(self.comm, allgather=True) diff --git a/txpipe/source_selection/metacal.py b/txpipe/source_selection/metacal.py index 99130ff9c..ca008af52 100644 --- a/txpipe/source_selection/metacal.py +++ b/txpipe/source_selection/metacal.py @@ -40,8 +40,19 @@ def data_iterator(self): We call to a parent class method to do the main iteration; the work here is just choosing which columns to read. """ + with self.open_input("shear_catalog") as f: + if "flags_1p" in f.file["shear"]: + flag_mode = "variant" + else: + flag_mode = "single" + if self.rank == 0: + print(f"Using {flag_mode} flags") bands = self.config["bands"] - shear_cols = metacal_variants("T", "s2n", "g1", "g2", "flags", "weight") + shear_cols = metacal_variants("T", "s2n", "g1", "g2", "weight") + if flag_mode == "variant": + shear_cols += metacal_variants("flags") + else: + shear_cols += ["flags"] shear_cols += ["ra", "dec", "psf_T_mean"] shear_cols += band_variants(bands, "mag", "mag_err", shear_catalog_type="metacal") @@ -51,7 +62,13 @@ def data_iterator(self): shear_cols += ["redshift_true"] chunk_rows = self.config["chunk_rows"] - return self.iterate_hdf("shear_catalog", "shear", shear_cols, chunk_rows) + for s, e, data in self.iterate_hdf("shear_catalog", "shear", shear_cols, chunk_rows): + if flag_mode == "single": + data["flags_1p"] = data["flags"] + data["flags_1m"] = data["flags"] + data["flags_2p"] = data["flags"] + data["flags_2m"] = data["flags"] + yield s, e, data def setup_output(self): """ @@ -61,7 +78,6 @@ def setup_output(self): R_gamma: the per-object estimator response R_S: the per-bin selection response R_gamma_mean: the mean per-bin estimator response - R_total: the complete per-bin response and the 2D versions of the per-bin values. """ @@ -75,10 +91,8 @@ def setup_output(self): group.create_dataset("R_gamma", (n, 2, 2), dtype="f") group.create_dataset("R_S", (nbin_source, 2, 2), dtype="f") group.create_dataset("R_gamma_mean", (nbin_source, 2, 2), dtype="f") - group.create_dataset("R_total", (nbin_source, 2, 2), dtype="f") group.create_dataset("R_S_2d", (2, 2), dtype="f") group.create_dataset("R_gamma_mean_2d", (2, 2), dtype="f") - group.create_dataset("R_total_2d", (2, 2), dtype="f") return outfile def setup_response_calculators(self, nbin_source): diff --git a/txpipe/utils/pixel_schemes.py b/txpipe/utils/pixel_schemes.py index 0a31586a7..b289fcffd 100755 --- a/txpipe/utils/pixel_schemes.py +++ b/txpipe/utils/pixel_schemes.py @@ -159,7 +159,7 @@ def read_map(HealpixScheme, fname_map, i_map=0): return p, maps def vertices(self, pix): - return self.healpy.boundaries(self.nside, pix) + return self.healpy.boundaries(self.nside, pix, nest=self.nest) class GnomonicPixelScheme: From e977952871fd7ce291e78e56e7f2d2b625203849 Mon Sep 17 00:00:00 2001 From: joezuntz Date: Wed, 29 Apr 2026 09:06:21 -0700 Subject: [PATCH 5/8] Switch to full DESY3 data filers --- examples/desy3/config.yml | 16 +++++++++------- examples/desy3/pipeline.yml | 15 +++++++-------- txpipe/ingest/stage3/desy3.py | 15 ++++++++++----- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/examples/desy3/config.yml b/examples/desy3/config.yml index 3b45235c4..f06c9d280 100644 --- a/examples/desy3/config.yml +++ b/examples/desy3/config.yml @@ -15,13 +15,13 @@ TXIngestDESY3Footprint: name: TXIngestDESY3Footprint input_nside: 4096 input_filepaths: - - data/desy3/inputs/patch_maps/y3a2_footprint_griz_1exp_v2.0_healsparse.hs - - data/desy3/inputs/patch_maps/y3a2_griz_o.4096_t.32768_coverfoot_EQU_healsparse.hs - - data/desy3/inputs/patch_maps/y3a2_foreground_mask_v2.1_healsparse.hs - - data/desy3/inputs/patch_maps/y3a2_badregions_mask_v2.1_healsparse.hs - - data/desy3/inputs/patch_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highdens_vl05_vlim_zmask_ZMAX.hs - - data/desy3/inputs/patch_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highlum_vl10_vlim_zmask_ZMAX.hs - - data/desy3/inputs/patch_maps/y3a2_gold_2_2_1_sof_nside4096_nest_i_depth.hs + - data/desy3/inputs/footprint_maps/y3a2_footprint_griz_1exp_v2.0_healsparse.hs + - data/desy3/inputs/footprint_maps/y3a2_griz_o.4096_t.32768_coverfoot_EQU_healsparse.hs + - data/desy3/inputs/footprint_maps/y3a2_foreground_mask_v2.1_healsparse.hs + - data/desy3/inputs/footprint_maps/y3a2_badregions_mask_v2.1_healsparse.hs + - data/desy3/inputs/footprint_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highdens_vl05_vlim_zmask_ZMAX.hs + - data/desy3/inputs/footprint_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highlum_vl10_vlim_zmask_ZMAX.hs + - data/desy3/inputs/footprint_maps/y3a2_gold_2_2_1_sof_nside4096_nest_i_depth.hs input_labels: - footprint/footprint @@ -72,6 +72,7 @@ PZEstimatorLens: mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6} selection_mode: 2 #0: ENF, 1: ANF, 2: DNF see https://github.com/LSSTDESC/rail_dnf nondetect_val: .inf + chunk_size: 10000 # We have to make this very low to avoid memory issues PZRailSummarizeLens: name: PZRailSummarizeLens @@ -171,6 +172,7 @@ PZEstimatorSource: mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6} selection_mode: 2 #0: ENF, 1: ANF, 2: DNF see https://github.com/LSSTDESC/rail_dnf nondetect_val: .inf + chunk_size: 10000 # low to avoid memory issues TXSourceTomography: bands: ["r", "i", "z"] diff --git a/examples/desy3/pipeline.yml b/examples/desy3/pipeline.yml index 9b056aa91..26c376cd3 100644 --- a/examples/desy3/pipeline.yml +++ b/examples/desy3/pipeline.yml @@ -20,15 +20,15 @@ stages: # Apply the trained DNF model to the photometric sample to get lens photo-z - name: PZEstimatorLens classname: DNFEstimator - nprocess: 32 - threads_per_process: 4 + nprocess: 64 + threads_per_process: 1 aliases: model: lens_photoz_model input: photometry_catalog output: lens_photoz_pdfs # Train the DNF model on the spectroscopic sample. This uses a restricted set of bands - # measured under metacalibration + # measured under metacalibration - name: PZPrepareEstimatorSource classname: DNFInformer aliases: @@ -37,8 +37,8 @@ stages: # Apply the trained DNF model to the metacalibration photometric sample to get source photo-z - name: PZEstimatorSource - nprocess: 32 - threads_per_process: 4 + nprocess: 64 + threads_per_process: 1 classname: DNFEstimator aliases: model: source_photoz_model @@ -197,11 +197,10 @@ config: examples/desy3/config.yml # These are overall inputs to the whole pipeline, not generated within it inputs: - des_photometry_catalog: data/desy3/inputs/DESY3_GOLD_2_2.1_testpatch.h5 - des_shear_catalog: data/desy3/inputs/DESY3_metacal_v03-004_testpatch.h5 + des_photometry_catalog: data/desy3/inputs/DESY3_GOLD_2_2.1.h5 + des_shear_catalog: data/desy3/inputs/DESY3_metacal_v03-004.h5 des_specz_catalog: data/desy3/inputs/y6gold22_trainnov20_matchedtoy3.fits fiducial_cosmology: data/fiducial_cosmology.yml - star_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/DES_psf_y3_catalog.h5 # if supported by the launcher, restart the pipeline where it left off # if interrupted diff --git a/txpipe/ingest/stage3/desy3.py b/txpipe/ingest/stage3/desy3.py index 8cad28cd3..f76d9455d 100644 --- a/txpipe/ingest/stage3/desy3.py +++ b/txpipe/ingest/stage3/desy3.py @@ -190,7 +190,7 @@ def run(self): """ Run the analysis for this stage. """ - print("Ingesting DES Y3 Metacal catalog from") + print("Ingesting DES Y3 Metacal catalog from ", self.get_input("des_shear_catalog")) subgroups_to_suffixes = { @@ -231,11 +231,14 @@ def run(self): gout = fout.file.create_group("shear") gout.attrs['catalog_type'] = 'metacal' + compression = None + # save all the columns that are only measured once, not on all the # sheared versions. In metacal this includes positions and PSF properties. for input_col, output_col in single_columns.items(): d = to_native_endian(gin[input_col][:]) - gout.create_dataset(output_col, data=d, chunks=True, compression="gzip") + print(f"Copying {input_col} -> {output_col}") + gout.create_dataset(output_col, data=d, chunks=True, compression=compression) # Now save the stuff that is different for each sheared version for variant, suffix in subgroups_to_suffixes.items(): @@ -244,8 +247,9 @@ def run(self): # First handle the ones that are just renamed, including the shears # themselves and the SNR and weight etc. for input_col, output_col in sheared_columns.items(): + print(f"Copying {variant}/{input_col} -> {output_col}{suffix}") d = to_native_endian(gin_sub[input_col][:]) - gout.create_dataset(output_col + suffix, data=d, chunks=True, compression="gzip") + gout.create_dataset(output_col + suffix, data=d, chunks=True, compression=compression) # Now deal with fluxes, which we have to turn into magnitudes. # Note that we might change our mind on this and at some point @@ -258,8 +262,9 @@ def run(self): flux[flux < 0] = 0 mag, mag_err = self.des_metacal_flux_to_mag(flux, flux_err) + print(f"Copying fluxes and errs {variant}/flux_{band} -> mags mag_{band}{suffix}") # save mags and mag_errs - gout.create_dataset(f"mag_{band}{suffix}", data=mag, chunks=True) - gout.create_dataset(f"mag_err_{band}{suffix}", data=mag_err, chunks=True) + gout.create_dataset(f"mag_{band}{suffix}", data=mag, chunks=True, compression=compression) + gout.create_dataset(f"mag_err_{band}{suffix}", data=mag_err, chunks=True, compression=compression) From db48c33a0c26f232dd30442be5a39b9eb5919fab Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Fri, 8 May 2026 15:55:13 +0100 Subject: [PATCH 6/8] Allow directories to be opened in parallel. Requires ceci branch --- txpipe/base_stage.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/txpipe/base_stage.py b/txpipe/base_stage.py index 8cb7dccbc..fb7f20db7 100755 --- a/txpipe/base_stage.py +++ b/txpipe/base_stage.py @@ -1,4 +1,4 @@ -from ceci import PipelineStage as PipelineStageBase +from ceci import PipelineStage as PipelineStageBase, file_types from .data_types import HDFFile from textwrap import dedent from .utils.provenance import find_module_versions, git_diff, git_current_revision @@ -183,13 +183,17 @@ def open_output(self, tag, wrapper=False, **kwargs): new_tag = self.get_aliased_tag(tag) path = self.get_output(new_tag) - # HDF files can be opened for parallel writing + # HDF files and directory outputs can be opened for parallel writing # under MPI. This checks if: # - we have been told to open in parallel # - we are actually running under MPI # and adds the flags required if all these are true run_parallel = kwargs.pop("parallel", False) and self.is_mpi() - if run_parallel: + if run_parallel and issubclass(output_class, file_types.Directory): + kwargs["parallel"] = True + kwargs["comm"] = self.comm + + elif run_parallel: if not output_class.supports_parallel_write: raise ValueError( f"Tried to open file for parallel output, but not" From d8052c8dd4df6608e71138e3149da9a1a2548c87 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Fri, 8 May 2026 15:55:24 +0100 Subject: [PATCH 7/8] Parallelize TXLSSDensityNullTests --- txpipe/lssweights.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/txpipe/lssweights.py b/txpipe/lssweights.py index 9eb591f20..cf0468abb 100644 --- a/txpipe/lssweights.py +++ b/txpipe/lssweights.py @@ -349,7 +349,7 @@ class TXLSSDensityNullTests(TXLSSDensityBase): """ name = "TXLSSDensityNullTests" - parallel = False + parallel = True inputs = [ ( "binned_lens_catalog_unweighted", @@ -410,16 +410,16 @@ def run(self): self.Ntomo = f["lens"].attrs["nbin_lens"] # output directory for the plots and summary stats - output_dir = self.open_output("lss_density_plots", wrapper=True) + # open in parallel so that each process can write to it as needed. + output_dir = self.open_output("lss_density_plots", wrapper=True, parallel=True) - # open outdir density correlation file - dens_output = self.open_output("unweighted_density_correlation", wrapper=False) # load the SP maps, apply the mask, normalize the maps (as needed by the method) self.sys_maps, self.sys_names, self.sys_meta = self.prepare_sys_maps() - for ibin in range(self.Ntomo): - print("Computed density correlations for lens bin {0}/{1}".format(ibin + 1, self.Ntomo)) + results = [] + for ibin in self.split_tasks_by_rank(range(self.Ntomo)): + print("Computing density correlations for lens bin {0}/{1}".format(ibin + 1, self.Ntomo)) # compute density vs SP map data vector density_corrs = self.calculate_1d_density_correlations(ibin) @@ -429,10 +429,20 @@ def run(self): # Add the null model n_dens/ = 1 to the # DensityCorrelation object and compute its chi2 density_corrs.add_null_model() - - # make summary stats and plots - self.summarize_density(output_dir, dens_output, density_corrs) - dens_output.close() + results.append(density_corrs) + + # gather all results on root process + if self.comm is not None: + results = self.comm.gather(results, root=0) + + if self.rank == 0: + # flatten list of lists + results = [result for sublist in results for result in sublist] + # open outdir density correlation file. + # only the root process does any writing. + with self.open_output("unweighted_density_correlation", wrapper=False) as dens_output: + for density_corrs in results: + self.summarize_density(output_dir, dens_output, density_corrs) def summarize_density(self, output_dir, dens_output, density_correlation): """ From 73e11df30a00918793a5b14cd90adbf428d3606e Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Fri, 8 May 2026 18:49:39 +0100 Subject: [PATCH 8/8] speed up with do_grid_hist and temporary pipeline changes --- examples/lssweights/des-y1/config_binned.yml | 6 ++--- .../lssweights/des-y1/pipeline_binned.yml | 24 +++++++++---------- txpipe/lsstools/lsstools.py | 2 ++ 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/examples/lssweights/des-y1/config_binned.yml b/examples/lssweights/des-y1/config_binned.yml index f9724c0d2..dd88cd3d8 100755 --- a/examples/lssweights/des-y1/config_binned.yml +++ b/examples/lssweights/des-y1/config_binned.yml @@ -11,7 +11,7 @@ global: sparse: True # Generate sparse maps - faster if using small areas TXLSSDensityNullTests: - supreme_path_root: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/spmaps/ + supreme_path_root: data/desy1//spmaps/ outlier_fraction: 0.01 # Will remove this fraction of the maps is most extreme regions when computing 1d trends nbin: 10 # number of bins per SP map to compute 1d trends simple_cov: True # If True will use diagonal shot noise covariance @@ -19,7 +19,7 @@ TXLSSDensityNullTests: b0: [1.4, 1.6, 1.6, 1.93, 1.98] TXLSSWeightsLinBinned: - supreme_path_root: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/spmaps/ + supreme_path_root: data/desy1//spmaps/ outlier_fraction: 0.01 # Will remove this fraction of the maps is most extreme regions when computing 1d trends nbin: 10 # number of bins per SP map to compute 1d trends pvalue_threshold: 0.05 # max p-value for maps to be corrected (0 is no maps, 1 is all maps) @@ -30,7 +30,7 @@ TXTwoPoint: do_pos_pos: True do_shear_shear: False do_shear_pos: False - flip_g2: True # use true when using metacal shears + flip_g2: True # use true when using metacal sh ears min_sep: 2.5 max_sep: 250.0 nbins: 20 diff --git a/examples/lssweights/des-y1/pipeline_binned.yml b/examples/lssweights/des-y1/pipeline_binned.yml index 9dc5a052c..d6c962c8a 100755 --- a/examples/lssweights/des-y1/pipeline_binned.yml +++ b/examples/lssweights/des-y1/pipeline_binned.yml @@ -23,7 +23,7 @@ stages: modules: txpipe # Where to put outputs -output_dir: examples/lssweights/des-y1/output_binned/ +output_dir: data/desy1/output # How to run the pipeline: mini, parsl, or cwl launcher: @@ -32,20 +32,20 @@ launcher: # Where to run the pipeline: cori-interactive, cori-batch, or local site: - name: nersc-interactive + name: local # configuration settings -config: examples/lssweights/des-y1//config_binned.yml +config: examples/lssweights/des-y1/config_binned.yml inputs: # See README for paths to download these files - photometry_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/photometry_catalog_desy1_RM.h5 - lens_tomography_catalog_unweighted: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/lens_tomography_catalog_desy1_RM_unweighted.h5 - random_cats: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/randoms_desy1_RM.hdf5 - binned_random_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/binned_randoms_desy1_RM.hdf5 - binned_random_catalog_sub: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/binned_randoms_desy1_RM.hdf5 - mask: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/mask_desy1_4096.h5 - lens_photoz_stack: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/lens_photoz_stack.hdf5 + photometry_catalog: data/desy1/photometry_catalog_desy1_RM.h5 + lens_tomography_catalog_unweighted: data/desy1/lens_tomography_catalog_desy1_RM_unweighted.h5 + random_cats: data/desy1/randoms_desy1_RM.hdf5 + binned_random_catalog: data/desy1/binned_randoms_desy1_RM.hdf5 + binned_random_catalog_sub: data/desy1/binned_randoms_desy1_RM.hdf5 + mask: data/desy1/mask_desy1_4096.h5 + lens_photoz_stack: data/desy1/lens_photoz_stack.hdf5 shear_catalog: none shear_tomography_catalog: none binned_shear_catalog: none @@ -62,6 +62,6 @@ inputs: # if interrupted resume: False # where to put output logs for individual stages -log_dir: examples/lssweights/des-y1/output_binned/logs/ +log_dir: lssweights_logs # where to put an overall parsl pipeline log -pipeline_log: examples/lssweights/des-y1/output_binned/log.txt +pipeline_log: ./log.txt diff --git a/txpipe/lsstools/lsstools.py b/txpipe/lsstools/lsstools.py index 0c5af02f4..43b736287 100644 --- a/txpipe/lsstools/lsstools.py +++ b/txpipe/lsstools/lsstools.py @@ -552,6 +552,7 @@ def linear_model( sysmap_table=None, map_index=None, frac=None, + do_grid_hist=True, ): """ linear contamination model: @@ -605,6 +606,7 @@ def linear_model( map_input=True, use_precompute=True, frac=frac_vals, + do_grid_hist=do_grid_hist, ) return F, F_density_corrs