diff --git a/examples/2.2i/config.yml b/examples/2.2i/config.yml index 206d812be..b6ca656c7 100644 --- a/examples/2.2i/config.yml +++ b/examples/2.2i/config.yml @@ -66,6 +66,10 @@ TXSourceSelector: r_i_cut: 2.0 # may also need one for r_cpar_cut +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.3, 0.5, 0.7, 0.9, 2.0] + TXRandomCat: chunk_rows: 100000 density: 10 # gals per sq arcmin diff --git a/examples/2.2i/pipeline.yml b/examples/2.2i/pipeline.yml index f9c9e9d4e..c66c958fe 100644 --- a/examples/2.2i/pipeline.yml +++ b/examples/2.2i/pipeline.yml @@ -27,6 +27,8 @@ stages: nprocess: 32 - name: TXSourceSelector nprocess: 32 + - name: TXSourceTomography + threads_per_process: 4 - name: TXShearCalibration nodes: 1 nprocess: 8 diff --git a/examples/2.2i/pipeline_1tract.yml b/examples/2.2i/pipeline_1tract.yml index f008bc8dc..4b9f7472f 100644 --- a/examples/2.2i/pipeline_1tract.yml +++ b/examples/2.2i/pipeline_1tract.yml @@ -24,6 +24,7 @@ stages: - name: TXSourceNoiseMaps - name: TXLensNoiseMaps - name: TXSourceSelector + - name: TXSourceTomography - name: PZRailEstimateSource - name: PZRailEstimateLensFromSource - name: TXShearCalibration diff --git a/examples/cosmodc2/Cluster_pipelines/config-1deg2-CL.yml b/examples/cosmodc2/Cluster_pipelines/config-1deg2-CL.yml index a8d00e88b..baf897a8b 100644 --- a/examples/cosmodc2/Cluster_pipelines/config-1deg2-CL.yml +++ b/examples/cosmodc2/Cluster_pipelines/config-1deg2-CL.yml @@ -11,6 +11,10 @@ TXSourceSelectorMetadetect: shear_prefix: '' +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.1, 3.0] + Inform_BPZ_lite: zmin: 0.0 zmax: 3.0 diff --git a/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml b/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml index 102f42257..8977a157b 100644 --- a/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml +++ b/examples/cosmodc2/Cluster_pipelines/config-20deg2-CL.yml @@ -10,6 +10,10 @@ TXSourceSelectorMetadetect: true_z: false shear_prefix: '' +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.1, 3.0] + BPZliteInformer: zmin: 0.0 zmax: 3.0 diff --git a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml index a10e3a3f1..f27532b12 100644 --- a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml +++ b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-in2p3.yml @@ -23,6 +23,8 @@ python_paths: [] stages: - name: TXSourceSelectorMetadetect nprocess: 30 + - name: TXSourceTomography + threads_per_process: 4 - name: BPZliteInformer nprocess: 1 aliases: diff --git a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-laptop.yml b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-laptop.yml index 906896c51..cfb806780 100644 --- a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-laptop.yml +++ b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-laptop.yml @@ -23,6 +23,8 @@ python_paths: [] stages: - name: TXSourceSelectorMetadetect nprocess: 6 + - name: TXSourceTomography + threads_per_process: 4 - name: BPZliteInformer nprocess: 1 aliases: diff --git a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml index eed905b10..7db9b7dc0 100644 --- a/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml +++ b/examples/cosmodc2/Cluster_pipelines/pipeline-20deg2-CL-nersc.yml @@ -22,6 +22,8 @@ python_paths: [] stages: - name: TXSourceSelectorMetadetect nprocess: 30 + - name: TXSourceTomography + threads_per_process: 4 - name: BPZliteInformer nprocess: 1 aliases: diff --git a/examples/cosmodc2/config-20deg2.yml b/examples/cosmodc2/config-20deg2.yml index 4c3d6185e..660d0318e 100644 --- a/examples/cosmodc2/config-20deg2.yml +++ b/examples/cosmodc2/config-20deg2.yml @@ -116,6 +116,10 @@ TXSourceSelectorMetadetect: true_z: false shear_prefix: '' +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.19285902, 0.40831394, 0.65503818, 0.94499109, 1.2947086, 1.72779632, 2.27855242, 3.] # 7 bins + TXDiagnosticQuantiles: nbins: 20 psf_prefix: 00/mcal_psf_ diff --git a/examples/cosmodc2/config.yml b/examples/cosmodc2/config.yml index c4fc76644..794726fa9 100644 --- a/examples/cosmodc2/config.yml +++ b/examples/cosmodc2/config.yml @@ -102,6 +102,10 @@ TXSourceSelectorMetadetect: shear_prefix: '' +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.19285902, 0.40831394, 0.65503818, 0.94499109, 1.2947086, 1.72779632, 2.27855242, 3.] # 7 bins + TXRandomCat: density: 10 # gals per sq arcmin diff --git a/examples/cosmodc2/config_no_shape_noise.yml b/examples/cosmodc2/config_no_shape_noise.yml index 105a3906f..818cd8774 100644 --- a/examples/cosmodc2/config_no_shape_noise.yml +++ b/examples/cosmodc2/config_no_shape_noise.yml @@ -98,6 +98,10 @@ TXSourceSelector: true_z: False shear_prefix: mcal_ +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.19285902, 0.40831394, 0.65503818, 0.94499109, 1.2947086, 1.72779632, 2.27855242, 3. ] # 7 bins + TXRandomCat: chunk_rows: 100000 density: 10 # gals per sq arcmin diff --git a/examples/cosmodc2/ingest-small-config.yml b/examples/cosmodc2/ingest-small-config.yml index b3d304759..83228c72c 100644 --- a/examples/cosmodc2/ingest-small-config.yml +++ b/examples/cosmodc2/ingest-small-config.yml @@ -107,6 +107,10 @@ TXSourceSelectorMetadetect: shear_prefix: '' +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.19285902, 0.40831394, 0.65503818, 0.94499109, 1.2947086, 1.72779632, 2.27855242, 3. ] # 7 bins + TXRandomCat: chunk_rows: 100000 density: 10 # gals per sq arcmin diff --git a/examples/cosmodc2/pipeline-20deg2-laptop.yml b/examples/cosmodc2/pipeline-20deg2-laptop.yml index beadceebb..a2a41a4ce 100644 --- a/examples/cosmodc2/pipeline-20deg2-laptop.yml +++ b/examples/cosmodc2/pipeline-20deg2-laptop.yml @@ -16,6 +16,8 @@ stages: nprocess: 12 - name: TXSourceSelectorMetadetect nprocess: 12 + - name: TXSourceTomography + threads_per_process: 4 - name: TXLSSWeightsUnit - name: TXRandomForestLensSelector nprocess: 12 diff --git a/examples/cosmodc2/pipeline-20deg2-nersc.yml b/examples/cosmodc2/pipeline-20deg2-nersc.yml index 2c2f5efad..b68e755e8 100644 --- a/examples/cosmodc2/pipeline-20deg2-nersc.yml +++ b/examples/cosmodc2/pipeline-20deg2-nersc.yml @@ -16,6 +16,8 @@ stages: nprocess: 32 - name: TXSourceSelectorMetadetect nprocess: 32 + - name: TXSourceTomography + threads_per_process: 4 - name: TXLSSWeightsUnit - name: TXRandomForestLensSelector nprocess: 32 diff --git a/examples/cosmodc2/pipeline-pz.yml b/examples/cosmodc2/pipeline-pz.yml index a2b0db0bf..f1ece0370 100644 --- a/examples/cosmodc2/pipeline-pz.yml +++ b/examples/cosmodc2/pipeline-pz.yml @@ -32,6 +32,8 @@ stages: output: spectroscopic_catalog - name: TXSourceSelectorMetacal nprocess: 32 + - name: TXSourceTomography + threads_per_process: 4 - name: TXShearCalibration nprocess: 8 - name: PZPrepareEstimatorLens # Prepare the p(z) estimator diff --git a/examples/cosmodc2/pipeline.yml b/examples/cosmodc2/pipeline.yml index d12512ca8..b62b98cb3 100644 --- a/examples/cosmodc2/pipeline.yml +++ b/examples/cosmodc2/pipeline.yml @@ -32,6 +32,8 @@ stages: output: spectroscopic_catalog - name: TXSourceSelectorMetacal nprocess: 32 + - name: TXSourceTomography + threads_per_process: 4 - name: TXShearCalibration nprocess: 32 - name: PZPrepareEstimatorLens # Prepare the p(z) estimator diff --git a/examples/cosmodc2/pipeline_redmagic.yml b/examples/cosmodc2/pipeline_redmagic.yml index 7d0258ee8..8def16c45 100644 --- a/examples/cosmodc2/pipeline_redmagic.yml +++ b/examples/cosmodc2/pipeline_redmagic.yml @@ -17,6 +17,8 @@ stages: - name: TXSourceSelectorMetacal nprocess: 64 nodes: 2 + - name: TXSourceTomography + threads_per_process: 4 - name: TXShearCalibration nprocess: 7 nodes: 1 diff --git a/examples/cosmodc2/pipeline_redmagic_full.yml b/examples/cosmodc2/pipeline_redmagic_full.yml index d878a8ac6..cc733e1a7 100644 --- a/examples/cosmodc2/pipeline_redmagic_full.yml +++ b/examples/cosmodc2/pipeline_redmagic_full.yml @@ -22,6 +22,8 @@ stages: - name: TXSourceSelectorMetadetect nprocess: 64 nodes: 2 + - name: TXSourceTomography + threads_per_process: 4 - name: TXShearCalibration nprocess: 7 nodes: 1 diff --git a/examples/cosmodc2/pipeline_redmagic_no_shape_noise.yml b/examples/cosmodc2/pipeline_redmagic_no_shape_noise.yml index 83eaf9176..f793ef643 100644 --- a/examples/cosmodc2/pipeline_redmagic_no_shape_noise.yml +++ b/examples/cosmodc2/pipeline_redmagic_no_shape_noise.yml @@ -17,6 +17,8 @@ stages: - name: TXSourceSelector nprocess: 64 nodes: 3 + - name: TXSourceTomography + threads_per_process: 4 - name: TXShearCalibration nprocess: 7 nodes: 1 diff --git a/examples/cosmodc2/pipeline_redmagic_no_shape_noise_full.yml b/examples/cosmodc2/pipeline_redmagic_no_shape_noise_full.yml index 07edbcbc5..a326a8596 100644 --- a/examples/cosmodc2/pipeline_redmagic_no_shape_noise_full.yml +++ b/examples/cosmodc2/pipeline_redmagic_no_shape_noise_full.yml @@ -21,6 +21,8 @@ stages: - name: TXSourceSelector nprocess: 64 nodes: 2 + - name: TXSourceTomography + threads_per_process: 4 - name: TXSourceTrueNumberDensity - name: TXPhotozPlot - name: TXJackknifeCenters diff --git a/examples/desy1/config.yml b/examples/desy1/config.yml index d6ead5a21..265fa7e98 100644 --- a/examples/desy1/config.yml +++ b/examples/desy1/config.yml @@ -66,6 +66,10 @@ TXTruthLensCatalogSplitter: {} TXStarCatalogSplitter: {} TXPhotozPlot: {} +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.2, 0.43, 0.63, 0.90, 1.30] + TXSourceMaps: sparse: True # Generate sparse maps - faster if using small areas diff --git a/examples/desy1/pipeline.yml b/examples/desy1/pipeline.yml index 6933c79d0..c31dae0b6 100644 --- a/examples/desy1/pipeline.yml +++ b/examples/desy1/pipeline.yml @@ -102,6 +102,7 @@ inputs: # For the self-calibration extension we are not using Random_cat_source for now # So it is set to Null, so the yaml intepreter returns a None value to python. random_cats_source: Null + shear_tomography_classifier: none mask: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/mask_desy1.h5 #random_cats: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/randoms_desy1_RM.hdf5 #binned_random_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/binned_randoms_desy1_RM.hdf5 diff --git a/examples/desy3/config.yml b/examples/desy3/config.yml index 937905ab0..d4d77ce10 100644 --- a/examples/desy3/config.yml +++ b/examples/desy3/config.yml @@ -82,6 +82,10 @@ TXTruthLensCatalogSplitter: {} TXStarCatalogSplitter: {} TXPhotozPlots: {} +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.0, 0.2, 0.4, 0.6, 0.8] + TXSourceMaps: sparse: True # Generate sparse maps - faster if using small areas diff --git a/examples/desy3/pipeline.yml b/examples/desy3/pipeline.yml index 453afe66c..cdc7eb949 100644 --- a/examples/desy3/pipeline.yml +++ b/examples/desy3/pipeline.yml @@ -98,6 +98,7 @@ inputs: calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat fiducial_cosmology: data/fiducial_cosmology.yml random_cats_source: Null + shear_tomography_classifier: none flow: data/example/inputs/example_flow.pkl # if supported by the launcher, restart the pipeline where it left off diff --git a/examples/dp0.2/config.yml b/examples/dp0.2/config.yml index 3a9ee9ea2..de19507a8 100644 --- a/examples/dp0.2/config.yml +++ b/examples/dp0.2/config.yml @@ -84,6 +84,10 @@ TXSimpleMask: {} TXTracerMetadata: {} +TXSourceTomography: + bands: ugrizy + source_zbin_edges: [0.3, 0.5, 0.7, 0.9] + TXRandomCat: method: spherical_projection TXJackknifeCenters: diff --git a/examples/dp0.2/pipeline.yml b/examples/dp0.2/pipeline.yml index 4e7266b01..4b0608ecd 100644 --- a/examples/dp0.2/pipeline.yml +++ b/examples/dp0.2/pipeline.yml @@ -29,6 +29,8 @@ stages: - name: TXSourceSelectorHSC nprocess: 32 threads_per_process: 2 + - name: TXSourceTomography + threads_per_process: 4 - name: TXPhotozSourceStack nprocess: 32 threads_per_process: 2 diff --git a/examples/dp1/config.yml b/examples/dp1/config.yml index e21a4e810..017c1daca 100644 --- a/examples/dp1/config.yml +++ b/examples/dp1/config.yml @@ -69,10 +69,14 @@ TXSourceSelectorSimple: s2n_cut: 10.0 source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: '' + + +TXSourceTomography: + bands: ugrizy + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] spec_mag_column_format: "{band}_cModelMag" spec_redshift_column: redshift - TXRandomForestLensSelector: verbose: true bands: ugrizy diff --git a/examples/dp1/config_ww.yml b/examples/dp1/config_ww.yml index 465946bac..3d6287676 100644 --- a/examples/dp1/config_ww.yml +++ b/examples/dp1/config_ww.yml @@ -69,10 +69,14 @@ TXSourceSelectorSimple: s2n_cut: 10.0 source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: '' + + +TXSourceTomography: + bands: ugrizy + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] spec_mag_column_format: "{band}_cModelMag" spec_redshift_column: redshift - TXRandomForestLensSelector: verbose: true bands: ugrizy diff --git a/examples/dp1/pipeline-edfs.yml b/examples/dp1/pipeline-edfs.yml index 8f030e926..5dde814c4 100644 --- a/examples/dp1/pipeline-edfs.yml +++ b/examples/dp1/pipeline-edfs.yml @@ -23,6 +23,7 @@ stages: - name: TXSourceSelectorSimple nprocess: 1 threads_per_process: 1 + - name: TXSourceTomography - name: TXShearCalibration - name: TXLSSWeightsUnit - name: TXRandomForestLensSelector diff --git a/examples/dp1/pipeline.yml b/examples/dp1/pipeline.yml index e28e0a07c..e3756f7cc 100644 --- a/examples/dp1/pipeline.yml +++ b/examples/dp1/pipeline.yml @@ -41,6 +41,7 @@ stages: - name: TXSourceSelectorSimple nprocess: 1 threads_per_process: 1 + - name: TXSourceTomography - name: TXShearCalibration - name: TXLSSWeightsUnit - name: TXRandomForestLensSelector diff --git a/examples/dp1/pipeline_ww.yml b/examples/dp1/pipeline_ww.yml index 1f4878c09..2a6fc02a1 100644 --- a/examples/dp1/pipeline_ww.yml +++ b/examples/dp1/pipeline_ww.yml @@ -39,6 +39,7 @@ stages: - name: TXSourceSelectorSimple nprocess: 1 threads_per_process: 1 + - name: TXSourceTomography - name: TXShearCalibration - name: TXLSSDensityNullTests #- name: TXLSSWeightsUnit diff --git a/examples/ext_cross_corr/config_1deg.yml b/examples/ext_cross_corr/config_1deg.yml index c3c2f13c3..4277afe2f 100755 --- a/examples/ext_cross_corr/config_1deg.yml +++ b/examples/ext_cross_corr/config_1deg.yml @@ -175,6 +175,10 @@ TXSourceSelectorMetadetect: shear_prefix: "" true_z: False +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + TXTruthLensSelector: # Mag cuts input_pz: False diff --git a/examples/ext_cross_corr/pipeline_1deg.yml b/examples/ext_cross_corr/pipeline_1deg.yml index 53aed427f..e5521e2dd 100755 --- a/examples/ext_cross_corr/pipeline_1deg.yml +++ b/examples/ext_cross_corr/pipeline_1deg.yml @@ -20,6 +20,7 @@ stages: - name: PZRailSummarizeSource # Run the DIR method on the lens sample to find n(z) classname: PZRailSummarize - name: TXSourceSelectorMetadetect # select and split objects into source bins + - name: TXSourceTomography - name: NZDirInformerSource # Prepare the DIR method inputs for the source sample classname: NZDirInformer - name: TXShearCalibration # Calibrate and split the source sample tomographically diff --git a/examples/gaussian_sims/config.yml b/examples/gaussian_sims/config.yml index d60e8bc2d..29d93e165 100644 --- a/examples/gaussian_sims/config.yml +++ b/examples/gaussian_sims/config.yml @@ -78,6 +78,10 @@ TXSourceSelectorMetadetect: true_z: True shear_prefix: '' +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.19285902, 0.40831394, 0.65503818, 0.94499109, 1.2947086, 1.72779632] # 5 bins + TXRandomCat: chunk_rows: 10000000 density: 3 # gals per sq arcmin diff --git a/examples/gaussian_sims/pipeline_gaussian_sims.yml b/examples/gaussian_sims/pipeline_gaussian_sims.yml index 5cc06e975..746661f1b 100644 --- a/examples/gaussian_sims/pipeline_gaussian_sims.yml +++ b/examples/gaussian_sims/pipeline_gaussian_sims.yml @@ -22,6 +22,8 @@ stages: - name: TXSourceSelectorMetadetect nprocess: 64 nodes: 2 + - name: TXSourceTomography + threads_per_process: 4 - name: TXShearCalibration nprocess: 7 nodes: 1 diff --git a/examples/hscy1/config.yml b/examples/hscy1/config.yml index 00cd143eb..79098fee6 100644 --- a/examples/hscy1/config.yml +++ b/examples/hscy1/config.yml @@ -79,6 +79,10 @@ TXTruthLensCatalogSplitter: {} TXStarCatalogSplitter: {} TXPhotozPlots: {} +TXSourceTomography: + bands: i #used for selection + source_zbin_edges: [0.3, 0.6, 0.9, 1.2, 1.5] + TXSourceMaps: sparse: True # Generate sparse maps - faster if using small areas diff --git a/examples/hscy1/hsc_pipeline.yaml b/examples/hscy1/hsc_pipeline.yaml index 57df2466a..7b2f0673c 100644 --- a/examples/hscy1/hsc_pipeline.yaml +++ b/examples/hscy1/hsc_pipeline.yaml @@ -52,6 +52,9 @@ inputs: # For the self-calibration extension we are not using Random_cat_source for now # So it is set to Null, so the yaml intepreter returns a None value to python. random_cats_source: Null + # this is required by the source selector but not used because we + # use the photo-z values for selection instead of the random forest classifier. + shear_tomography_classifier: none # if supported by the launcher, restart the pipeline where it left off # if interrupted diff --git a/examples/hscy3/config.yml b/examples/hscy3/config.yml index c66abfcce..f2e31e423 100644 --- a/examples/hscy3/config.yml +++ b/examples/hscy3/config.yml @@ -146,6 +146,10 @@ TXSourceSelectorMetacal: shear_prefix: mcal_ use_diagonal_response: true +TXSourceTomography: + bands: i #used for selection + source_zbin_edges: [0.5, 1.5, 2.5, 3.5, 4.5] + TXShearCalibration: use_true_shear: false chunk_rows: 100000 diff --git a/examples/hscy3/hsc_pipeline.yaml b/examples/hscy3/hsc_pipeline.yaml index 4e0f0c1e5..99a45617b 100644 --- a/examples/hscy3/hsc_pipeline.yaml +++ b/examples/hscy3/hsc_pipeline.yaml @@ -116,6 +116,7 @@ inputs: star_catalog: /global/cfs/cdirs/lsst/groups/WL/users/yomori/repo/nulltests_txpipe/hscy1/star_catalog_hscy1_allfields.h5 fiducial_cosmology: data/fiducial_cosmology.yml random_cats_source: + shear_tomography_classifier: none random_cats: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/hsc-y3/shear/random_cats.hdf5 binned_random_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/hsc-y3/shear/random_cats.hdf5 diff --git a/examples/kids-1000/config.yml b/examples/kids-1000/config.yml index cfe3ffd62..86f369149 100644 --- a/examples/kids-1000/config.yml +++ b/examples/kids-1000/config.yml @@ -81,6 +81,10 @@ TXTruthLensCatalogSplitter: {} TXStarCatalogSplitter: {} TXPhotozPlots: {} +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.2, 0.43, 0.63, 0.90, 1.30] + TXSourceMaps: sparse: True # Generate sparse maps - faster if using small areas diff --git a/examples/kids-1000/pipeline.yml b/examples/kids-1000/pipeline.yml index 2414aa4af..6b1a13859 100644 --- a/examples/kids-1000/pipeline.yml +++ b/examples/kids-1000/pipeline.yml @@ -96,6 +96,7 @@ inputs: calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat fiducial_cosmology: data/fiducial_cosmology.yml random_cats_source: Null + shear_tomography_classifier: none flow: data/example/inputs/example_flow.pkl # if supported by the launcher, restart the pipeline where it left off diff --git a/examples/lensfit/config.yml b/examples/lensfit/config.yml index 60a90165b..78cceedbb 100644 --- a/examples/lensfit/config.yml +++ b/examples/lensfit/config.yml @@ -78,6 +78,10 @@ TXSourceSelectorLensfit: shear_prefix: '' input_m_is_weighted: true +TXSourceTomography: + bands: ri #used for selection + source_zbin_edges: [0.1, 0.5, 1.0, 2.0] + PZPrepareEstimatorLens: name: PZPrepareEstimatorLens classname: BPZliteInformer diff --git a/examples/lensfit/pipeline.yml b/examples/lensfit/pipeline.yml index f476e0884..920c56e72 100644 --- a/examples/lensfit/pipeline.yml +++ b/examples/lensfit/pipeline.yml @@ -134,6 +134,7 @@ inputs: # This is a workaround to make an input to a ceci stage optional; # we just have the stage check for the string "none" as an input name. # We should be able to do this more cleanly but have not figure it out yet. + shear_tomography_classifier: none none: none # if supported by the launcher, restart the pipeline where it left off diff --git a/examples/metacal/config.yml b/examples/metacal/config.yml index d7d4aa00c..7d9c99030 100644 --- a/examples/metacal/config.yml +++ b/examples/metacal/config.yml @@ -137,6 +137,11 @@ TXSourceSelectorMetacal: delta_gamma: 0.02 source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: mcal_ + +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + TXTruthLensSelector: # Mag cuts input_pz: false diff --git a/examples/metacal/pipeline.yml b/examples/metacal/pipeline.yml index 266fc688f..6840254ba 100644 --- a/examples/metacal/pipeline.yml +++ b/examples/metacal/pipeline.yml @@ -47,6 +47,7 @@ stages: photoz_stack: shear_photoz_stack photoz_realizations: source_photoz_realizations - name: TXSourceSelectorMetacal # select and split objects into source bins + - name: TXSourceTomography - name: NZDirInformerSource # Prepare the DIR method inputs for the source sample classname: NZDirInformer aliases: diff --git a/examples/metadetect/config.yml b/examples/metadetect/config.yml index 187941bf7..25b54f3d8 100755 --- a/examples/metadetect/config.yml +++ b/examples/metadetect/config.yml @@ -138,6 +138,12 @@ TXSourceSelectorMetadetect: delta_gamma: 0.02 source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: '' + psf_prefix: psf_ + +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + TXTruthLensSelector: # Mag cuts input_pz: false @@ -210,12 +216,12 @@ TXBlinding: delete_unblinded: true TXDiagnosticQuantiles: - psf_prefix: 00/mcal_psf_ + psf_prefix: 00/psf_ shear_prefix: 00/ nbins: 20 TXSourceDiagnosticPlots: - psf_prefix: 00/mcal_psf_ + psf_prefix: "" shear_prefix: 00/ nbins: 20 g_min: -0.01 @@ -240,7 +246,7 @@ TXDiagnosticMaps: npix_x: 60 npix_y: 60 depth_cut: 23.0 - psf_prefix: mcal_psf_ + psf_prefix: psf_ TXSourceMaps: sparse: true # Generate sparse maps - faster if using small areas diff --git a/examples/metadetect/pipeline.yml b/examples/metadetect/pipeline.yml index 41b817eca..8b84a3433 100644 --- a/examples/metadetect/pipeline.yml +++ b/examples/metadetect/pipeline.yml @@ -47,6 +47,7 @@ stages: photoz_stack: shear_photoz_stack photoz_realizations: source_photoz_realizations - name: TXSourceSelectorMetadetect # select and split objects into source bins + - name: TXSourceTomography - name: NZDirInformerSource # Prepare the DIR method inputs for the source sample classname: NZDirInformer aliases: @@ -148,7 +149,7 @@ python_paths: - submodules/pyfsb # Where to put outputs -output_dir: data/example/outputs_metadetect +output_dir: data/cosmodc2-1deg2/outputs # How to run the pipeline: mini, parsl, or cwl launcher: @@ -168,8 +169,8 @@ config: examples/metadetect/config.yml inputs: # See README for paths to download these files - shear_catalog: data/example/inputs/metadetect_shear_catalog.hdf5 - photometry_catalog: data/example/inputs/photometry_catalog.hdf5 + shear_catalog: data/cosmodc2-1deg2/inputs/shear_catalog.hdf5 + photometry_catalog: data/cosmodc2-1deg2/inputs/photometry_catalog.hdf5 exposures: data/example/inputs/exposures.hdf5 star_catalog: data/example/inputs/star_catalog.hdf5 # This file comes with the code diff --git a/examples/metadetect_source_only/config.yml b/examples/metadetect_source_only/config.yml index 5501835f6..f34dbefe9 100755 --- a/examples/metadetect_source_only/config.yml +++ b/examples/metadetect_source_only/config.yml @@ -76,6 +76,11 @@ TXSourceSelectorMetadetect: delta_gamma: 0.02 source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: '' + +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + TXRandomCat: density: 10 # gals per sq arcmin diff --git a/examples/metadetect_source_only/pipeline.yml b/examples/metadetect_source_only/pipeline.yml index 2e4056033..cb3f2edf9 100755 --- a/examples/metadetect_source_only/pipeline.yml +++ b/examples/metadetect_source_only/pipeline.yml @@ -21,6 +21,7 @@ stages: photoz_stack: shear_photoz_stack photoz_realizations: source_photoz_realizations - name: TXSourceSelectorMetadetect # select and split objects into source bins + - name: TXSourceTomography - name: NZDirInformerSource # Prepare the DIR method inputs for the source sample classname: NZDirInformer aliases: diff --git a/examples/mock_shear/config.yml b/examples/mock_shear/config.yml index 0d22c398c..e881ee254 100644 --- a/examples/mock_shear/config.yml +++ b/examples/mock_shear/config.yml @@ -17,6 +17,10 @@ TXSourceSelectorSimple: shear_prefix: '' verbose: true +TXSourceTomography: + bands: riz + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + TXMockTruthPZ: mock_sigma_z: 0.001 diff --git a/examples/mock_shear/pipeline.yml b/examples/mock_shear/pipeline.yml index 362625edf..1df4bb2c7 100644 --- a/examples/mock_shear/pipeline.yml +++ b/examples/mock_shear/pipeline.yml @@ -2,6 +2,7 @@ stages: - name: TXSimpleMock # Convert a text file mock catalog to HDF5 - name: TXSourceSelectorSimple # select and split objects into source bins + - name: TXSourceTomography - name: TXShearCalibration # Calibrate and split the source sample tomographically - name: TXMockTruthPZ # Generate PDFs as narrow gaussian centered on the true redshifts aliases: diff --git a/examples/redmagic/config.yml b/examples/redmagic/config.yml index 3c1b6eae7..b37f90c4f 100644 --- a/examples/redmagic/config.yml +++ b/examples/redmagic/config.yml @@ -81,6 +81,10 @@ TXSourceSelectorMetacal: source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] shear_prefix: mcal_ +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.5, 0.7, 0.9, 1.1, 2.0] + TXTruthLensSelector: # Mag cuts input_pz: false diff --git a/examples/redmagic/pipeline.yml b/examples/redmagic/pipeline.yml index ce36efea7..0f59684dd 100644 --- a/examples/redmagic/pipeline.yml +++ b/examples/redmagic/pipeline.yml @@ -1,6 +1,7 @@ # Stages to run stages: - name: TXSourceSelectorMetacal + - name: TXSourceTomography - name: TXShearCalibration - name: TXExternalLensCatalogSplitter - name: TXStarCatalogSplitter diff --git a/examples/skysim/config.yml b/examples/skysim/config.yml index d3b8e09da..a86e19987 100644 --- a/examples/skysim/config.yml +++ b/examples/skysim/config.yml @@ -86,6 +86,10 @@ TXSourceSelectorMetacal: true_z: false shear_prefix: mcal_ +TXSourceTomography: + bands: riz #used for selection + source_zbin_edges: [0.3, 0.55, 0.8, 1.05, 2.0] + TXRandomCat: density: 10 # gals per sq arcmin diff --git a/examples/skysim/pipeline.yml b/examples/skysim/pipeline.yml index 895583bf5..3331794e7 100644 --- a/examples/skysim/pipeline.yml +++ b/examples/skysim/pipeline.yml @@ -16,6 +16,8 @@ stages: - name: TXSourceSelectorMetacal nodes: 8 nprocess: 256 + - name: TXSourceTomography + threads_per_process: 4 - name: TXTruthLensSelector nodes: 8 nprocess: 256 diff --git a/examples/star-challenge/config.yml b/examples/star-challenge/config.yml index e36a0a165..03f3ba015 100644 --- a/examples/star-challenge/config.yml +++ b/examples/star-challenge/config.yml @@ -9,9 +9,13 @@ TXSourceSelectorMetadetect: # found approximately to split data into equal number counts # for selected objects source_zbin_edges: [0.0, 0.34, 0.53, 0.71, 0.96, 3.0] - random_seed: 6765675 shear_prefix: '' +TXSourceTomography: + bands: riz + source_zbin_edges: [0.0, 0.34, 0.53, 0.71, 0.96, 3.0] + random_seed: 6765675 + TXRandomForestLensSelector: verbose: False bands: ugrizy diff --git a/examples/star-challenge/config_full.yml b/examples/star-challenge/config_full.yml index 65d4c4de9..37145d313 100644 --- a/examples/star-challenge/config_full.yml +++ b/examples/star-challenge/config_full.yml @@ -9,9 +9,13 @@ TXSourceSelectorMetadetect: # found approximately to split data into equal number counts # for selected objects source_zbin_edges: [0.0, 0.34, 0.53, 0.71, 0.96, 3.0] - random_seed: 6765675 shear_prefix: '' +TXSourceTomography: + bands: riz + source_zbin_edges: [0.0, 0.34, 0.53, 0.71, 0.96, 3.0] + random_seed: 6765675 + TXRandomForestLensSelector: verbose: False bands: ugrizy diff --git a/txpipe/diagnostics.py b/txpipe/diagnostics.py index 8db7600cd..bfeb4c04e 100644 --- a/txpipe/diagnostics.py +++ b/txpipe/diagnostics.py @@ -223,9 +223,9 @@ def run(self): "g1", "g2", "T", - "mcal_psf_g1", - "mcal_psf_g2", - "mcal_psf_T_mean", + f"{psf_prefix}psf_g1", + f"{psf_prefix}psf_g2", + f"{psf_prefix}psf_T_mean", "s2n", "weight", ) @@ -244,7 +244,10 @@ def run(self): "m", ] + [f"{shear_prefix}mag_{b}" for b in self.config["bands"]] - shear_tomo_cols = ["bin"] + if self.config["shear_catalog_type"] == 'metadetect': + shear_tomo_cols = ["bin_00", "bin_1p", "bin_2p", "bin_1m", "bin_2m"] + else: + shear_tomo_cols = ["bin"] if self.config["shear_catalog_type"] == "metacal": more_iters = ["shear_tomography_catalog", "response", ["R_gamma"]] @@ -270,6 +273,14 @@ def run(self): # This causes each data = yield statement in each plotter to # be given this data chunk as the variable data. + if self.config["shear_catalog_type"] == "metadetect": + data['bin'] = data['bin_00'] + data['00/bin'] = data['bin_00'] + data['1p/bin'] = data['bin_1p'] + data['2p/bin'] = data['bin_2p'] + data['1m/bin'] = data['bin_1m'] + data['2m/bin'] = data['bin_2m'] + for plotter in plotters: plotter.send(data) @@ -302,15 +313,16 @@ def plot_psf_shear(self): psf_g_edges = self.get_bin_edges("psf_g1") + p1 = MeanShearInBins( - f"{psf_prefix}g1", + f"psf_g1", psf_g_edges, delta_gamma, cut_source_bin=True, shear_catalog_type=self.config["shear_catalog_type"], ) p2 = MeanShearInBins( - f"{psf_prefix}g2", + f"psf_g2", psf_g_edges, delta_gamma, cut_source_bin=True, @@ -324,6 +336,7 @@ def plot_psf_shear(self): if data is None: break + p1.add_data(data) p2.add_data(data) @@ -402,7 +415,7 @@ def plot_psf_size_shear(self): psf_T_edges = self.get_bin_edges("psf_T_mean") binnedShear = MeanShearInBins( - f"{psf_prefix}T_mean", + f"psf_T_mean", psf_T_edges, delta_gamma, cut_source_bin=True, @@ -466,7 +479,7 @@ def plot_snr_shear(self): # This class includes all the cutting and calibration, both for # estimator and selection biases binnedShear = MeanShearInBins( - f"{shear_prefix}s2n", + f"s2n", snr_edges, delta_gamma, cut_source_bin=True, @@ -528,7 +541,7 @@ def plot_size_shear(self): T_edges = self.get_bin_edges("T") binnedShear = MeanShearInBins( - f"{shear_prefix}T", + f"T", T_edges, delta_gamma, cut_source_bin=True, @@ -595,7 +608,7 @@ def plot_mag_shear(self): m_edges = self.get_bin_edges(f"{shear_prefix}mag_{band}") binnedShear[f"{band}"] = MeanShearInBins( - f"{shear_prefix}mag_{band}", + f"mag_{band}", m_edges, delta_gamma, cut_source_bin=True, @@ -717,27 +730,37 @@ def plot_g_histogram(self): if data is None: break - qual_cut = data["bin"] != -1 + if cat_type == "metacal": g1 = data["mcal_g1"] g2 = data["mcal_g2"] w = data["weight"] + bin_cut = data["bin"] >= 0 elif cat_type == "metadetect": g1 = data["00/g1"] g2 = data["00/g2"] w = data["00/weight"] + bin_cut = data["bin_00"] >= 0 elif cat_type == "lensfit": dec = data["dec"] g1 = data["g1"] g2 = data["g2"] w = data["weight"] + bin_cut = data["bin"] >= 0 else: g1 = data["g1"] g2 = data["g2"] c1 = data["c1"] c2 = data["c2"] w = data["weight"] + bin_cut = data["bin"] >= 0 + c1 = c1[bin_cut] + c2 = c2[bin_cut] + + g1 = g1[bin_cut] + g2 = g2[bin_cut] + w = w[bin_cut] if cat_type == "metacal" or cat_type == "metadetect": g1, g2 = cal.apply(g1, g2) @@ -807,13 +830,14 @@ def plot_snr_histogram(self): break qual_cut = data["bin"] != -1 + s2n = data['00/s2n'] if self.config["shear_catalog_type"] == "metadetect" else data[f"s2n"] - b1 = np.digitize(data[f"{shear_prefix}s2n"][qual_cut], edges) - 1 + b1 = np.digitize(s2n[qual_cut], edges) - 1 for i in range(bins): w = np.where(b1 == i) # Do more things here to establish - calc1.add_data(i, data[f"{shear_prefix}s2n"][qual_cut][w]) + calc1.add_data(i, s2n[qual_cut][w]) count1, mean1, var1 = calc1.collect(self.comm, mode="gather") if self.rank != 0: diff --git a/txpipe/ingest/__init__.py b/txpipe/ingest/__init__.py index 0b381c8d7..ab2f8ce71 100644 --- a/txpipe/ingest/__init__.py +++ b/txpipe/ingest/__init__.py @@ -1,5 +1,5 @@ from .dp02 import TXIngestDataPreview02 -from .mocks import TXCosmoDC2Mock, TXGaussianSimsMock +from .mocks import TXIngestRomanRubin, TXIngestSkySim, TXIngestCosmoDC2, TXIngestBuzzard, TXSimpleMock, TXMockTruthPZ from .gcr import TXMetacalGCRInput, TXIngestStars from .redmagic import TXIngestRedmagic from .ssi import ( diff --git a/txpipe/ingest/mock_tools.py b/txpipe/ingest/mock_tools.py new file mode 100644 index 000000000..6161bc20c --- /dev/null +++ b/txpipe/ingest/mock_tools.py @@ -0,0 +1,328 @@ +import numpy as np +from ..utils.conversion import mag_ab_to_nanojansky, nanojansky_to_mag_ab, nanojansky_err_to_mag_ab, mag_ab_err_to_nanojansky_err + + + +# These are quick, rough, and global numbers JZ measured on DES Y6 +# metadetect catalogs to use as defaults for response values +# The value d(parameter)/d(g_i) has the key (parameter, i) +desy6_responses = { + ("g1", 1): 0.7522786345399349, + ("g1", 2): -7.221603811297746e-05, + ("g2", 1): 0.000695986991087691, + ("g2", 2): 0.753689800584438, + ("s2n", 1): 0.4495460648463734, + ("s2n", 2): -0.2446177233672131, + ("flux_g", 1): 2.200867381810667, + ("flux_g", 2): -0.7594371207147788, + ("flux_r", 1): 6.4665622482152685, + ("flux_r", 2): -3.319059863883922, + ("flux_i", 1): 9.973907233552382, + ("flux_i", 2): -5.844535374410498, + ("flux_z", 1): 13.01874158668852, + ("flux_z", 2): -7.8327478646315285, + ("T", 1): 0.0002450368855461127, + ("T", 2): -0.0005327801992860426, +} + + +mock_psf_stats = { + # trace size in arcsec^2 + # corresponds to FWHM 0.75 arcsec + "T": (0.20287899012501048, 0.025), + "e1": (0.0, 0.03), + "e2": (0.0, 0.03), +} + +unit_responses = { + ("g1", 1): 1.0, + ("g1", 2): 0.0, + ("g2", 1): 0.0, + ("g2", 2): 1.0, + ("s2n", 1): 0.0, + ("s2n", 2): 0.0, + ("flux_g", 1): 0.0, + ("flux_g", 2): 0.0, + ("flux_r", 1): 0.0, + ("flux_r", 2): 0.0, + ("flux_i", 1): 0.0, + ("flux_i", 2): 0.0, + ("flux_z", 1): 0.0, + ("flux_z", 2): 0.0, + ("T", 1): 0.0, + ("T", 2): 0.0, +} + + + +def add_lsst_like_noise(data, random_state, year=1, **config): + from photerr import LsstErrorModel + import pandas as pd + + # LsstErrorModel is the newer of the photometry models + # and should be more representative of real data. + + # Find all bands available in the data + bands = [b for b in "ugrizy" if f"mag_{b}" in data] + + model = LsstErrorModel( + nYrObs=year, + **config + ) + + df = pd.DataFrame({b: data[f"mag_{b}"] for b in bands}) + noisy_data = model(df, random_state=random_state) + + # Keep all the input columns as expected. + + for b in bands: + mag = np.array(noisy_data[b].values) + mag_err = np.array(noisy_data[f"{b}_err"].values) + flux = mag_ab_to_nanojansky(mag) + flux_err = mag_ab_err_to_nanojansky_err(mag, mag_err) + snr = flux / flux_err + data[f"mag_{b}"] = mag + data[f"mag_err_{b}"] = mag_err + data[f"snr_{b}"] = snr + data[f"s2n_{b}"] = snr + + + + +def make_metadetect_catalog(data, response_type, delta_gamma, bands, rng, snr_cut=4): + output = {} + + # These parameters have response factors defined for them. + params = ["g1", "g2", "T"] + + # For these parameters we do not apply any response correction, + # they have the same value in the metadetect variants. + # Note that this will not be the case in real life - different + # sets of galaxies will be identified in each metadetect variant, + # and locations will change. + non_response_params = ["ra", "dec", "id", "redshift_true"] + + if response_type == "desy6": + responses = desy6_responses + elif response_type == "unit": + responses = unit_responses + else: + raise ValueError(f"Unknown response type {response_type}") + + + + for param in params: + output[f"00/{param}"] = data[param] + output[f"1p/{param}"] = data[param] + responses[param, 1] * delta_gamma / 2 + output[f"1m/{param}"] = data[param] - responses[param, 1] * delta_gamma / 2 + output[f"2p/{param}"] = data[param] + responses[param, 2] * delta_gamma / 2 + output[f"2m/{param}"] = data[param] - responses[param, 2] * delta_gamma / 2 + + for param in non_response_params: + output[f"00/{param}"] = data[param] + output[f"1p/{param}"] = data[param] + output[f"1m/{param}"] = data[param] + output[f"2p/{param}"] = data[param] + output[f"2m/{param}"] = data[param] + + s2n_total = 0 + s2n_total_1p = 0 + s2n_total_1m = 0 + s2n_total_2p = 0 + s2n_total_2m = 0 + + dg2 = delta_gamma / 2 + + for b in bands: + mag = data["mag_" + b] + mag_err = data["mag_err_" + b] + flux = mag_ab_to_nanojansky(mag) + flux_err = mag_ab_err_to_nanojansky_err(mag, mag_err) + + s2n = flux / flux_err + s2n_1p = s2n + responses["s2n", 1] * dg2 + s2n_1m = s2n - responses["s2n", 1] * dg2 + s2n_2p = s2n + responses["s2n", 2] * dg2 + s2n_2m = s2n - responses["s2n", 2] * dg2 + + s2n_total += s2n ** 2 + s2n_total_1p += s2n_1p ** 2 + s2n_total_1m += s2n_1m ** 2 + s2n_total_2p += s2n_2p ** 2 + s2n_total_2m += s2n_2m ** 2 + + flux_1p = flux + responses[(f"flux_{b}", 1)] * dg2 + flux_1m = flux - responses[(f"flux_{b}", 1)] * dg2 + flux_2p = flux + responses[(f"flux_{b}", 2)] * dg2 + flux_2m = flux - responses[(f"flux_{b}", 2)] * dg2 + # there are some zero fluxes for non-detections. We silence the warning + with np.errstate(divide = 'ignore'): + output[f"00/mag_{b}"] = nanojansky_to_mag_ab(flux) + output[f"1p/mag_{b}"] = nanojansky_to_mag_ab(flux_1p) + output[f"1m/mag_{b}"] = nanojansky_to_mag_ab(flux_1m) + output[f"2p/mag_{b}"] = nanojansky_to_mag_ab(flux_2p) + output[f"2m/mag_{b}"] = nanojansky_to_mag_ab(flux_2m) + + output[f"00/mag_err_{b}"] = nanojansky_err_to_mag_ab(flux, flux_err) + output[f"1p/mag_err_{b}"] = nanojansky_err_to_mag_ab(flux_1p, flux_err) + output[f"1m/mag_err_{b}"] = nanojansky_err_to_mag_ab(flux_1m, flux_err) + output[f"2p/mag_err_{b}"] = nanojansky_err_to_mag_ab(flux_2p, flux_err) + output[f"2m/mag_err_{b}"] = nanojansky_err_to_mag_ab(flux_2m, flux_err) + + + + output["00/s2n"] = np.sqrt(s2n_total) + output["1p/s2n"] = np.sqrt(s2n_total_1p) + output["1m/s2n"] = np.sqrt(s2n_total_1m) + output["2p/s2n"] = np.sqrt(s2n_total_2p) + output["2m/s2n"] = np.sqrt(s2n_total_2m) + + + psf_T, psf_e1, psf_e2 = make_mock_psf(output["00/ra"].size, rng) + + for variant in ["00", "1p", "1m", "2p", "2m"]: + output[f"{variant}/psf_T_mean"] = psf_T + output[f"{variant}/psf_g1"] = psf_e1 + output[f"{variant}/psf_g2"] = psf_e2 + + # Now we do the cuts on each variant separately + for variant in ["00", "1p", "1m", "2p", "2m"]: + snr = output[f"{variant}/s2n"] + mask = snr > snr_cut + # T cut is done later + for key in output: + if key.startswith(f"{variant}/"): + output[key] = output[key][mask] + n = output[f"{variant}/ra"].size + output[f"{variant}/weight"] = np.ones(n, dtype=np.float32) + output[f"{variant}/flags"] = np.zeros(n, dtype=np.int32) + + return output + + +def make_mock_psf(n, rng): + T = rng.normal(mock_psf_stats["T"][0], mock_psf_stats["T"][1], n) + e1 = rng.normal(mock_psf_stats["e1"][0], mock_psf_stats["e1"][1], n) + e2 = rng.normal(mock_psf_stats["e2"][0], mock_psf_stats["e2"][1], n) + return T, e1, e2 + + +def generate_mock_metacal_mag_responses(bands, nobj): + nband = len(bands) + mu = np.zeros(nband) # seems approx mean of response across bands, from HSC tract + rho = 0.25 # approx correlation between response in bands, from HSC tract + sigma2 = 1.7**2 # approx variance of response, from HSC tract + covmat = np.full((nband, nband), rho * sigma2) + np.fill_diagonal(covmat, sigma2) + mag_responses = np.random.multivariate_normal(mu, covmat, nobj).T + return mag_responses + + +class TableWriter: + def __init__(self, group, initial_size=1_000_000): + import h5py + self.group = group + self.created_datasets = False + self.initial_size = initial_size + self.keys = [] + self.current_size = {} + self.start = 0 + + def do_size_check(self, data): + sz = None + for key, value in data.items(): + if sz is None: + sz = len(value) + elif sz != len(value): + raise ValueError("All arrays to write must have the same length") + return sz + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.resize(self.start) + + def create_datasets(self, data): + size = max(self.initial_size, len(data[next(iter(data))])) + shape = (size,) + for key, value in data.items(): + maxshape = (None,) + self.group.create_dataset(key, shape, maxshape=maxshape, dtype=value.dtype) + self.current_size = size + self.created_datasets = True + self.keys = list(data.keys()) + + def resize(self, new_size): + for key in self.keys: + self.group[key].resize((new_size,)) + self.current_size = new_size + + def write(self, data): + sz = self.do_size_check(data) + + if not self.created_datasets: + self.create_datasets(data) + + end = self.start + sz + + if end > self.current_size: + new_size = max(self.current_size * 2, end) + self.resize(new_size) + + for key in self.keys: + value = data[key] + self.group[key][self.start:end] = value + self.start = end + + +class ShearTableWriter: + def __init__(self, filename, initial_size=1_000_000): + import h5py + self.file = h5py.File(filename, "w") + self.group = self.file.create_group("shear") + self.writers = {} + for variant in ["00", "1p", "1m", "2p", "2m"]: + group = self.file.create_group(f"shear/{variant}") + self.writers[variant] = TableWriter(group, initial_size=initial_size) + + def __enter__(self): + for writer in self.writers.values(): + writer.__enter__() + return self + + def __exit__(self, exc_type, exc_value, traceback): + for writer in self.writers.values(): + writer.__exit__(exc_type, exc_value, traceback) + self.file.close() + + def write(self, data): + for variant in self.writers: + tag = variant + "/" + data_variant = {key.removeprefix(tag): value for key, value in data.items() if key.startswith(tag)} + self.writers[variant].write(data_variant) + + + + + +def make_photo_cuts(data, bands, snr_cut): + # check if object is detected in any band + detected = np.zeros_like(data["id"], dtype=bool) + for b in bands: + snr = data[f"snr_{b}"] + detected |= (snr >= snr_cut) + + output = {} + for key, value in data.items(): + output[key] = value[detected] + return output + + + + +def half_light_radius_to_trace(hlr): + """Convert half-light radius to trace T of the Gaussian matrix""" + size_sigma = hlr / np.sqrt(2 * np.log(2)) + size_T = 2 * size_sigma**2 + return size_T diff --git a/txpipe/ingest/mocks.py b/txpipe/ingest/mocks.py index 39981b780..766e04609 100755 --- a/txpipe/ingest/mocks.py +++ b/txpipe/ingest/mocks.py @@ -1,64 +1,170 @@ +import glob +import numpy as np +from ..data_types import ShearCatalog, TextFile, QPPDFFile, PhotometryCatalog from ..base_stage import PipelineStage -from ..data_types import ShearCatalog, HDFFile, TextFile, QPPDFFile -from ..utils import band_variants, metacal_variants, metadetect_variants, Timer +from .mock_tools import add_lsst_like_noise, make_metadetect_catalog, TableWriter, ShearTableWriter, make_photo_cuts, half_light_radius_to_trace from ceci.config import StageParameter -import numpy as np - - -class TXCosmoDC2Mock(PipelineStage): - """ - Simulate mock shear and photometry measurements from CosmoDC2 (or similar) - - This stage simulates metacal data and metacalibrated - photometry measurements, starting from a cosmology catalogs - of the kind used as an input to DC2 image and obs-catalog simulations. - This is mainly useful for testing infrastructure in advance - of the DC2 catalogs being available, but might also be handy - for starting from a purer simulation. - """ - name = "TXCosmoDC2Mock" +class TXIngestExtraGalactic(PipelineStage): + name = "TXIngestExtraGalactic"; + inputs = [] parallel = False - inputs = [("response_model", HDFFile)] - outputs = [ ("shear_catalog", ShearCatalog), - ("photometry_catalog", HDFFile), + ("photometry_catalog", PhotometryCatalog), ] config_options = { - "cat_name": StageParameter(str, "cosmoDC2", msg="Name of the mock catalog to use."), - "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), - "snr_limit": StageParameter(float, 4.0, msg="S/N limit for object selection."), - "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), - "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), - "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), - "unit_response": StageParameter(bool, False, msg="Whether to use unit response in simulation."), - "cat_size": StageParameter(int, 0, msg="Pre-computed catalog size, if known."), - "flip_g2": StageParameter( - bool, True, msg="Whether to flip g2 sign to match conventions. TreeCorr and NaMaster use flip_g2=True" - ), - "apply_mag_cut": StageParameter(bool, False, msg="Apply magnitude cut for descqa comparison."), - "Mag_r_limit": StageParameter(float, -19, msg="Magnitude r limit for object selection."), - "metadetect": StageParameter( - bool, True, msg="Whether to make a metadetect-style catalog (True) or metacal (False)." - ), - "add_shape_noise": StageParameter(bool, True, msg="Whether to add shape noise to simulation."), - "healpixels": StageParameter(list, [-1], msg="List of HEALPix pixels to use."), + "delta_gamma": StageParameter(float, default=0.02, msg="Delta gamma value for metadetect response calculations"), + "year": StageParameter(int, default=1, msg="Number of years of LSST observations to simulate photometric noise for"), + "response_type": StageParameter(str, default="desy6", msg="Type of response to apply for metadetect"), + "shear_snr_cut": StageParameter(float, default=4.0, msg="SNR cut for shear, total over shear bands"), + "phot_snr_cut": StageParameter(float, default=5.0, msg="SNR cut to be in the photometry catalog, any band"), + "T_ratio_cut": StageParameter(float, default=0.5, msg="T/PSF_T cut for metadetect catalog"), + "random_seed": StageParameter(int, default=0, msg="Random seed"), + "bands": StageParameter(str, default="ugrizy", msg="Bands to ingest photometry for"), + "shear_bands": StageParameter(str, default="griz", msg="Bands to use for shear metadetect"), + } + + def run(self): + shear_filename = self.get_output("shear_catalog") + photo_file = self.open_output("photometry_catalog") + photo_group = photo_file.create_group("photometry") + + shear_snr_cut = self.config["shear_snr_cut"] + phot_snr_cut = self.config["phot_snr_cut"] + + rng = np.random.default_rng(self.config['random_seed']) + size = self.config.get("initial_size", 10_000_000) + + with ShearTableWriter(shear_filename, initial_size=size) as shear_writer, \ + TableWriter(photo_group, initial_size=size) as photo_writer: + for data in self.data_iterator(): + self.add_additional_columns(data) + add_lsst_like_noise(data, rng, year=self.config["year"]) + shear_data = make_metadetect_catalog(data, self.config["response_type"], self.config["delta_gamma"], self.config["shear_bands"], rng, snr_cut=shear_snr_cut) + photo_data = make_photo_cuts(data, self.config["bands"], phot_snr_cut) + shear_writer.write(shear_data) + photo_writer.write(photo_data) + + def add_additional_columns(self, data): + if "extendedness" not in data: + # Extra-galactic objects are all extended, no stars are included + data["extendedness"] = np.ones_like(data["ra"], dtype=np.int8) + + def data_iterator(self): + raise NotImplementedError("data_iterator must be implemented in subclass") + + + + +class TXIngestRomanRubin(TXIngestExtraGalactic): + """Ingest skysim simulation data for TXPipe processing. + """ + name = "TXIngestRomanRubin" + config_options = TXIngestExtraGalactic.config_options | { + "xgal_dir_name": StageParameter(str, default="/global/cfs/cdirs/lsst/shared/xgal/roman-rubin/roman_rubin_2023_v1.1.3", msg="Directory name for skysim xgal files"), + "file_pattern": StageParameter(str, default="roman_rubin_2023_*.hdf5", msg="Pattern for file name"), + } + + def data_iterator(self): + import h5py + bands = "ugrizy" + file_format = self.config["file_pattern"] + files = glob.glob(f"{self.config.xgal_dir_name}/{file_format}") + nfile = len(files) + for j, filename in enumerate(files): + print(f"Processing file {j+1}/{nfile}: {filename}") + with h5py.File(filename, "r") as f: + nkey = len(f.keys()) - 1 + + # The keys here are mostly healpix pixel indices. + for i, key in enumerate(f.keys()): + if key == "metaData": + continue + + group = f[key] + + if "ra" not in group.keys(): + print(f" No data - skipping healpix pixel {i+1}/{nkey}: {key} ") + continue + + print(f" Processing healpix pixel {i+1}/{nkey}: {key}") + data = self.extract_roman_rubin_truth_info(group, bands) + yield data + + def extract_roman_rubin_truth_info(self, group, bands): + params = [f"LSST_obs_{b}" for b in bands] + params += ["redshift", "ra", "dec", "galaxy_id", "shear1", "shear2", "diskHalfLightRadiusArcsec", "spheroidHalfLightRadiusArcsec", "bulge_frac"] + if "totalEllipticity1" in group: + params += ["totalEllipticity1", "totalEllipticity2"] + else: + params += ["spheroidEllipticity1", "spheroidEllipticity2", "diskEllipticity1", "diskEllipticity2"] + data = {p: group[p][:] for p in params} + output = {} + output["ra"] = data["ra"] + output["dec"] = data["dec"] + output["redshift_true"] = data["redshift"] + + output["id"] = data["galaxy_id"] + for b in bands: + output[f"mag_{b}"] = data[f'LSST_obs_{b}'] + + # Compute the overall (bulge + disc) half-light radius. + # This copies the GCRCatalogs approach, which + # I'm not sure is quite right. + hlr_disc = data["diskHalfLightRadiusArcsec"] + hlr_bulge = data["spheroidHalfLightRadiusArcsec"] + f = data["bulge_frac"] + hlr = (hlr_disc * (1 - f) + hlr_bulge * f) + + if "totalEllipticity1" in data: + e1 = data["totalEllipticity1"] + e2 = data["totalEllipticity2"] + else: + e1 = data["spheroidEllipticity1"] * data["bulge_frac"] + data["diskEllipticity1"] * (1 - data["bulge_frac"]) + e2 = data["spheroidEllipticity2"] * data["bulge_frac"] + data["diskEllipticity2"] * (1 - data["bulge_frac"]) + + g = data["shear1"] + 1j * data["shear2"] + e = e1 + 1j * e2 + # Apply shear to get observed ellipticity + denom = 1 + np.conj(g) * e + e_obs = (e + g) / denom + output["g1"] = np.real(e_obs) + output["g2"] = np.imag(e_obs) + output["true_g1"] = data["shear1"] + output["true_g2"] = data["shear2"] + + + # Convert half-light radius to T + output["T"] = half_light_radius_to_trace(hlr) + return output + + + +class TXIngestSkySim(TXIngestExtraGalactic): + """Ingest skysim simulation data for TXPipe processing. + """ + name = "TXIngestSkySim" + config_options = TXIngestExtraGalactic.config_options | { + "cat_name": StageParameter(str, default="skysim5000_v1.2", msg="Name of the GCR catalog to use"), + "extra_cols": StageParameter(str, default="", msg="Extra columns to ingest from the catalog"), } - def data_iterator(self, gc): + def data_iterator(self): + import GCRCatalogs + gc = GCRCatalogs.load_catalog(self.config["cat_name"]) # Columns we need from the cosmo simulation cols = [ + "ra", + "dec", "mag_true_u_lsst", "mag_true_g_lsst", "mag_true_r_lsst", "mag_true_i_lsst", "mag_true_z_lsst", "mag_true_y_lsst", - "ra", - "dec", "ellipticity_1_true", "ellipticity_2_true", "shear_1", @@ -77,1094 +183,59 @@ def data_iterator(self, gc): if nfile: j = i + 1 print(f"Loading chunk {j}/{nfile}") - yield data - - def load_catalog(self): - import GCRCatalogs - - cat_name = self.config["cat_name"] - self.bands = ("u", "g", "r", "i", "z", "y") - - print(f"Loading from catalog {cat_name}") - - kw = {} - if self.config["healpixels"] != [-1]: - kw = {"config_overwrite": {"healpix_pixels": self.config["healpixels"]}} - - gc = GCRCatalogs.load_catalog(cat_name, **kw) - - # GCR sometimes tries to read the entire catalog - # to measure its length rather than looking at metadata - # this can take a very long time. - # allow the user to say that already know it. - N = self.config["cat_size"] - if N == 0: - N = len(gc) - - return gc, N - - def run(self): - gc, N = self.load_catalog() - - print(f"Rank {self.rank} loaded: length = {N}.") - - target_size = min(N, self.config["max_size"]) - select_fraction = target_size / N - - if target_size != N: - print(f"Will select approx {100 * select_fraction:.2f}% of objects ({target_size})") - - # Prepare output files - metacal_file = self.open_output("shear_catalog", parallel=self.is_mpi()) - photo_file = self.open_output("photometry_catalog", parallel=self.is_mpi()) - photo_cols = self.setup_photometry_output(photo_file, target_size) - if self.config["metadetect"]: - metacal_cols = self.setup_metadetect_output(metacal_file, target_size) - else: - metacal_cols = self.setup_metacal_output(metacal_file, target_size) - - # Load the metacal response file - self.load_metacal_response_model() - - # Keep track of catalog position - start = 0 - count = 0 - - # Loop through chunks of - for data in self.data_iterator(gc): - # The initial chunk size, of all the input data. - # This will be reduced later as we remove objects - some_col = list(data.keys())[0] - chunk_size = len(data[some_col]) - print(f"Process {self.rank} read chunk {count} - {count + chunk_size} of {N}") - count += chunk_size - # Select a random fraction of the catalog if we are cutting down - # We can't just take the earliest galaxies because they are ordered - # by redshift - if target_size != N: - select = np.random.uniform(size=chunk_size) < select_fraction - nselect = select.sum() - print(f"Cutting down to {nselect}/{chunk_size} objects") - for name in list(data.keys()): - data[name] = data[name][select] - - # Simulate the various output data sets - mock_photometry = self.make_mock_photometry(data) - - # Cut out any objects too faint to be detected and measured. - # We have to do this after the photometry, so that we know if - # the object is detected, but we can do it before making the mock - # metacal info, saving us some time simulating un-needed objects - if self.config["snr_limit"] > 0: # otherwise there is no need to run this function which is slow - self.remove_undetected(data, mock_photometry) - - if self.config["apply_mag_cut"]: - self.apply_magnitude_cut(data) - - if self.config["metadetect"]: - mock_shear = self.make_mock_metadetect(data, mock_photometry) - else: - mock_shear = self.make_mock_metacal(data, mock_photometry) - - # The chunk size has now changed - some_col = list(mock_photometry.keys())[0] - chunk_size = len(mock_photometry[some_col]) - - # start is where this process should start writing this - # chunk of data. end is where the final process will finish - # writing, becoming the starting point for the whole next - # chunk over all the processes - start, end = self.next_output_indices(start, chunk_size) - - # Save all output - self.write_output( - start, - target_size, - photo_cols, - metacal_cols, - photo_file, - mock_photometry, - metacal_file, - mock_shear, - ) - - # The next iteration starts writing where the current one ends. - start = end - - if start >= target_size: - break - # Tidy up - - photo_file.close() - metacal_file.close() - - self.truncate_outputs(end) - - def truncate_outputs(self, n): - import h5py - - if self.comm is not None: - self.comm.Barrier() - - def visitor(name, node): - if isinstance(node, h5py.Dataset): - print(f"Resizing {name}") - node.resize((n,)) - - if self.rank == 0: - # all files should now be closed for all procs - print(f"Resizing all outupts to size {n}") - - with h5py.File(self.get_output("photometry_catalog"), "r+") as f: - f["photometry"].visititems(visitor) - - with h5py.File(self.get_output("shear_catalog"), "r+") as f: - f["shear"].visititems(visitor) - - def next_output_indices(self, start, chunk_size): - if self.comm is None: - end = start + chunk_size - else: - all_indices = self.comm.allgather(chunk_size) - starting_points = np.concatenate(([0], np.cumsum(all_indices))) - # use the old start to find the end point. - # the final starting point (not used below, since it is larger - # than the largest self.rank value) is the total data length - end = start + starting_points[-1] - start = start + starting_points[self.rank] - print(f"- Rank {self.rank} writing output to {start}-{start + chunk_size}") - return start, end - - def setup_photometry_output(self, photo_file, target_size): - # Get a list of all the column names - cols = ["ra", "dec", "extendedness"] - for band in self.bands: - cols.append(f"mag_{band}") - cols.append(f"mag_{band}_err") - cols.append(f"snr_{band}") - - for col in self.config["extra_cols"].split(): - cols.append(col) - - # Make group for all the photometry - group = photo_file.create_group("photometry") - group.attrs["bands"] = self.bands - - # Extensible columns becase we don't know the size yet. - # We will cut down the size at the end. - for col in cols: - group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") - - # The only non-float column for now - group.create_dataset("id", (target_size,), maxshape=(target_size,), dtype="i8") - - return cols + ["id"] - - def setup_metadetect_output(self, metacal_file, target_size): - # Get a list of all the column names - cols = metadetect_variants( - "g1", - "g2", - "T", - "s2n", - "T_err", - "ra", - "dec", - "psf_g1", - "psf_g2", - "mcal_psf_g1", - "mcal_psf_g2", - "mcal_psf_T_mean", - "weight", - ) + band_variants("riz", "mag", "mag_err", shear_catalog_type="metadetect") - - # Store the truth values only for the primary catalog - cols += ["00/true_g1", "00/true_g2", "00/redshift_true"] - - # Make group for all the photometry - group = metacal_file.create_group("shear") - group.attrs["bands"] = self.bands - - # Extensible columns becase we don't know the size yet. - # We will cut down the size at the end. - for col in cols: - group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") - - # Integer columns - int_cols = metadetect_variants("id", "flags") - for col in int_cols: - group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="i8") - - return cols + int_cols - - def setup_metacal_output(self, metacal_file, target_size): - # Get a list of all the column names - cols = ( - [ - "ra", - "dec", - "psf_g1", - "psf_g2", - "mcal_psf_g1", - "mcal_psf_g2", - "mcal_psf_T_mean", - ] - + metacal_variants("mcal_g1", "mcal_g2", "mcal_T", "mcal_s2n", "mcal_T_err") - + band_variants("riz", "mcal_mag", "mcal_mag_err", shear_catalog_type="metacal") - + ["weight"] - ) - - cols += ["true_g1", "true_g2", "redshift_true"] - - # Make group for all the photometry - group = metacal_file.create_group("shear") - group.attrs["bands"] = self.bands - - # Extensible columns becase we don't know the size yet. - # We will cut down the size at the end. - - for col in cols: - group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") - - group.create_dataset("id", (target_size,), maxshape=(target_size,), dtype="i8") - - for col in metacal_variants("mcal_flags"): - group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="i8") - - return cols + ["id", "mcal_flags"] - - def load_metacal_response_model(self): - """ - Load an HDF file containing the response model - R(log10(snr), size) - R_std(log10(snr), size) - - where R is the mean metacal response in a bin and - R_std is its standard deviation. - - So far only one of these files exists! - - """ - import scipy.interpolate - - if self.config["unit_response"]: - return - - model_file = self.open_input("response_model") - snr_centers = model_file["R_model/log10_snr"][:] - sz_centers = model_file["R_model/size"][:] - R_mean = model_file["R_model/R_mean"][:] - R_std = model_file["R_model/R_std"][:] - model_file.close() - - # Save a 2D spline - snr_grid, sz_grid = np.meshgrid(snr_centers, sz_centers) - self.R_spline = scipy.interpolate.SmoothBivariateSpline( - snr_grid.T.flatten(), - sz_grid.T.flatten(), - R_mean.flatten(), - w=R_std.flatten(), - ) - self.Rstd_spline = scipy.interpolate.SmoothBivariateSpline( - snr_grid.T.flatten(), sz_grid.T.flatten(), R_std.flatten() - ) - - def write_output( - self, - start, - target_size, - photo_cols, - metacal_cols, - photo_file, - photo_data, - metacal_file, - metacal_data, - ): - """ - Save the photometry we have just simulated to disc - - Parameters - ---------- - - photo_file: HDF File object - - metacal_file: HDF File object - - photo_data: dict - Dictionary of simulated photometry - - metacal_data: dict - Dictionary of simulated metacal data - - """ - # Work out the range of data to output (since we will be - # doing this in chunks). If we have cut down to a random - # subset of the catalog then we may have gone over the - # target length, depending on the random selection - n = len(photo_data["id"]) - end = min(start + n, target_size) - - # assert photo_data['id'].min()>0 - - # Save each column - for name in photo_cols: - photo_file[f"photometry/{name}"][start:end] = photo_data[name] - - for name in metacal_cols: - metacal_file[f"shear/{name}"][start:end] = metacal_data[name] - - def make_mock_photometry(self, data): - # The visit count affects the overall noise levels - n_visit = self.config["visits_per_band"] - # Do all the work in the function below - photo = make_mock_photometry(n_visit, self.bands, data, self.config["unit_response"]) - - for col in self.config["extra_cols"].split(): - photo[col] = data[col] - - return photo + yield self.process_data(data) - def make_mock_metadetect(self, data, photo): - """ - !!! This function is not working properly yet!!! - Generate a mock metadetect table. - This is a long and complicated function unfortunately. - Throughout we assume a single R = R11 = R22, with R12 = R21 = 0 + def process_data(self, data): + output = {} - Parameters - ---------- - data: dict - Dictionary of arrays read from the input cosmo catalog - photo: dict - Dictionary of arrays generated to simulate photometry - """ + # Basic info + output["ra"] = data["ra"] + output["dec"] = data["dec"] + output["redshift_true"] = data["redshift_true"] + output["id"] = data["galaxy_id"] - # These are the numbers from figure F1 of the DES Y1 shear catalog paper - # (this version is not yet public but is awaiting a second referee response) + # Magnitudes + for b in "ugrizy": + output[f"mag_{b}"] = data[f"mag_true_{b}_lsst"] - # Overall SNR for the three bands usually used for shape measurement - # We use the true SNR not the estimated one, though these are pretty close - snr = (photo["snr_r"] ** 2 + photo["snr_i"] ** 2 + photo["snr_z"] ** 2) ** 0.5 - snr_1p = (photo["snr_r_1p"] ** 2 + photo["snr_i_1p"] ** 2 + photo["snr_z_1p"] ** 2) ** 0.5 - snr_1m = (photo["snr_r_1m"] ** 2 + photo["snr_i_1m"] ** 2 + photo["snr_z_1m"] ** 2) ** 0.5 - snr_2p = (photo["snr_r_2p"] ** 2 + photo["snr_i_2p"] ** 2 + photo["snr_z_2p"] ** 2) ** 0.5 - snr_2m = (photo["snr_r_2m"] ** 2 + photo["snr_i_2m"] ** 2 + photo["snr_z_2m"] ** 2) ** 0.5 + # Shear and ellipticity info + g = data["shear_1"] + 1j * data["shear_2"] + e = data["ellipticity_1_true"] + 1j * data["ellipticity_2_true"] - if self.config["unit_response"]: - assert np.allclose(snr, snr_1p) - assert np.allclose(snr, snr_2m) + # Apply shear to get observed ellipticity + denom = 1 + np.conj(g) * e + e_obs = (e + g) / denom + output["g1"] = np.real(e_obs) + output["g2"] = np.imag(e_obs) + output["true_g1"] = data["shear_1"] + output["true_g2"] = data["shear_2"] - nobj = snr.size - - log10_snr = np.log10(snr) - - # Convert from the half-light radius which is in the - # input catalogs to a sigma. Do this by pretending - # that it is a Gaussian. This is clearly wrong, and - # if this causes major errors in the size cuts we may - # have to modify this size_hlr = data["size_true"] - size_sigma = size_hlr / np.sqrt(2 * np.log(2)) - size_T = 2 * size_sigma**2 - - # Use a fixed PSF across all the objects - psf_fwhm = 0.75 - psf_sigma = psf_fwhm / (2 * np.sqrt(2 * np.log(2))) - psf_T = 2 * psf_sigma**2 - - if self.config["unit_response"]: - R = 1.0 - R_size = 0.0 - - else: - # Use the response model to get a reasonable response - # value for this size and SNR - R_mean = self.R_spline(log10_snr, size_T, grid=False) - R_std = self.Rstd_spline(log10_snr, size_T, grid=False) - - # Assume a 0.2 correlation between the size response - # and the shear response. - rho = 0.2 - f = np.random.multivariate_normal([0.0, 0.0], [[1.0, rho], [rho, 1.0]], nobj).T - R, R_size = f * R_std + R_mean - - # Convert magnitudes to fluxes according to the baseline - # use in the metacal numbers - flux_r = 10**0.4 * (27 - photo["mag_r"]) - flux_i = 10**0.4 * (27 - photo["mag_i"]) - flux_z = 10**0.4 * (27 - photo["mag_z"]) - - # Note that this is delta_gamma not 2*delta_gamma, because - # of how we use it below - delta_gamma = 0.01 - - # Use a fixed shape noise per component to generate - # an overall - if self.config["add_shape_noise"]: - shape_noise = 0.26 - else: - shape_noise = 0.0 - - eps = np.random.normal(0, shape_noise, nobj) + 1.0j * np.random.normal(0, shape_noise, nobj) - # True shears without shape noise - g1 = data["shear_1"] - g2 = data["shear_2"] - - if self.config["flip_g2"]: - g2 *= -1 - - # Do the full combination of (g,epsilon) -> e, not the approx one - g = g1 + 1j * g2 - e = (eps + g) / (1 + g.conj() * eps) - e1 = e.real - e2 = e.imag - - zero = np.zeros(nobj) - # Now collect together everything to go into the metacal - # file - output = { - # Basic values - "00/id": photo["id"], - "1p/id": photo["id"], - "1m/id": photo["id"], - "2p/id": photo["id"], - "2m/id": photo["id"], - "00/ra": photo["ra"], - "1p/ra": photo["ra"], - "1m/ra": photo["ra"], - "2p/ra": photo["ra"], - "2m/ra": photo["ra"], - "00/dec": photo["dec"], - "1p/dec": photo["dec"], - "1m/dec": photo["dec"], - "2p/dec": photo["dec"], - "2m/dec": photo["dec"], - # Keep the truth values for use in some testing paths - # We are not pretending to do meta - "00/true_g1": g1, - "00/true_g2": g2, - "00/redshift_true": photo["redshift_true"], - # g1 - "00/g1": e1 * R, - "1p/g1": (e1 + delta_gamma) * R, - "1m/g1": (e1 - delta_gamma) * R, - "2p/g1": e1 * R, - "2m/g1": e1 * R, - # g2 - "00/g2": e2 * R, - "1p/g2": e2 * R, - "1m/g2": e2 * R, - "2p/g2": (e2 + delta_gamma) * R, - "2m/g2": (e2 - delta_gamma) * R, - # T - "00/T": size_T, - "1p/T": size_T + R_size * delta_gamma, - "1m/T": size_T - R_size * delta_gamma, - "2p/T": size_T + R_size * delta_gamma, - "2m/T": size_T - R_size * delta_gamma, - # Terr - "00/T_err": zero, - "1p/T_err": zero, - "1m/T_err": zero, - "2p/T_err": zero, - "2m/T_err": zero, - # size - "00/s2n": snr, - "1p/s2n": snr_1p, - "1m/s2n": snr_1m, - "2p/s2n": snr_2p, - "2m/s2n": snr_2m, - # Magntiudes and fluxes, just copied from the inputs. - "00/mag_r": photo["mag_r"], - "00/mag_i": photo["mag_i"], - "00/mag_z": photo["mag_z"], - "00/mag_err_r": photo["mag_r_err"], - "00/mag_err_i": photo["mag_i_err"], - "00/mag_err_z": photo["mag_z_err"], - "1p/mag_r": photo["mag_r_1p"], - "2p/mag_r": photo["mag_r_2p"], - "1m/mag_r": photo["mag_r_1m"], - "2m/mag_r": photo["mag_r_2m"], - "1p/mag_i": photo["mag_i_1p"], - "2p/mag_i": photo["mag_i_2p"], - "1m/mag_i": photo["mag_i_1m"], - "2m/mag_i": photo["mag_i_2m"], - "1p/mag_z": photo["mag_z_1p"], - "2p/mag_z": photo["mag_z_2p"], - "1m/mag_z": photo["mag_z_1m"], - "2m/mag_z": photo["mag_z_2m"], - "1p/mag_err_r": photo["mag_r_err"], - "2p/mag_err_r": photo["mag_r_err"], - "1m/mag_err_r": photo["mag_r_err"], - "2m/mag_err_r": photo["mag_r_err"], - "1p/mag_err_i": photo["mag_i_err"], - "2p/mag_err_i": photo["mag_i_err"], - "1m/mag_err_i": photo["mag_i_err"], - "2m/mag_err_i": photo["mag_i_err"], - "1p/mag_err_z": photo["mag_z_err"], - "2p/mag_err_z": photo["mag_z_err"], - "1m/mag_err_z": photo["mag_z_err"], - "2m/mag_err_z": photo["mag_z_err"], - # Fixed PSF parameters - all round with same size - "00/mcal_psf_g1": zero, - "1p/mcal_psf_g1": zero, - "1m/mcal_psf_g1": zero, - "2p/mcal_psf_g1": zero, - "2m/mcal_psf_g1": zero, - "00/mcal_psf_g2": zero, - "1p/mcal_psf_g2": zero, - "1m/mcal_psf_g2": zero, - "2p/mcal_psf_g2": zero, - "2m/mcal_psf_g2": zero, - "00/psf_g1": zero, - "1p/psf_g1": zero, - "1m/psf_g1": zero, - "2p/psf_g1": zero, - "2m/psf_g1": zero, - "00/psf_g2": zero, - "1p/psf_g2": zero, - "1m/psf_g2": zero, - "2p/psf_g2": zero, - "2m/psf_g2": zero, - "00/mcal_psf_T_mean": np.repeat(psf_T, nobj), - "1p/mcal_psf_T_mean": np.repeat(psf_T, nobj), - "1m/mcal_psf_T_mean": np.repeat(psf_T, nobj), - "2p/mcal_psf_T_mean": np.repeat(psf_T, nobj), - "2m/mcal_psf_T_mean": np.repeat(psf_T, nobj), - # Everything that gets this far should be used, so flag=0 - "00/flags": np.zeros(nobj, dtype=np.int32), - "1p/flags": np.zeros(nobj, dtype=np.int32), - "1m/flags": np.zeros(nobj, dtype=np.int32), - "2p/flags": np.zeros(nobj, dtype=np.int32), - "2m/flags": np.zeros(nobj, dtype=np.int32), - # we use weights of one for everything for metacal - # if that ever changes we may also need to add - # weight_1p, etc. - "00/weight": np.ones(nobj), - "1p/weight": np.ones(nobj), - "1m/weight": np.ones(nobj), - "2p/weight": np.ones(nobj), - "2m/weight": np.ones(nobj), - } + output["T"] = half_light_radius_to_trace(size_hlr) return output - def make_mock_metacal(self, data, photo): - """ - I copied the old version here to use if we do not want metadetect. - Generate a mock metacal table. - This is a long and complicated function unfortunately. - Throughout we assume a single R = R11 = R22, with R12 = R21 = 0 - - Parameters - ---------- - data: dict - Dictionary of arrays read from the input cosmo catalog - photo: dict - Dictionary of arrays generated to simulate photometry - """ - - # These are the numbers from figure F1 of the DES Y1 shear catalog paper - # (this version is not yet public but is awaiting a second referee response) - - # Overall SNR for the three bands usually used for shape measurement - # We use the true SNR not the estimated one, though these are pretty close - snr = (photo["snr_r"] ** 2 + photo["snr_i"] ** 2 + photo["snr_z"] ** 2) ** 0.5 - snr_1p = (photo["snr_r_1p"] ** 2 + photo["snr_i_1p"] ** 2 + photo["snr_z_1p"] ** 2) ** 0.5 - snr_1m = (photo["snr_r_1m"] ** 2 + photo["snr_i_1m"] ** 2 + photo["snr_z_1m"] ** 2) ** 0.5 - snr_2p = (photo["snr_r_2p"] ** 2 + photo["snr_i_2p"] ** 2 + photo["snr_z_2p"] ** 2) ** 0.5 - snr_2m = (photo["snr_r_2m"] ** 2 + photo["snr_i_2m"] ** 2 + photo["snr_z_2m"] ** 2) ** 0.5 - - if self.config["unit_response"]: - assert np.allclose(snr, snr_1p) - assert np.allclose(snr, snr_2m) - - nobj = snr.size - - log10_snr = np.log10(snr) - - # Convert from the half-light radius which is in the - # input catalogs to a sigma. Do this by pretending - # that it is a Gaussian. This is clearly wrong, and - # if this causes major errors in the size cuts we may - # have to modify this - size_hlr = data["size_true"] - size_sigma = size_hlr / np.sqrt(2 * np.log(2)) - size_T = 2 * size_sigma**2 - - # Use a fixed PSF across all the objects - psf_fwhm = 0.75 - psf_sigma = psf_fwhm / (2 * np.sqrt(2 * np.log(2))) - psf_T = 2 * psf_sigma**2 - - if self.config["unit_response"]: - R = 1.0 - R_size = 0.0 - - else: - # Use the response model to get a reasonable response - # value for this size and SNR - R_mean = self.R_spline(log10_snr, size_T, grid=False) - R_std = self.Rstd_spline(log10_snr, size_T, grid=False) - - # Assume a 0.2 correlation between the size response - # and the shear response. - rho = 0.2 - f = np.random.multivariate_normal([0.0, 0.0], [[1.0, rho], [rho, 1.0]], nobj).T - R, R_size = f * R_std + R_mean - # Convert magnitudes to fluxes according to the baseline - # use in the metacal numbers - flux_r = 10**0.4 * (27 - photo["mag_r"]) - flux_i = 10**0.4 * (27 - photo["mag_i"]) - flux_z = 10**0.4 * (27 - photo["mag_z"]) - - # Note that this is delta_gamma not 2*delta_gamma, because - # of how we use it below - delta_gamma = 0.01 - - # Use a fixed shape noise per component to generate - # an overall - if self.config["add_shape_noise"]: - shape_noise = 0.26 - else: - shape_noise = 0.0 - - eps = np.random.normal(0, shape_noise, nobj) + 1.0j * np.random.normal(0, shape_noise, nobj) - # True shears without shape noise - g1 = data["shear_1"] - g2 = data["shear_2"] - - if self.config["flip_g2"]: - g2 *= -1 - - # Do the full combination of (g,epsilon) -> e, not the approx one - g = g1 + 1j * g2 - e = (eps + g) / (1 + g.conj() * eps) - e1 = e.real - e2 = e.imag - - zero = np.zeros(nobj) - # Now collect together everything to go into the metacal - # file - output = { - # Basic values - "id": photo["id"], - "ra": photo["ra"], - "dec": photo["dec"], - # Keep the truth value just in case - "true_g1": g1, - "true_g2": g2, - # add true redshift since it is used in source selector - "redshift_true": photo["redshift_true"], - # g1 - "mcal_g1": e1 * R, - "mcal_g1_1p": (e1 + delta_gamma) * R, - "mcal_g1_1m": (e1 - delta_gamma) * R, - "mcal_g1_2p": e1 * R, - "mcal_g1_2m": e1 * R, - # g2 - "mcal_g2": e2 * R, - "mcal_g2_1p": e2 * R, - "mcal_g2_1m": e2 * R, - "mcal_g2_2p": (e2 + delta_gamma) * R, - "mcal_g2_2m": (e2 - delta_gamma) * R, - # T - "mcal_T": size_T, - "mcal_T_1p": size_T + R_size * delta_gamma, - "mcal_T_1m": size_T - R_size * delta_gamma, - "mcal_T_2p": size_T + R_size * delta_gamma, - "mcal_T_2m": size_T - R_size * delta_gamma, - # Terr - "mcal_T_err": zero, - "mcal_T_err_1p": zero, - "mcal_T_err_1m": zero, - "mcal_T_err_2p": zero, - "mcal_T_err_2m": zero, - # size - "mcal_s2n": snr, - "mcal_s2n_1p": snr_1p, - "mcal_s2n_1m": snr_1m, - "mcal_s2n_2p": snr_2p, - "mcal_s2n_2m": snr_2m, - # Magntiudes and fluxes, just copied from the inputs. - "mcal_mag_r": photo["mag_r"], - "mcal_mag_i": photo["mag_i"], - "mcal_mag_z": photo["mag_z"], - "mcal_mag_err_r": photo["mag_r_err"], - "mcal_mag_err_i": photo["mag_i_err"], - "mcal_mag_err_z": photo["mag_z_err"], - "mcal_mag_r_1p": photo["mag_r_1p"], - "mcal_mag_r_2p": photo["mag_r_2p"], - "mcal_mag_r_1m": photo["mag_r_1m"], - "mcal_mag_r_2m": photo["mag_r_2m"], - "mcal_mag_i_1p": photo["mag_i_1p"], - "mcal_mag_i_2p": photo["mag_i_2p"], - "mcal_mag_i_1m": photo["mag_i_1m"], - "mcal_mag_i_2m": photo["mag_i_2m"], - "mcal_mag_z_1p": photo["mag_z_1p"], - "mcal_mag_z_2p": photo["mag_z_2p"], - "mcal_mag_z_1m": photo["mag_z_1m"], - "mcal_mag_z_2m": photo["mag_z_2m"], - "mcal_mag_err_r_1p": photo["mag_r_err"], - "mcal_mag_err_r_2p": photo["mag_r_err"], - "mcal_mag_err_r_1m": photo["mag_r_err"], - "mcal_mag_err_r_2m": photo["mag_r_err"], - "mcal_mag_err_i_1p": photo["mag_i_err"], - "mcal_mag_err_i_2p": photo["mag_i_err"], - "mcal_mag_err_i_1m": photo["mag_i_err"], - "mcal_mag_err_i_2m": photo["mag_i_err"], - "mcal_mag_err_z_1p": photo["mag_z_err"], - "mcal_mag_err_z_2p": photo["mag_z_err"], - "mcal_mag_err_z_1m": photo["mag_z_err"], - "mcal_mag_err_z_2m": photo["mag_z_err"], - # Fixed PSF parameters - all round with same size - "mcal_psf_g1": zero, - "mcal_psf_g2": zero, - "psf_g1": zero, - "psf_g2": zero, - "mcal_psf_T_mean": np.repeat(psf_T, nobj), - # Everything that gets this far should be used, so flag=0 - "mcal_flags": np.zeros(nobj, dtype=np.int32), - "mcal_flags_1p": np.zeros(nobj, dtype=np.int32), - "mcal_flags_1m": np.zeros(nobj, dtype=np.int32), - "mcal_flags_2p": np.zeros(nobj, dtype=np.int32), - "mcal_flags_2m": np.zeros(nobj, dtype=np.int32), - # we use weights of one for everything for metacal - # if that ever changes we may also need to add - # weight_1p, etc. - "weight": np.ones(nobj), - } - - return output - - def apply_magnitude_cut(self, data): - """ - Allow for a cut in absolute magnitude. - """ - mag_limit = self.config["Mag_r_limit"] - sel = data["Mag_true_r_sdss_z0"] < mag_limit - - ndet = sel.sum() - ntot = sel.size - fract = ndet * 100.0 / ntot - print(f"{ndet} objects pass magnitude cut out of {ntot} objects ({fract:.1f}%)") - - # Remove all objects not selected - for key in list(data.keys()): - data[key] = data[key][sel] - - def remove_undetected(self, data, photo): - """ - Strip out any undetected objects from the two - simulated data sets. - - Use a configuration parameter snr_limit to decide - on the detection limit. - """ - snr_limit = self.config["snr_limit"] - - # This will become a boolean array in a minute when - # we OR it with an array - detected = False - - # Check if detected in any band. Makes a boolean array - # Even though we started with just a single False. - for band in self.bands: - detected_in_band = photo[f"snr_{band}"] > snr_limit - not_detected_in_band = ~detected_in_band - # Set objects not detected in one band that are detected in another - # to inf magnitude in that band, and the SNR to zero. - # We have to do this for each of the variants also, because otherwise - # we end up with wildly different final SNR values later. - # This is the metadetection issue really! - for v in metacal_variants(f"snr_{band}"): - photo[v][not_detected_in_band] = 0.0 - for v in metacal_variants(f"mag_{band}"): - photo[v][not_detected_in_band] = np.inf - - # Record that we have detected this object at all - detected |= detected_in_band - - # the protoDC2 sims have an edge with zero shear. - # Remove it. - zero_shear_edge = (abs(data["shear_1"]) == 0) & (abs(data["shear_2"]) == 0) - print("Removing {} objects with identically zero shear in both terms".format(zero_shear_edge.sum())) - - detected &= ~zero_shear_edge - - # Print out interesting information - ndet = detected.sum() - ntot = detected.size - fract = ndet * 100.0 / ntot - print(f"Detected {ndet} out of {ntot} objects ({fract:.1f}%)") - - # Remove all objects not detected in *any* band - # make a copy of the keys with list(photo.keys()) so we are not - # modifying during the iteration - for key in list(photo.keys()): - photo[key] = photo[key][detected] - - for key in list(data.keys()): - data[key] = data[key][detected] - - -class TXBuzzardMock(TXCosmoDC2Mock): +class TXIngestCosmoDC2(TXIngestSkySim): + """Ingest CosmoDC2 data for TXPipe processing. """ - Simulate mock photometry from Buzzard. - - May be obsolete. - """ - - name = "TXBuzzardMock" - parallel = False - inputs = [("response_model", HDFFile)] - - outputs = [ - ("shear_catalog", ShearCatalog), - ("photometry_catalog", HDFFile), - ] - - config_options = { - "cat_name": StageParameter(str, "buzzard", msg="Name of the mock catalog to use."), - "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), - "snr_limit": StageParameter(float, 4.0, msg="S/N limit for object selection."), - "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), - "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), - "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), - "unit_response": StageParameter(bool, False, msg="Whether to use unit response in simulation."), - "flip_g2": StageParameter(bool, True, msg="Whether to flip g2 sign to match conventions."), + name = "TXIngestCosmoDC2" + config_options = TXIngestSkySim.config_options | { + "cat_name": StageParameter(str, default="cosmoDC2", msg="Name of the GCR catalog to use"), } -class TXGaussianSimsMock(TXCosmoDC2Mock): +class TXIngestBuzzard(TXIngestSkySim): + """Ingest Buzzard data for TXPipe processing. """ - Simulate mock photometry from gaussian simulations - - This stage simulates metacal data and metacalibrated - photometry measurements, starting from simple Gaussian simulations - produced starting from CCL power spectra and poission sampling galaxies - from it. - - This is mainly useful for testing infrastructure - starting from a purer simulation. - """ - - name = "TXGaussianSimsMock" - parallel = False - inputs = [("response_model", HDFFile)] - - outputs = [ - ("shear_catalog", ShearCatalog), - ("photometry_catalog", HDFFile), - ] - - config_options = { - "cat_name": StageParameter(str, "GaussianSims", msg="Name of the Gaussian simulation catalog."), - "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), - "snr_limit": StageParameter(float, 0.0, msg="S/N limit for object selection (0 for all)."), - "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), - "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), - "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), - "unit_response": StageParameter(bool, True, msg="Whether to use unit response in simulation."), - "cat_size": StageParameter(int, 0, msg="Catalog size (0 for all)."), - "flip_g2": StageParameter(bool, False, msg="Whether to flip g2 sign to match conventions."), - "apply_mag_cut": StageParameter(bool, False, msg="Apply magnitude cut for descqa comparison."), - "metadetect": StageParameter( - bool, True, msg="Whether to make a metadetect-style catalog (True) or metacal (False)." - ), - "add_shape_noise": StageParameter(bool, False, msg="Whether to add shape noise to simulation."), + name = "TXIngestBuzzard" + config_options = TXIngestSkySim.config_options | { + "cat_name": StageParameter(str, default="buzzard", msg="Name of the GCR catalog to use"), } - def data_iterator(self, cat): - # all cols we need - ( - ra, - dec, - g1, - g2, - z, - m_u, - m_g, - m_r, - m_i, - m_z, - m_y, - etrue1, - etrue2, - size, - galaxy_id, - ) = cat - - # figuring out number of chunks - chunk_size = 1_000_000 - nchunk = len(ra) // chunk_size - if nchunk * chunk_size < len(ra): - nchunk += 1 - - # main loop - for i in range(nchunk): - # start and end index of this chunk in full data - s = chunk_size * i - e = s + chunk_size - - # make dict just for this chunk of data - data_chunk = {} - data_chunk["ra"] = ra[s:e] - data_chunk["dec"] = dec[s:e] - data_chunk["shear_1"] = g1[s:e] - data_chunk["shear_2"] = g2[s:e] - data_chunk["size_true"] = size[s:e] - data_chunk["galaxy_id"] = galaxy_id[s:e] - data_chunk["redshift_true"] = z[s:e] - data_chunk["ellipticity_1_true"] = etrue1[s:e] - data_chunk["ellipticity_2_true"] = etrue2[s:e] - data_chunk["mag_true_u_lsst"] = m_u[s:e] - data_chunk["mag_true_g_lsst"] = m_g[s:e] - data_chunk["mag_true_r_lsst"] = m_r[s:e] - data_chunk["mag_true_i_lsst"] = m_i[s:e] - data_chunk["mag_true_z_lsst"] = m_z[s:e] - data_chunk["mag_true_y_lsst"] = m_y[s:e] - - # send this chunk of data back to caller - yield data_chunk - def load_catalog(self): - """ - This method loads the Gaussian sims produced in this notebook - https://github.com/LSSTDESC/txpipe-cosmodc2/blob/master/notebooks/MaskAreaGaussianSims_and_FakeTXPipeInput.ipynb - """ - cat_name = self.config["cat_name"] - self.bands = ("u", "g", "r", "i", "z", "y") - print(f"Loading from catalog {cat_name}") - - cat = np.load(cat_name, allow_pickle=True) - - N = len(cat[0]) - - return cat, N - - -def make_mock_photometry(n_visit, bands, data, unit_response): - """ - Generate a mock photometric table with noise added - - This is mostly from LSE‐40 by - Zeljko Ivezic, Lynne Jones, and Robert Lupton - retrieved here: - http://faculty.washington.edu/ivezic/Teaching/Astr511/LSST_SNRdoc.pdf - """ - - output = {} - nobj = data["galaxy_id"].size - output["ra"] = data["ra"] - output["dec"] = data["dec"] - output["id"] = data["galaxy_id"] - output["extendedness"] = np.ones(nobj) - - # Sky background, seeing FWHM, and system throughput, - # all from table 2 of Ivezic, Jones, & Lupton - B_b = np.array([85.07, 467.9, 1085.2, 1800.3, 2775.7, 3614.3]) - fwhm = np.array([0.77, 0.73, 0.70, 0.67, 0.65, 0.63]) - T_b = np.array([0.0379, 0.1493, 0.1386, 0.1198, 0.0838, 0.0413]) - - # effective pixels size for a Gaussian PSF, from equation - # 27 of Ivezic, Jones, & Lupton - pixel = 0.2 # arcsec - N_eff = 2.436 * (fwhm / pixel) ** 2 - - # other numbers from Ivezic, Jones, & Lupton - sigma_inst2 = 10.0**2 # instrumental noise in photons per pixel, just below eq 42 - gain = 1 # ADU units per photon, also just below eq 42 - D = 6.5 # primary mirror diameter in meters, from LSST key numbers page (effective clear diameter) - # Not sure what effective clear diameter means but it's closer to the number used in the paper - time = 30.0 # seconds per exposure, from LSST key numbers page - sigma_b2 = 0.0 # error on background, just above eq 42 - - # combination of these used below, from various equations - factor = 5455.0 / gain * (D / 6.5) ** 2 * (time / 30.0) - - # Fake some metacal responses - if unit_response: - mag_responses = [0.0 for i in bands] - else: - mag_responses = generate_mock_metacal_mag_responses(bands, nobj) - - delta_gamma = 0.01 # this is the half-delta gamma, i.e. gamma_+ - gamma_0 - # that's the right thing to use here because we are doing m+ = m0 + dm/dy*dy - # Use the same response for gamma1 and gamma2 - - for band, b_b, t_b, n_eff, mag_resp in zip(bands, B_b, T_b, N_eff, mag_responses): - # truth magnitude - mag = data[f"mag_true_{band}_lsst"] - output[f"mag_true_{band}_lsst"] = mag - - # expected signal photons, over all visits - c_b = factor * 10 ** (0.4 * (25 - mag)) * t_b * n_visit - - # expected background photons, over all visits - background = np.sqrt((b_b + sigma_inst2 + sigma_b2) * n_eff * n_visit) - # total expected photons - mu = c_b + background - - # Observed number of photons in excess of the expected background. - # This can go negative for faint magnitudes, indicating that the object is - # not going to be detected - n_obs = np.random.poisson(mu) - background - n_obs_err = np.sqrt(mu) - - # signal to noise, true and estimated values - true_snr = c_b / background - obs_snr = n_obs / background - - # observed magnitude from inverting c_b expression above - mag_obs = 25 - 2.5 * np.log10(n_obs / factor / t_b / n_visit) - - # converting error on n_obs to error on mag - mag_err = 2.5 / np.log(10.0) / obs_snr - - output[f"true_snr_{band}"] = true_snr - output[f"snr_{band}"] = obs_snr - output[f"mag_{band}"] = mag_obs - output[f"mag_err_{band}"] = mag_err - output[f"mag_{band}_err"] = mag_err - - m = mag_resp * delta_gamma - - m1 = mag_obs + m - m2 = mag_obs - m - mag_obs_1p = m1 - mag_obs_1m = m2 - - output[f"mag_{band}_1p"] = m1 - output[f"mag_{band}_1m"] = m2 - output[f"mag_{band}_2p"] = m1 - output[f"mag_{band}_2m"] = m2 - - # Scale the SNR values according the to change in magnitude.r - s = np.power(10.0, -0.4 * m) - - snr1 = obs_snr * s - snr2 = obs_snr / s - output[f"snr_{band}_1p"] = snr1 - output[f"snr_{band}_1m"] = snr2 - output[f"snr_{band}_2p"] = snr1 - output[f"snr_{band}_2m"] = snr2 - - return output - - -def generate_mock_metacal_mag_responses(bands, nobj): - nband = len(bands) - mu = np.zeros(nband) # seems approx mean of response across bands, from HSC tract - rho = 0.25 # approx correlation between response in bands, from HSC tract - sigma2 = 1.7**2 # approx variance of response, from HSC tract - covmat = np.full((nband, nband), rho * sigma2) - np.fill_diagonal(covmat, sigma2) - mag_responses = np.random.multivariate_normal(mu, covmat, nobj).T - return mag_responses class TXSimpleMock(PipelineStage): @@ -1264,21 +335,3 @@ def run(self): q = qp.Ensemble(qp.interp, data=dict(xvals=zgrid, yvals=pdfs)) q.set_ancil(dict(zmode=redshifts, zmean=redshifts, zmedian=redshifts)) q.write_to(self.get_output("photoz_pdfs")) - - -def test(): - import pylab - - data = { - "ra": None, - "dec": None, - "galaxy_id": None, - } - bands = "ugrizy" - n_visit = 165 - M5 = [24.22, 25.17, 24.74, 24.38, 23.80] - for b, m5 in zip(bands, M5): - data[f"mag_true_{b}_lsst"] = np.repeat(m5, 10000) - results = make_mock_photometry(n_visit, bands, data, True) - pylab.hist(results["snr_r"], bins=50, histtype="step") - pylab.savefig("snr_r.png") diff --git a/txpipe/ingest/old_mocks.py b/txpipe/ingest/old_mocks.py new file mode 100755 index 000000000..3171d5526 --- /dev/null +++ b/txpipe/ingest/old_mocks.py @@ -0,0 +1,1266 @@ +from ..base_stage import PipelineStage +from ..data_types import ShearCatalog, HDFFile, TextFile, QPPDFFile +from ..utils import band_variants, metacal_variants, metadetect_variants, Timer +from ceci.config import StageParameter +import numpy as np + + +class TXCosmoDC2MockOld(PipelineStage): + """ + Simulate mock shear and photometry measurements from CosmoDC2 (or similar) + + This stage simulates metacal data and metacalibrated + photometry measurements, starting from a cosmology catalogs + of the kind used as an input to DC2 image and obs-catalog simulations. + + This is mainly useful for testing infrastructure in advance + of the DC2 catalogs being available, but might also be handy + for starting from a purer simulation. + """ + + name = "TXCosmoDC2MockOld" + parallel = False + inputs = [("response_model", HDFFile)] + + outputs = [ + ("shear_catalog", ShearCatalog), + ("photometry_catalog", HDFFile), + ] + + config_options = { + "cat_name": StageParameter(str, "cosmoDC2", msg="Name of the mock catalog to use."), + "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), + "snr_limit": StageParameter(float, 4.0, msg="S/N limit for object selection."), + "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), + "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), + "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), + "unit_response": StageParameter(bool, False, msg="Whether to use unit response in simulation."), + "cat_size": StageParameter(int, 0, msg="Pre-computed catalog size, if known."), + "flip_g2": StageParameter( + bool, True, msg="Whether to flip g2 sign to match conventions. TreeCorr and NaMaster use flip_g2=True" + ), + "apply_mag_cut": StageParameter(bool, False, msg="Apply magnitude cut for descqa comparison."), + "Mag_r_limit": StageParameter(float, -19, msg="Magnitude r limit for object selection."), + "metadetect": StageParameter( + bool, True, msg="Whether to make a metadetect-style catalog (True) or metacal (False)." + ), + "add_shape_noise": StageParameter(bool, True, msg="Whether to add shape noise to simulation."), + "healpixels": StageParameter(list, [-1], msg="List of HEALPix pixels to use."), + } + + def data_iterator(self, gc): + # Columns we need from the cosmo simulation + cols = [ + "mag_true_u_lsst", + "mag_true_g_lsst", + "mag_true_r_lsst", + "mag_true_i_lsst", + "mag_true_z_lsst", + "mag_true_y_lsst", + "ra", + "dec", + "ellipticity_1_true", + "ellipticity_2_true", + "shear_1", + "shear_2", + "size_true", + "galaxy_id", + "redshift_true", + ] + # Add any extra requestd columns + cols += self.config["extra_cols"].split() + + it = gc.get_quantities(cols, return_iterator=True) + nfile = len(gc._file_list) if hasattr(gc, "_file_list") else 0 + + for i, data in enumerate(it): + if nfile: + j = i + 1 + print(f"Loading chunk {j}/{nfile}") + yield data + + def load_catalog(self): + import GCRCatalogs + + cat_name = self.config["cat_name"] + self.bands = ("u", "g", "r", "i", "z", "y") + + print(f"Loading from catalog {cat_name}") + + kw = {} + if self.config["healpixels"] != [-1]: + kw = {"config_overwrite": {"healpix_pixels": self.config["healpixels"]}} + + gc = GCRCatalogs.load_catalog(cat_name, **kw) + + # GCR sometimes tries to read the entire catalog + # to measure its length rather than looking at metadata + # this can take a very long time. + # allow the user to say that already know it. + N = self.config["cat_size"] + if N == 0: + N = len(gc) + + return gc, N + + def run(self): + gc, N = self.load_catalog() + + print(f"Rank {self.rank} loaded: length = {N}.") + + target_size = min(N, self.config["max_size"]) + select_fraction = target_size / N + + if target_size != N: + print(f"Will select approx {100 * select_fraction:.2f}% of objects ({target_size})") + + # Prepare output files + metacal_file = self.open_output("shear_catalog", parallel=self.is_mpi()) + photo_file = self.open_output("photometry_catalog", parallel=self.is_mpi()) + photo_cols = self.setup_photometry_output(photo_file, target_size) + if self.config["metadetect"]: + metacal_cols = self.setup_metadetect_output(metacal_file, target_size) + else: + metacal_cols = self.setup_metacal_output(metacal_file, target_size) + + # Load the metacal response file + self.load_metacal_response_model() + + # Keep track of catalog position + start = 0 + count = 0 + + # Loop through chunks of + for data in self.data_iterator(gc): + # The initial chunk size, of all the input data. + # This will be reduced later as we remove objects + some_col = list(data.keys())[0] + chunk_size = len(data[some_col]) + print(f"Process {self.rank} read chunk {count} - {count + chunk_size} of {N}") + count += chunk_size + # Select a random fraction of the catalog if we are cutting down + # We can't just take the earliest galaxies because they are ordered + # by redshift + if target_size != N: + select = np.random.uniform(size=chunk_size) < select_fraction + nselect = select.sum() + print(f"Cutting down to {nselect}/{chunk_size} objects") + for name in list(data.keys()): + data[name] = data[name][select] + + # Simulate the various output data sets + mock_photometry = self.make_mock_photometry(data) + + # Cut out any objects too faint to be detected and measured. + # We have to do this after the photometry, so that we know if + # the object is detected, but we can do it before making the mock + # metacal info, saving us some time simulating un-needed objects + if self.config["snr_limit"] > 0: # otherwise there is no need to run this function which is slow + self.remove_undetected(data, mock_photometry) + + if self.config["apply_mag_cut"]: + self.apply_magnitude_cut(data) + + if self.config["metadetect"]: + mock_shear = self.make_mock_metadetect(data, mock_photometry) + else: + mock_shear = self.make_mock_metacal(data, mock_photometry) + + # The chunk size has now changed + some_col = list(mock_photometry.keys())[0] + chunk_size = len(mock_photometry[some_col]) + + # start is where this process should start writing this + # chunk of data. end is where the final process will finish + # writing, becoming the starting point for the whole next + # chunk over all the processes + start, end = self.next_output_indices(start, chunk_size) + + # Save all output + self.write_output( + start, + target_size, + photo_cols, + metacal_cols, + photo_file, + mock_photometry, + metacal_file, + mock_shear, + ) + + # The next iteration starts writing where the current one ends. + start = end + + if start >= target_size: + break + # Tidy up + + photo_file.close() + metacal_file.close() + + self.truncate_outputs(end) + + def truncate_outputs(self, n): + import h5py + + if self.comm is not None: + self.comm.Barrier() + + def visitor(name, node): + if isinstance(node, h5py.Dataset): + print(f"Resizing {name}") + node.resize((n,)) + + if self.rank == 0: + # all files should now be closed for all procs + print(f"Resizing all outupts to size {n}") + + with h5py.File(self.get_output("photometry_catalog"), "r+") as f: + f["photometry"].visititems(visitor) + + with h5py.File(self.get_output("shear_catalog"), "r+") as f: + f["shear"].visititems(visitor) + + def next_output_indices(self, start, chunk_size): + if self.comm is None: + end = start + chunk_size + else: + all_indices = self.comm.allgather(chunk_size) + starting_points = np.concatenate(([0], np.cumsum(all_indices))) + # use the old start to find the end point. + # the final starting point (not used below, since it is larger + # than the largest self.rank value) is the total data length + end = start + starting_points[-1] + start = start + starting_points[self.rank] + print(f"- Rank {self.rank} writing output to {start}-{start + chunk_size}") + return start, end + + def setup_photometry_output(self, photo_file, target_size): + # Get a list of all the column names + cols = ["ra", "dec", "extendedness"] + for band in self.bands: + cols.append(f"mag_{band}") + cols.append(f"mag_{band}_err") + cols.append(f"snr_{band}") + + for col in self.config["extra_cols"].split(): + cols.append(col) + + # Make group for all the photometry + group = photo_file.create_group("photometry") + group.attrs["bands"] = self.bands + + # Extensible columns becase we don't know the size yet. + # We will cut down the size at the end. + for col in cols: + group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") + + # The only non-float column for now + group.create_dataset("id", (target_size,), maxshape=(target_size,), dtype="i8") + + return cols + ["id"] + + def setup_metadetect_output(self, metacal_file, target_size): + # Get a list of all the column names + cols = metadetect_variants( + "g1", + "g2", + "T", + "s2n", + "T_err", + "ra", + "dec", + "psf_g1", + "psf_g2", + "mcal_psf_g1", + "mcal_psf_g2", + "mcal_psf_T_mean", + "weight", + ) + band_variants("riz", "mag", "mag_err", shear_catalog_type="metadetect") + + # Store the truth values only for the primary catalog + cols += ["00/true_g1", "00/true_g2", "00/redshift_true"] + + # Make group for all the photometry + group = metacal_file.create_group("shear") + group.attrs["bands"] = self.bands + + # Extensible columns becase we don't know the size yet. + # We will cut down the size at the end. + for col in cols: + group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") + + # Integer columns + int_cols = metadetect_variants("id", "flags") + for col in int_cols: + group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="i8") + + return cols + int_cols + + def setup_metacal_output(self, metacal_file, target_size): + # Get a list of all the column names + cols = ( + [ + "ra", + "dec", + "psf_g1", + "psf_g2", + "mcal_psf_g1", + "mcal_psf_g2", + "mcal_psf_T_mean", + ] + + metacal_variants("mcal_g1", "mcal_g2", "mcal_T", "mcal_s2n", "mcal_T_err") + + band_variants("riz", "mcal_mag", "mcal_mag_err", shear_catalog_type="metacal") + + ["weight"] + ) + + cols += ["true_g1", "true_g2", "redshift_true"] + + # Make group for all the photometry + group = metacal_file.create_group("shear") + group.attrs["bands"] = self.bands + + # Extensible columns becase we don't know the size yet. + # We will cut down the size at the end. + + for col in cols: + group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="f8") + + group.create_dataset("id", (target_size,), maxshape=(target_size,), dtype="i8") + + for col in metacal_variants("mcal_flags"): + group.create_dataset(col, (target_size,), maxshape=(target_size,), dtype="i8") + + return cols + ["id", "mcal_flags"] + + def load_metacal_response_model(self): + """ + Load an HDF file containing the response model + R(log10(snr), size) + R_std(log10(snr), size) + + where R is the mean metacal response in a bin and + R_std is its standard deviation. + + So far only one of these files exists! + + """ + import scipy.interpolate + + if self.config["unit_response"]: + return + + model_file = self.open_input("response_model") + snr_centers = model_file["R_model/log10_snr"][:] + sz_centers = model_file["R_model/size"][:] + R_mean = model_file["R_model/R_mean"][:] + R_std = model_file["R_model/R_std"][:] + model_file.close() + + # Save a 2D spline + snr_grid, sz_grid = np.meshgrid(snr_centers, sz_centers) + self.R_spline = scipy.interpolate.SmoothBivariateSpline( + snr_grid.T.flatten(), + sz_grid.T.flatten(), + R_mean.flatten(), + w=R_std.flatten(), + ) + self.Rstd_spline = scipy.interpolate.SmoothBivariateSpline( + snr_grid.T.flatten(), sz_grid.T.flatten(), R_std.flatten() + ) + + def write_output( + self, + start, + target_size, + photo_cols, + metacal_cols, + photo_file, + photo_data, + metacal_file, + metacal_data, + ): + """ + Save the photometry we have just simulated to disc + + Parameters + ---------- + + photo_file: HDF File object + + metacal_file: HDF File object + + photo_data: dict + Dictionary of simulated photometry + + metacal_data: dict + Dictionary of simulated metacal data + + """ + # Work out the range of data to output (since we will be + # doing this in chunks). If we have cut down to a random + # subset of the catalog then we may have gone over the + # target length, depending on the random selection + n = len(photo_data["id"]) + end = min(start + n, target_size) + + # assert photo_data['id'].min()>0 + + # Save each column + for name in photo_cols: + photo_file[f"photometry/{name}"][start:end] = photo_data[name] + + for name in metacal_cols: + metacal_file[f"shear/{name}"][start:end] = metacal_data[name] + + def make_mock_photometry(self, data): + # The visit count affects the overall noise levels + n_visit = self.config["visits_per_band"] + # Do all the work in the function below + photo = make_mock_photometry(n_visit, self.bands, data, self.config["unit_response"]) + + for col in self.config["extra_cols"].split(): + photo[col] = data[col] + + return photo + + def make_mock_metadetect(self, data, photo): + """ + !!! This function is not working properly yet!!! + Generate a mock metadetect table. + This is a long and complicated function unfortunately. + + Throughout we assume a single R = R11 = R22, with R12 = R21 = 0 + + Parameters + ---------- + data: dict + Dictionary of arrays read from the input cosmo catalog + photo: dict + Dictionary of arrays generated to simulate photometry + """ + + # These are the numbers from figure F1 of the DES Y1 shear catalog paper + # (this version is not yet public but is awaiting a second referee response) + + # Overall SNR for the three bands usually used for shape measurement + # We use the true SNR not the estimated one, though these are pretty close + snr = (photo["snr_r"] ** 2 + photo["snr_i"] ** 2 + photo["snr_z"] ** 2) ** 0.5 + snr_1p = (photo["snr_r_1p"] ** 2 + photo["snr_i_1p"] ** 2 + photo["snr_z_1p"] ** 2) ** 0.5 + snr_1m = (photo["snr_r_1m"] ** 2 + photo["snr_i_1m"] ** 2 + photo["snr_z_1m"] ** 2) ** 0.5 + snr_2p = (photo["snr_r_2p"] ** 2 + photo["snr_i_2p"] ** 2 + photo["snr_z_2p"] ** 2) ** 0.5 + snr_2m = (photo["snr_r_2m"] ** 2 + photo["snr_i_2m"] ** 2 + photo["snr_z_2m"] ** 2) ** 0.5 + + if self.config["unit_response"]: + assert np.allclose(snr, snr_1p) + assert np.allclose(snr, snr_2m) + + nobj = snr.size + + log10_snr = np.log10(snr) + + # Convert from the half-light radius which is in the + # input catalogs to a sigma. Do this by pretending + # that it is a Gaussian. This is clearly wrong, and + # if this causes major errors in the size cuts we may + # have to modify this + size_hlr = data["size_true"] + size_sigma = size_hlr / np.sqrt(2 * np.log(2)) + size_T = 2 * size_sigma**2 + + # Use a fixed PSF across all the objects + psf_fwhm = 0.75 + psf_sigma = psf_fwhm / (2 * np.sqrt(2 * np.log(2))) + psf_T = 2 * psf_sigma**2 + + if self.config["unit_response"]: + R = 1.0 + R_size = 0.0 + + else: + # Use the response model to get a reasonable response + # value for this size and SNR + R_mean = self.R_spline(log10_snr, size_T, grid=False) + R_std = self.Rstd_spline(log10_snr, size_T, grid=False) + + # Assume a 0.2 correlation between the size response + # and the shear response. + rho = 0.2 + f = np.random.multivariate_normal([0.0, 0.0], [[1.0, rho], [rho, 1.0]], nobj).T + R, R_size = f * R_std + R_mean + + # Convert magnitudes to fluxes according to the baseline + # use in the metacal numbers + flux_r = 10**0.4 * (27 - photo["mag_r"]) + flux_i = 10**0.4 * (27 - photo["mag_i"]) + flux_z = 10**0.4 * (27 - photo["mag_z"]) + + # Note that this is delta_gamma not 2*delta_gamma, because + # of how we use it below + delta_gamma = 0.01 + + # Use a fixed shape noise per component to generate + # an overall + if self.config["add_shape_noise"]: + shape_noise = 0.26 + else: + shape_noise = 0.0 + + eps = np.random.normal(0, shape_noise, nobj) + 1.0j * np.random.normal(0, shape_noise, nobj) + # True shears without shape noise + g1 = data["shear_1"] + g2 = data["shear_2"] + + if self.config["flip_g2"]: + g2 *= -1 + + # Do the full combination of (g,epsilon) -> e, not the approx one + g = g1 + 1j * g2 + e = (eps + g) / (1 + g.conj() * eps) + e1 = e.real + e2 = e.imag + + zero = np.zeros(nobj) + # Now collect together everything to go into the metacal + # file + output = { + # Basic values + "00/id": photo["id"], + "1p/id": photo["id"], + "1m/id": photo["id"], + "2p/id": photo["id"], + "2m/id": photo["id"], + "00/ra": photo["ra"], + "1p/ra": photo["ra"], + "1m/ra": photo["ra"], + "2p/ra": photo["ra"], + "2m/ra": photo["ra"], + "00/dec": photo["dec"], + "1p/dec": photo["dec"], + "1m/dec": photo["dec"], + "2p/dec": photo["dec"], + "2m/dec": photo["dec"], + # Keep the truth values for use in some testing paths + # We are not pretending to do meta + "00/true_g1": g1, + "00/true_g2": g2, + "00/redshift_true": photo["redshift_true"], + # g1 + "00/g1": e1 * R, + "1p/g1": (e1 + delta_gamma) * R, + "1m/g1": (e1 - delta_gamma) * R, + "2p/g1": e1 * R, + "2m/g1": e1 * R, + # g2 + "00/g2": e2 * R, + "1p/g2": e2 * R, + "1m/g2": e2 * R, + "2p/g2": (e2 + delta_gamma) * R, + "2m/g2": (e2 - delta_gamma) * R, + # T + "00/T": size_T, + "1p/T": size_T + R_size * delta_gamma, + "1m/T": size_T - R_size * delta_gamma, + "2p/T": size_T + R_size * delta_gamma, + "2m/T": size_T - R_size * delta_gamma, + # Terr + "00/T_err": zero, + "1p/T_err": zero, + "1m/T_err": zero, + "2p/T_err": zero, + "2m/T_err": zero, + # size + "00/s2n": snr, + "1p/s2n": snr_1p, + "1m/s2n": snr_1m, + "2p/s2n": snr_2p, + "2m/s2n": snr_2m, + # Magntiudes and fluxes, just copied from the inputs. + "00/mag_r": photo["mag_r"], + "00/mag_i": photo["mag_i"], + "00/mag_z": photo["mag_z"], + "00/mag_err_r": photo["mag_r_err"], + "00/mag_err_i": photo["mag_i_err"], + "00/mag_err_z": photo["mag_z_err"], + "1p/mag_r": photo["mag_r_1p"], + "2p/mag_r": photo["mag_r_2p"], + "1m/mag_r": photo["mag_r_1m"], + "2m/mag_r": photo["mag_r_2m"], + "1p/mag_i": photo["mag_i_1p"], + "2p/mag_i": photo["mag_i_2p"], + "1m/mag_i": photo["mag_i_1m"], + "2m/mag_i": photo["mag_i_2m"], + "1p/mag_z": photo["mag_z_1p"], + "2p/mag_z": photo["mag_z_2p"], + "1m/mag_z": photo["mag_z_1m"], + "2m/mag_z": photo["mag_z_2m"], + "1p/mag_err_r": photo["mag_r_err"], + "2p/mag_err_r": photo["mag_r_err"], + "1m/mag_err_r": photo["mag_r_err"], + "2m/mag_err_r": photo["mag_r_err"], + "1p/mag_err_i": photo["mag_i_err"], + "2p/mag_err_i": photo["mag_i_err"], + "1m/mag_err_i": photo["mag_i_err"], + "2m/mag_err_i": photo["mag_i_err"], + "1p/mag_err_z": photo["mag_z_err"], + "2p/mag_err_z": photo["mag_z_err"], + "1m/mag_err_z": photo["mag_z_err"], + "2m/mag_err_z": photo["mag_z_err"], + # Fixed PSF parameters - all round with same size + "00/mcal_psf_g1": zero, + "1p/mcal_psf_g1": zero, + "1m/mcal_psf_g1": zero, + "2p/mcal_psf_g1": zero, + "2m/mcal_psf_g1": zero, + "00/mcal_psf_g2": zero, + "1p/mcal_psf_g2": zero, + "1m/mcal_psf_g2": zero, + "2p/mcal_psf_g2": zero, + "2m/mcal_psf_g2": zero, + "00/psf_g1": zero, + "1p/psf_g1": zero, + "1m/psf_g1": zero, + "2p/psf_g1": zero, + "2m/psf_g1": zero, + "00/psf_g2": zero, + "1p/psf_g2": zero, + "1m/psf_g2": zero, + "2p/psf_g2": zero, + "2m/psf_g2": zero, + "00/mcal_psf_T_mean": np.repeat(psf_T, nobj), + "1p/mcal_psf_T_mean": np.repeat(psf_T, nobj), + "1m/mcal_psf_T_mean": np.repeat(psf_T, nobj), + "2p/mcal_psf_T_mean": np.repeat(psf_T, nobj), + "2m/mcal_psf_T_mean": np.repeat(psf_T, nobj), + # Everything that gets this far should be used, so flag=0 + "00/flags": np.zeros(nobj, dtype=np.int32), + "1p/flags": np.zeros(nobj, dtype=np.int32), + "1m/flags": np.zeros(nobj, dtype=np.int32), + "2p/flags": np.zeros(nobj, dtype=np.int32), + "2m/flags": np.zeros(nobj, dtype=np.int32), + # we use weights of one for everything for metacal + # if that ever changes we may also need to add + # weight_1p, etc. + "00/weight": np.ones(nobj), + "1p/weight": np.ones(nobj), + "1m/weight": np.ones(nobj), + "2p/weight": np.ones(nobj), + "2m/weight": np.ones(nobj), + } + + return output + + def make_mock_metacal(self, data, photo): + """ + I copied the old version here to use if we do not want metadetect. + Generate a mock metacal table. + This is a long and complicated function unfortunately. + Throughout we assume a single R = R11 = R22, with R12 = R21 = 0 + + Parameters + ---------- + data: dict + Dictionary of arrays read from the input cosmo catalog + photo: dict + Dictionary of arrays generated to simulate photometry + """ + + # These are the numbers from figure F1 of the DES Y1 shear catalog paper + # (this version is not yet public but is awaiting a second referee response) + + # Overall SNR for the three bands usually used for shape measurement + # We use the true SNR not the estimated one, though these are pretty close + snr = (photo["snr_r"] ** 2 + photo["snr_i"] ** 2 + photo["snr_z"] ** 2) ** 0.5 + snr_1p = (photo["snr_r_1p"] ** 2 + photo["snr_i_1p"] ** 2 + photo["snr_z_1p"] ** 2) ** 0.5 + snr_1m = (photo["snr_r_1m"] ** 2 + photo["snr_i_1m"] ** 2 + photo["snr_z_1m"] ** 2) ** 0.5 + snr_2p = (photo["snr_r_2p"] ** 2 + photo["snr_i_2p"] ** 2 + photo["snr_z_2p"] ** 2) ** 0.5 + snr_2m = (photo["snr_r_2m"] ** 2 + photo["snr_i_2m"] ** 2 + photo["snr_z_2m"] ** 2) ** 0.5 + + if self.config["unit_response"]: + assert np.allclose(snr, snr_1p) + assert np.allclose(snr, snr_2m) + + nobj = snr.size + + log10_snr = np.log10(snr) + + # Convert from the half-light radius which is in the + # input catalogs to a sigma. Do this by pretending + # that it is a Gaussian. This is clearly wrong, and + # if this causes major errors in the size cuts we may + # have to modify this + size_hlr = data["size_true"] + size_sigma = size_hlr / np.sqrt(2 * np.log(2)) + size_T = 2 * size_sigma**2 + + # Use a fixed PSF across all the objects + psf_fwhm = 0.75 + psf_sigma = psf_fwhm / (2 * np.sqrt(2 * np.log(2))) + psf_T = 2 * psf_sigma**2 + + if self.config["unit_response"]: + R = 1.0 + R_size = 0.0 + + else: + # Use the response model to get a reasonable response + # value for this size and SNR + R_mean = self.R_spline(log10_snr, size_T, grid=False) + R_std = self.Rstd_spline(log10_snr, size_T, grid=False) + + # Assume a 0.2 correlation between the size response + # and the shear response. + rho = 0.2 + f = np.random.multivariate_normal([0.0, 0.0], [[1.0, rho], [rho, 1.0]], nobj).T + R, R_size = f * R_std + R_mean + + # Convert magnitudes to fluxes according to the baseline + # use in the metacal numbers + flux_r = 10**0.4 * (27 - photo["mag_r"]) + flux_i = 10**0.4 * (27 - photo["mag_i"]) + flux_z = 10**0.4 * (27 - photo["mag_z"]) + + # Note that this is delta_gamma not 2*delta_gamma, because + # of how we use it below + delta_gamma = 0.01 + + # Use a fixed shape noise per component to generate + # an overall + if self.config["add_shape_noise"]: + shape_noise = 0.26 + else: + shape_noise = 0.0 + + eps = np.random.normal(0, shape_noise, nobj) + 1.0j * np.random.normal(0, shape_noise, nobj) + # True shears without shape noise + g1 = data["shear_1"] + g2 = data["shear_2"] + + if self.config["flip_g2"]: + g2 *= -1 + + # Do the full combination of (g,epsilon) -> e, not the approx one + g = g1 + 1j * g2 + e = (eps + g) / (1 + g.conj() * eps) + e1 = e.real + e2 = e.imag + + zero = np.zeros(nobj) + # Now collect together everything to go into the metacal + # file + output = { + # Basic values + "id": photo["id"], + "ra": photo["ra"], + "dec": photo["dec"], + # Keep the truth value just in case + "true_g1": g1, + "true_g2": g2, + # add true redshift since it is used in source selector + "redshift_true": photo["redshift_true"], + # g1 + "mcal_g1": e1 * R, + "mcal_g1_1p": (e1 + delta_gamma) * R, + "mcal_g1_1m": (e1 - delta_gamma) * R, + "mcal_g1_2p": e1 * R, + "mcal_g1_2m": e1 * R, + # g2 + "mcal_g2": e2 * R, + "mcal_g2_1p": e2 * R, + "mcal_g2_1m": e2 * R, + "mcal_g2_2p": (e2 + delta_gamma) * R, + "mcal_g2_2m": (e2 - delta_gamma) * R, + # T + "mcal_T": size_T, + "mcal_T_1p": size_T + R_size * delta_gamma, + "mcal_T_1m": size_T - R_size * delta_gamma, + "mcal_T_2p": size_T + R_size * delta_gamma, + "mcal_T_2m": size_T - R_size * delta_gamma, + # Terr + "mcal_T_err": zero, + "mcal_T_err_1p": zero, + "mcal_T_err_1m": zero, + "mcal_T_err_2p": zero, + "mcal_T_err_2m": zero, + # size + "mcal_s2n": snr, + "mcal_s2n_1p": snr_1p, + "mcal_s2n_1m": snr_1m, + "mcal_s2n_2p": snr_2p, + "mcal_s2n_2m": snr_2m, + # Magntiudes and fluxes, just copied from the inputs. + "mcal_mag_r": photo["mag_r"], + "mcal_mag_i": photo["mag_i"], + "mcal_mag_z": photo["mag_z"], + "mcal_mag_err_r": photo["mag_r_err"], + "mcal_mag_err_i": photo["mag_i_err"], + "mcal_mag_err_z": photo["mag_z_err"], + "mcal_mag_r_1p": photo["mag_r_1p"], + "mcal_mag_r_2p": photo["mag_r_2p"], + "mcal_mag_r_1m": photo["mag_r_1m"], + "mcal_mag_r_2m": photo["mag_r_2m"], + "mcal_mag_i_1p": photo["mag_i_1p"], + "mcal_mag_i_2p": photo["mag_i_2p"], + "mcal_mag_i_1m": photo["mag_i_1m"], + "mcal_mag_i_2m": photo["mag_i_2m"], + "mcal_mag_z_1p": photo["mag_z_1p"], + "mcal_mag_z_2p": photo["mag_z_2p"], + "mcal_mag_z_1m": photo["mag_z_1m"], + "mcal_mag_z_2m": photo["mag_z_2m"], + "mcal_mag_err_r_1p": photo["mag_r_err"], + "mcal_mag_err_r_2p": photo["mag_r_err"], + "mcal_mag_err_r_1m": photo["mag_r_err"], + "mcal_mag_err_r_2m": photo["mag_r_err"], + "mcal_mag_err_i_1p": photo["mag_i_err"], + "mcal_mag_err_i_2p": photo["mag_i_err"], + "mcal_mag_err_i_1m": photo["mag_i_err"], + "mcal_mag_err_i_2m": photo["mag_i_err"], + "mcal_mag_err_z_1p": photo["mag_z_err"], + "mcal_mag_err_z_2p": photo["mag_z_err"], + "mcal_mag_err_z_1m": photo["mag_z_err"], + "mcal_mag_err_z_2m": photo["mag_z_err"], + # Fixed PSF parameters - all round with same size + "mcal_psf_g1": zero, + "mcal_psf_g2": zero, + "psf_g1": zero, + "psf_g2": zero, + "mcal_psf_T_mean": np.repeat(psf_T, nobj), + # Everything that gets this far should be used, so flag=0 + "mcal_flags": np.zeros(nobj, dtype=np.int32), + "mcal_flags_1p": np.zeros(nobj, dtype=np.int32), + "mcal_flags_1m": np.zeros(nobj, dtype=np.int32), + "mcal_flags_2p": np.zeros(nobj, dtype=np.int32), + "mcal_flags_2m": np.zeros(nobj, dtype=np.int32), + # we use weights of one for everything for metacal + # if that ever changes we may also need to add + # weight_1p, etc. + "weight": np.ones(nobj), + } + + return output + + def apply_magnitude_cut(self, data): + """ + Allow for a cut in absolute magnitude. + """ + mag_limit = self.config["Mag_r_limit"] + sel = data["Mag_true_r_sdss_z0"] < mag_limit + + ndet = sel.sum() + ntot = sel.size + fract = ndet * 100.0 / ntot + print(f"{ndet} objects pass magnitude cut out of {ntot} objects ({fract:.1f}%)") + + # Remove all objects not selected + for key in list(data.keys()): + data[key] = data[key][sel] + + def remove_undetected(self, data, photo): + """ + Strip out any undetected objects from the two + simulated data sets. + + Use a configuration parameter snr_limit to decide + on the detection limit. + """ + snr_limit = self.config["snr_limit"] + + # This will become a boolean array in a minute when + # we OR it with an array + detected = False + + # Check if detected in any band. Makes a boolean array + # Even though we started with just a single False. + for band in self.bands: + detected_in_band = photo[f"snr_{band}"] > snr_limit + not_detected_in_band = ~detected_in_band + # Set objects not detected in one band that are detected in another + # to inf magnitude in that band, and the SNR to zero. + # We have to do this for each of the variants also, because otherwise + # we end up with wildly different final SNR values later. + # This is the metadetection issue really! + for v in metacal_variants(f"snr_{band}"): + photo[v][not_detected_in_band] = 0.0 + for v in metacal_variants(f"mag_{band}"): + photo[v][not_detected_in_band] = np.inf + + # Record that we have detected this object at all + detected |= detected_in_band + + # the protoDC2 sims have an edge with zero shear. + # Remove it. + zero_shear_edge = (abs(data["shear_1"]) == 0) & (abs(data["shear_2"]) == 0) + print("Removing {} objects with identically zero shear in both terms".format(zero_shear_edge.sum())) + + detected &= ~zero_shear_edge + + # Print out interesting information + ndet = detected.sum() + ntot = detected.size + fract = ndet * 100.0 / ntot + print(f"Detected {ndet} out of {ntot} objects ({fract:.1f}%)") + + # Remove all objects not detected in *any* band + # make a copy of the keys with list(photo.keys()) so we are not + # modifying during the iteration + for key in list(photo.keys()): + photo[key] = photo[key][detected] + + for key in list(data.keys()): + data[key] = data[key][detected] + + +class TXBuzzardMockOld(TXCosmoDC2MockOld): + """ + Simulate mock photometry from Buzzard. + + May be obsolete. + """ + + name = "TXBuzzardMockOld" + parallel = False + inputs = [("response_model", HDFFile)] + + outputs = [ + ("shear_catalog", ShearCatalog), + ("photometry_catalog", HDFFile), + ] + + config_options = { + "cat_name": StageParameter(str, "buzzard", msg="Name of the mock catalog to use."), + "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), + "snr_limit": StageParameter(float, 4.0, msg="S/N limit for object selection."), + "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), + "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), + "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), + "unit_response": StageParameter(bool, False, msg="Whether to use unit response in simulation."), + "flip_g2": StageParameter(bool, True, msg="Whether to flip g2 sign to match conventions."), + } + + +class TXGaussianSimsMockOld(TXCosmoDC2MockOld): + """ + Simulate mock photometry from gaussian simulations + + This stage simulates metacal data and metacalibrated + photometry measurements, starting from simple Gaussian simulations + produced starting from CCL power spectra and poission sampling galaxies + from it. + + This is mainly useful for testing infrastructure + starting from a purer simulation. + """ + + name = "TXGaussianSimsMockOld" + parallel = False + inputs = [("response_model", HDFFile)] + + outputs = [ + ("shear_catalog", ShearCatalog), + ("photometry_catalog", HDFFile), + ] + + config_options = { + "cat_name": StageParameter(str, "GaussianSims", msg="Name of the Gaussian simulation catalog."), + "visits_per_band": StageParameter(int, 165, msg="Number of visits per band for noise simulation."), + "snr_limit": StageParameter(float, 0.0, msg="S/N limit for object selection (0 for all)."), + "max_size": StageParameter(int, 99999999999999, msg="Maximum catalog size for testing."), + "extra_cols": StageParameter(str, "", msg="Extra columns to include (space-separated)."), + "max_npix": StageParameter(int, 99999999999999, msg="Maximum number of pixels."), + "unit_response": StageParameter(bool, True, msg="Whether to use unit response in simulation."), + "cat_size": StageParameter(int, 0, msg="Catalog size (0 for all)."), + "flip_g2": StageParameter(bool, False, msg="Whether to flip g2 sign to match conventions."), + "apply_mag_cut": StageParameter(bool, False, msg="Apply magnitude cut for descqa comparison."), + "metadetect": StageParameter( + bool, True, msg="Whether to make a metadetect-style catalog (True) or metacal (False)." + ), + "add_shape_noise": StageParameter(bool, False, msg="Whether to add shape noise to simulation."), + } + + def data_iterator(self, cat): + # all cols we need + ( + ra, + dec, + g1, + g2, + z, + m_u, + m_g, + m_r, + m_i, + m_z, + m_y, + etrue1, + etrue2, + size, + galaxy_id, + ) = cat + + # figuring out number of chunks + chunk_size = 1_000_000 + nchunk = len(ra) // chunk_size + if nchunk * chunk_size < len(ra): + nchunk += 1 + + # main loop + for i in range(nchunk): + # start and end index of this chunk in full data + s = chunk_size * i + e = s + chunk_size + + # make dict just for this chunk of data + data_chunk = {} + data_chunk["ra"] = ra[s:e] + data_chunk["dec"] = dec[s:e] + data_chunk["shear_1"] = g1[s:e] + data_chunk["shear_2"] = g2[s:e] + data_chunk["size_true"] = size[s:e] + data_chunk["galaxy_id"] = galaxy_id[s:e] + data_chunk["redshift_true"] = z[s:e] + data_chunk["ellipticity_1_true"] = etrue1[s:e] + data_chunk["ellipticity_2_true"] = etrue2[s:e] + data_chunk["mag_true_u_lsst"] = m_u[s:e] + data_chunk["mag_true_g_lsst"] = m_g[s:e] + data_chunk["mag_true_r_lsst"] = m_r[s:e] + data_chunk["mag_true_i_lsst"] = m_i[s:e] + data_chunk["mag_true_z_lsst"] = m_z[s:e] + data_chunk["mag_true_y_lsst"] = m_y[s:e] + + # send this chunk of data back to caller + yield data_chunk + + def load_catalog(self): + """ + This method loads the Gaussian sims produced in this notebook + https://github.com/LSSTDESC/txpipe-cosmodc2/blob/master/notebooks/MaskAreaGaussianSims_and_FakeTXPipeInput.ipynb + """ + cat_name = self.config["cat_name"] + self.bands = ("u", "g", "r", "i", "z", "y") + + print(f"Loading from catalog {cat_name}") + + cat = np.load(cat_name, allow_pickle=True) + + N = len(cat[0]) + + return cat, N + + +def make_mock_photometry(n_visit, bands, data, unit_response): + """ + Generate a mock photometric table with noise added + + This is mostly from LSE‐40 by + Zeljko Ivezic, Lynne Jones, and Robert Lupton + retrieved here: + http://faculty.washington.edu/ivezic/Teaching/Astr511/LSST_SNRdoc.pdf + """ + + output = {} + nobj = data["galaxy_id"].size + output["ra"] = data["ra"] + output["dec"] = data["dec"] + output["id"] = data["galaxy_id"] + output["extendedness"] = np.ones(nobj) + + # Sky background, seeing FWHM, and system throughput, + # all from table 2 of Ivezic, Jones, & Lupton + B_b = np.array([85.07, 467.9, 1085.2, 1800.3, 2775.7, 3614.3]) + fwhm = np.array([0.77, 0.73, 0.70, 0.67, 0.65, 0.63]) + T_b = np.array([0.0379, 0.1493, 0.1386, 0.1198, 0.0838, 0.0413]) + + # effective pixels size for a Gaussian PSF, from equation + # 27 of Ivezic, Jones, & Lupton + pixel = 0.2 # arcsec + N_eff = 2.436 * (fwhm / pixel) ** 2 + + # other numbers from Ivezic, Jones, & Lupton + sigma_inst2 = 10.0**2 # instrumental noise in photons per pixel, just below eq 42 + gain = 1 # ADU units per photon, also just below eq 42 + D = 6.5 # primary mirror diameter in meters, from LSST key numbers page (effective clear diameter) + # Not sure what effective clear diameter means but it's closer to the number used in the paper + time = 30.0 # seconds per exposure, from LSST key numbers page + sigma_b2 = 0.0 # error on background, just above eq 42 + + # combination of these used below, from various equations + factor = 5455.0 / gain * (D / 6.5) ** 2 * (time / 30.0) + + # Fake some metacal responses + if unit_response: + mag_responses = [0.0 for i in bands] + else: + mag_responses = generate_mock_metacal_mag_responses(bands, nobj) + + delta_gamma = 0.01 # this is the half-delta gamma, i.e. gamma_+ - gamma_0 + # that's the right thing to use here because we are doing m+ = m0 + dm/dy*dy + # Use the same response for gamma1 and gamma2 + + for band, b_b, t_b, n_eff, mag_resp in zip(bands, B_b, T_b, N_eff, mag_responses): + # truth magnitude + mag = data[f"mag_true_{band}_lsst"] + output[f"mag_true_{band}_lsst"] = mag + + # expected signal photons, over all visits + c_b = factor * 10 ** (0.4 * (25 - mag)) * t_b * n_visit + + # expected background photons, over all visits + background = np.sqrt((b_b + sigma_inst2 + sigma_b2) * n_eff * n_visit) + # total expected photons + mu = c_b + background + + # Observed number of photons in excess of the expected background. + # This can go negative for faint magnitudes, indicating that the object is + # not going to be detected + n_obs = np.random.poisson(mu) - background + n_obs_err = np.sqrt(mu) + + # signal to noise, true and estimated values + true_snr = c_b / background + obs_snr = n_obs / background + + # observed magnitude from inverting c_b expression above + mag_obs = 25 - 2.5 * np.log10(n_obs / factor / t_b / n_visit) + + # converting error on n_obs to error on mag + mag_err = 2.5 / np.log(10.0) / obs_snr + + output[f"true_snr_{band}"] = true_snr + output[f"snr_{band}"] = obs_snr + output[f"mag_{band}"] = mag_obs + output[f"mag_err_{band}"] = mag_err + output[f"mag_{band}_err"] = mag_err + + m = mag_resp * delta_gamma + + m1 = mag_obs + m + m2 = mag_obs - m + mag_obs_1p = m1 + mag_obs_1m = m2 + + output[f"mag_{band}_1p"] = m1 + output[f"mag_{band}_1m"] = m2 + output[f"mag_{band}_2p"] = m1 + output[f"mag_{band}_2m"] = m2 + + # Scale the SNR values according the to change in magnitude.r + s = np.power(10.0, -0.4 * m) + + snr1 = obs_snr * s + snr2 = obs_snr / s + output[f"snr_{band}_1p"] = snr1 + output[f"snr_{band}_1m"] = snr2 + output[f"snr_{band}_2p"] = snr1 + output[f"snr_{band}_2m"] = snr2 + + return output + + +def generate_mock_metacal_mag_responses(bands, nobj): + nband = len(bands) + mu = np.zeros(nband) # seems approx mean of response across bands, from HSC tract + rho = 0.25 # approx correlation between response in bands, from HSC tract + sigma2 = 1.7**2 # approx variance of response, from HSC tract + covmat = np.full((nband, nband), rho * sigma2) + np.fill_diagonal(covmat, sigma2) + mag_responses = np.random.multivariate_normal(mu, covmat, nobj).T + return mag_responses + + +class TXSimpleMockOld(PipelineStage): + """ + Load an ascii astropy table and put it in shear catalog format. + """ + + name = "TXSimpleMockOld" + parallel = False + inputs = [("mock_shear_catalog", TextFile)] + outputs = [("shear_catalog", ShearCatalog)] + config_options = {} + + def run(self): + from astropy.table import Table + import numpy as np + + # Load the data. We are assuming here it is small enough to fit in memory + input_filename = self.get_input("mock_shear_catalog") + input_data = Table.read(input_filename, format="ascii") + n = len(input_data) + + data = {} + # required columns + for col in ["ra", "dec", "g1", "g2", "s2n", "T"]: + data[col] = input_data[col] + + # It's most likely we will have a redshift column. + # Check for both that and "redshift_true" + if "redshift" in input_data.colnames: + data["redshift_true"] = input_data["redshift"] + elif "redshift_true" in input_data.colnames: + data["redshift_true"] = input_data["redshift_true"] + + # If there is an ID column then use it, but otherwise just use + # sequential IDs + if "id" in input_data.colnames: + data["galaxy_id"] = input_data["id"] + else: + data["galaxy_id"] = np.arange(len(input_data)) + + # if these catalogs are not present then we fake them. + defaults = { + "T_err": 0.0, + "psf_g1": 0.0, + "psf_g2": 0.0, + "psf_T_mean": 0.202, # this corresponds to a FWHM of 0.75 arcsec + "weight": 1.0, + "flags": 0, + } + + for key, value in defaults.items(): + if key in input_data.colnames: + data[key] = input_data[key] + else: + data[key] = np.full(n, value) + + self.save_catalog(data) + + def save_catalog(self, data): + with self.open_output("shear_catalog") as f: + g = f.create_group("shear") + g.attrs["catalog_type"] = "simple" + for key, value in data.items(): + g.create_dataset(key, data=value) + + +class TXMockTruthPZOld(PipelineStage): + name = "TXMockTruthPZOld" + parallel = False + inputs = [("shear_catalog", ShearCatalog)] + outputs = [("photoz_pdfs", QPPDFFile)] + config_options = { + "mock_sigma_z": StageParameter(float, 0.001, msg="Sigma_z for mock photo-z PDF generation."), + } + + def run(self): + import qp + import numpy as np + + sigma_z = self.config["mock_sigma_z"] + + # read the input truth redshifts + with self.open_input("shear_catalog", wrapper=True) as f: + group = f.file[f.get_primary_catalog_group()] + n = group["ra"].size + redshifts = group["redshift_true"][:] + + zgrid = np.linspace(0, 3, 301) + pdfs = np.zeros((n, len(zgrid))) + + spread_z = sigma_z * (1 + redshifts) + # make a gaussian PDF for each object + delta = zgrid[np.newaxis, :] - redshifts[:, np.newaxis] + pdfs = np.exp(-0.5 * (delta / spread_z[:, np.newaxis]) ** 2) / np.sqrt(2 * np.pi) / spread_z[:, np.newaxis] + + q = qp.Ensemble(qp.interp, data=dict(xvals=zgrid, yvals=pdfs)) + q.set_ancil(dict(zmode=redshifts, zmean=redshifts, zmedian=redshifts)) + q.write_to(self.get_output("photoz_pdfs")) diff --git a/txpipe/source_selection/__init__.py b/txpipe/source_selection/__init__.py index 7879fdbdc..4fac2c893 100644 --- a/txpipe/source_selection/__init__.py +++ b/txpipe/source_selection/__init__.py @@ -3,3 +3,4 @@ from .lensfit import TXSourceSelectorLensfit from .metacal import TXSourceSelectorMetacal from .metadetect import TXSourceSelectorMetadetect +from .tomography import TXSourceTomography diff --git a/txpipe/source_selection/base.py b/txpipe/source_selection/base.py index 8c617f8fa..8da536f3d 100644 --- a/txpipe/source_selection/base.py +++ b/txpipe/source_selection/base.py @@ -1,5 +1,5 @@ from ..base_stage import PipelineStage -from ..data_types import ShearCatalog, TomographyCatalog, HDFFile +from ..data_types import ShearCatalog, TomographyCatalog, HDFFile, PickleFile from .utils import SourceNumberDensityStats from ..utils.calibration_tools import read_shear_catalog_type from ..binning import build_tomographic_classifier, apply_classifier, read_training_data @@ -96,6 +96,7 @@ class TXSourceSelectorBase(PipelineStage): inputs = [ ("shear_catalog", ShearCatalog), ("spectroscopic_catalog", HDFFile), + ("shear_tomography_classifier", PickleFile), ] outputs = [("shear_tomography_catalog", TomographyCatalog)] @@ -103,7 +104,7 @@ class TXSourceSelectorBase(PipelineStage): config_options = { "input_pz": StageParameter(bool, False, msg="Whether to use input photo-z posteriors"), "true_z": StageParameter(bool, False, msg="Whether to use true redshifts instead of photo-z"), - "bands": StageParameter(str, "riz", msg="Bands from the catalog to use for selection"), + "bands": StageParameter(list, ["r", "i", "z"], msg="Bands from the catalog to use for selection"), "verbose": StageParameter(bool, False, msg="Whether to print verbose output"), "T_cut": StageParameter(float, required=True, msg="Size cut threshold for object selection"), "s2n_cut": StageParameter( @@ -113,15 +114,6 @@ class TXSourceSelectorBase(PipelineStage): ), "chunk_rows": StageParameter(int, 10000, msg="Number of rows to process in each chunk"), "source_zbin_edges": StageParameter(list, required=True, msg="Redshift bin edges for source tomography"), - "random_seed": StageParameter(int, 42, msg="Random seed for reproducibility"), - "spec_mag_column_format": StageParameter( - str, - "photometry/{band}", - msg="Format string for spectroscopic magnitude columns", - ), - "spec_redshift_column": StageParameter( - str, "photometry/redshift", msg="Column name for spectroscopic redshifts" - ), } def run(self): @@ -154,21 +146,7 @@ def run(self): # Build a classifier used to put objects into tomographic bins if not (self.config["input_pz"] or self.config["true_z"]): - with self.open_input("spectroscopic_catalog") as spec_file: - training_data = read_training_data( - spec_file, - bands, - self.config["spec_mag_column_format"], - self.config["spec_redshift_column"], - ) - - classifier, features = build_tomographic_classifier( - bands, - training_data, - self.config["source_zbin_edges"], - self.config["random_seed"], - self.comm, - ) + classifier, features = self.read_tomography_classifier() # We will collect the selection biases for each bin # as a matrix. We will collect together the different @@ -226,6 +204,30 @@ def accumulate_statistics(self, output_file, shear_data, start, end, tomo_bin, p # These will be brought together at the end. number_density_stats.add_data(shear_data, tomo_bin) # check this + def read_tomography_classifier(self): + # Read the tomography classifier from file + with self.open_input("shear_tomography_classifier", wrapper=True) as infile: + pickle_data = infile.read() + + # Check that the tomographer used the same configuration + # as we have here. + if not pickle_data["bands"] == self.config["bands"]: + raise ValueError( + "Bands used in tomography classifier do not match those " + "in source selector configuration." + ) + # Also check bin edges are close enough + if not np.allclose(pickle_data["source_zbin_edges"], self.config["source_zbin_edges"]): + raise ValueError( + "Source redshift bin edges used in tomography classifier do not match those " + "in source selector configuration." + ) + + # This is all that is actually needed in this class + classifier = pickle_data["classifier"] + features = pickle_data["features"] + return classifier, features + def apply_simple_redshift_cut(self, shear_data): if self.config["input_pz"]: diff --git a/txpipe/source_selection/metadetect.py b/txpipe/source_selection/metadetect.py index bf499b6d9..3de03ea51 100644 --- a/txpipe/source_selection/metadetect.py +++ b/txpipe/source_selection/metadetect.py @@ -46,7 +46,7 @@ def data_iterator(self): bands = self.config["bands"] # Core quantities we need - shear_cols = metadetect_variants("T", "s2n", "g1", "g2", "ra", "dec", "mcal_psf_T_mean", "weight", "flags") + shear_cols = metadetect_variants("T", "s2n", "g1", "g2", "ra", "dec", "psf_T_mean", "weight", "flags") # Magnitudes and errors shear_cols += band_variants(bands, "mag", "mag_err", shear_catalog_type="metadetect") @@ -59,7 +59,7 @@ def data_iterator(self): shear_cols += metadetect_variants("redshift_true") for prefix in ["00", "1p", "1m", "2p", "2m"]: - renames[f"{prefix}/mcal_psf_T_mean"] = f"{prefix}/psf_T_mean" + renames[f"{prefix}/psf_T_mean"] = f"{prefix}/psf_T_mean" # This is a parent ceci.PipelineStage method. # It returns an iterator we loop through. diff --git a/txpipe/source_selection/tomography.py b/txpipe/source_selection/tomography.py new file mode 100644 index 000000000..9a4c2a70f --- /dev/null +++ b/txpipe/source_selection/tomography.py @@ -0,0 +1,48 @@ +from .base import PipelineStage +from ceci.config import StageParameter +from ..data_types import HDFFile, PickleFile +from ..binning import read_training_data, build_tomographic_classifier + +class TXSourceTomography(PipelineStage): + name = "TXSourceTomography" + inputs = [ + ("spectroscopic_catalog", HDFFile), + ] + outputs = [ + ("shear_tomography_classifier", PickleFile), + ] + config_options = { + "bands": StageParameter(list, ["r", "i", "z"], msg="Bands from the catalog to use for selection"), + "spec_mag_column_format": StageParameter(str, "photometry/{band}", msg="Format string for spectroscopic magnitude columns"), + "spec_redshift_column": StageParameter(str, "photometry/redshift", msg="Column name for spectroscopic redshifts"), + "source_zbin_edges": StageParameter(list, required=True, msg="Redshift bin edges for source tomography"), + "random_seed": StageParameter(int, 42, msg="Random seed for reproducibility"), + } + + def run(self): + bands = self.config['bands'] + with self.open_input("spectroscopic_catalog") as spec_file: + training_data = read_training_data( + spec_file, + bands, + self.config["spec_mag_column_format"], + self.config["spec_redshift_column"], + ) + + + classifier, features = build_tomographic_classifier( + bands, + training_data, + self.config["source_zbin_edges"], + self.config["random_seed"], + self.comm, + ) + + with self.open_output("shear_tomography_classifier", wrapper=True) as outfile: + pickle_data = { + "classifier": classifier, + "features": features, + "bands": bands, + "source_zbin_edges": self.config["source_zbin_edges"], + } + outfile.write(pickle_data) diff --git a/txpipe/utils/calibration_tools.py b/txpipe/utils/calibration_tools.py index 8907a0778..ea6d0f0d9 100755 --- a/txpipe/utils/calibration_tools.py +++ b/txpipe/utils/calibration_tools.py @@ -876,6 +876,7 @@ def __init__( raise ValueError(f"Please specify metacal, metadetect, lensfit or hsc for shear_catalog in config.") def selector(self, data, i): + print(type(data), i, self.x_name, data.prefix) x = data[self.x_name] if (self.shear_catalog_type == "lensfit") & (self.psf_unit_conv == True) & ("T" in self.x_name): pix2arcsec = 0.214 @@ -908,7 +909,13 @@ def add_data(self, data): weight = data["weight"][w] self.g1.add_data(i, data["g1"][w] - data["c1"][w], weight) self.g2.add_data(i, data["g2"][w] - data["c2"][w], weight) - self.x.add_data(i, data[self.x_name][w], weight) + + if self.x_name in data: + self.x.add_data(i, data[self.x_name][w], weight) + elif self.shear_catalog_type == "metadetect": + self.x.add_data(i, data["00/" + self.x_name][w], weight) + else: + raise ValueError(f"Please specify a valid x_name for the shear catalog type {self.shear_catalog_type}.") def collect(self, comm=None): count1, g1, var1 = self.g1.collect(comm, mode="gather") diff --git a/txpipe/utils/conversion.py b/txpipe/utils/conversion.py index bb601e810..1870a4a7c 100644 --- a/txpipe/utils/conversion.py +++ b/txpipe/utils/conversion.py @@ -5,7 +5,15 @@ # follow https://pipelines.lsst.io/v/DM-22499/cpp-api/file/_photo_calib_8h.html def nanojansky_to_mag_ab(flux): - return -2.5 * np.log10(flux * 1e-9 / REF_FLUX) + if isinstance(flux, np.ndarray): + zero = flux == 0 + out = np.empty_like(flux) + out[zero] = np.inf + out[~zero] = -2.5 * np.log10(flux[~zero] * 1e-9 / REF_FLUX) + elif flux == 0: + out = np.inf + + return out def mag_ab_to_nanojansky(mag): @@ -21,3 +29,9 @@ def moments_to_shear(Ixx, Iyy, Ixy): e1 = (Ixx - Iyy) / b e2 = 2 * Ixy / b return e1, e2 + +def mag_ab_err_to_nanojansky_err(mag, mag_err): + flux = mag_ab_to_nanojansky(mag) + with np.errstate(invalid='ignore'): + flux_err = flux * np.log(10) / 2.5 * mag_err + return flux_err \ No newline at end of file