diff --git a/examples/desy3/config.yml b/examples/desy3/config.yml
index 1094244cd..f06c9d280 100644
--- a/examples/desy3/config.yml
+++ b/examples/desy3/config.yml
@@ -1,225 +1,198 @@
-# Values in this section are accessible to all the different stages.
-# They can be overridden by individual stages though.
 global:
-    # This is read by many stages that read complete
-    # catalog data, and tells them how many rows to read
-    # at once
-    chunk_rows: 1000000000
-    # These mapping options are also read by a range of stages
     pixelization: healpix
-    nside: 128
-    sparse: True  # Generate sparse maps - faster if using small areas
-
-# Here's the list of stages we are using:
-#
-# TXSourceSelectorMetacal
-# TXShearCalibration
-# TXTruthLensCatalogSplitter
-# TXStarCatalogSplitter     
-# TXPhotozPlots 
-# TXSourceMaps
-# TXLensMaps
-# TXAuxiliarySourceMaps  
-# TXAuxiliaryLensMaps    
-# TXSourceNoiseMaps    
-# TXDensityMaps        
-# TXMapPlots           
-# TXTracerMetadata     
-# TXJackknifeCenters   
-# TXTwoPoint           
-# TXBlinding           
-# TXTwoPointTheoryReal 
-# TXTwoPointPlots      
-# TXSourceDiagnosticPlots    
-# TXLensDiagnosticPlots    
-# TXGammaTFieldCenters # Compute and plot gamma_t around center points
-# TXGammaTStars  # Compute and plot gamma_t around bright stars
-# TXGammaTRandoms      # Compute and plot gamma_t around randoms
-# TXRoweStatistics     # Compute and plot Rowe statistics
-# TXGalaxyStarDensity  # Compute and plot the star-galaxy density cross-correlation
-# TXGalaxyStarShear    # Compute and plot the star-galaxy shear cross-correlation
-# TXPSFDiagnostics     # Compute and plots other PSF diagnostics
-# TXBrighterFatterPlot # Make plots tracking the brighter-fatter effect
-# TXConvergenceMaps    # Make convergence kappa maps from g1, g2 maps
-# TXConvergenceMapPlots # Plot the convergence map
-# TXMapCorrelations    # plot the correlations between systematics and data
-# TXApertureMass        # Compute aperture-mass statistics
-    
-##- name: TXRealGaussianCovariance   # Compute covariance of real-space correlations
-##- name: TXTwoPointFourier          # Compute power spectra C_ell
-##- name: TXFourierNamasterCovariance # Compute the C_ell covariance
-##- name: TXFourierTJPCovariance     # Compute the C_ell covariance with TJPCov 
+    chunk_rows: 1_000_000 #default for TXPipe stages (can be overwritten in individual stages)
+    chunk_size: 100_000 #default for RAIL stages (can be overwritten in individual stages)
+    sparse: true
+    nside: 4096
+
+
+TXIngestDESY3Gold:
+    name: TXIngestDESY3Gold
+    input_group_name: catalog/gold/
+    chunk_rows: 10_000_000
+
+TXIngestDESY3Footprint:
+    name: TXIngestDESY3Footprint
+    input_nside: 4096
+    input_filepaths:
+        - data/desy3/inputs/footprint_maps/y3a2_footprint_griz_1exp_v2.0_healsparse.hs
+        - data/desy3/inputs/footprint_maps/y3a2_griz_o.4096_t.32768_coverfoot_EQU_healsparse.hs
+        - data/desy3/inputs/footprint_maps/y3a2_foreground_mask_v2.1_healsparse.hs
+        - data/desy3/inputs/footprint_maps/y3a2_badregions_mask_v2.1_healsparse.hs
+        - data/desy3/inputs/footprint_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highdens_vl05_vlim_zmask_ZMAX.hs
+        - data/desy3/inputs/footprint_maps/y3_gold_2.2.1_wide_sofcol_run_redmapper_v0.5.1_redmagic_highlum_vl10_vlim_zmask_ZMAX.hs
+        - data/desy3/inputs/footprint_maps/y3a2_gold_2_2_1_sof_nside4096_nest_i_depth.hs
+
+    input_labels:
+        - footprint/footprint
+        - footprint/fracdet_griz
+        - foreground/foreground
+        - foreground/badregions
+        - zmax/highdens
+        - zmax/highlum
+        - depth/depth_i
+
+TXIngestDESY3SpeczCat:
+    name: TXIngestDESY3SpeczCat
+
+
+TXCustomMask: 
+    name: TXCustomMask
+    fracdet_name: footprint/fracdet_griz
+    nside: 4096
+    #cuts from Rodriguez-Monroy et al https://arxiv.org/abs/2105.13540 
+    cuts: 
+        - "footprint/footprint == 1"
+        - "footprint/fracdet_griz > 0.8"
+        - "foreground/foreground == 0"
+        - "foreground/badregions <= 1"
+        - "zmax/highdens >= 0.65"
+        - "zmax/highlum >= 0.95"
+        - "depth/depth_i > 22.2"
+    degrade: True
+
+
+
+PZPrepareEstimatorLens:
+    name: PZPrepareEstimatorLens
+    classname: DNFInformer 
+    hdf5_groupname: photometry
+    redshift_col: redshift
+    bands: ['mag_g', 'mag_r', 'mag_i', 'mag_z']
+    err_bands: ['mag_err_g', 'mag_err_r', 'mag_err_i', 'mag_err_z' ] 
+    mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6}
+
+PZEstimatorLens:
+    name: PZEstimatorLens 
+    classname: DNFEstimator
+    hdf5_groupname: photometry
+    redshift_col: redshift
+    bands: ['mag_g', 'mag_r', 'mag_i', 'mag_z']
+    err_bands: ['mag_err_g', 'mag_err_r', 'mag_err_i', 'mag_err_z' ] 
+    mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6}
+    selection_mode: 2    #0: ENF, 1: ANF, 2: DNF see https://github.com/LSSTDESC/rail_dnf
+    nondetect_val: .inf
+    chunk_size: 10000  # We have to make this very low to avoid memory issues
+
+PZRailSummarizeLens:
+    name: PZRailSummarizeLens
+    classname: PZRailPZSummarize
+    catalog_group: lens
+    tomography_name: lens
+    nsamples: 0
+    zmin: 0.0
+    zmax: 2.0
+    nzbins: 150
+    point_estimate_key: DNF_ZN
+    hdf5_groupname: tomography
+    summarizer: PointEstHistMaskedSummarizer
+    module: "rail.estimation.algos.point_est_hist"
+    chunk_size: 100_000
+
+PZRailSummarizeSource:
+    name: PZRailSummarizeSource
+    classname: PZRailPZSummarize
+    catalog_group: source
+    tomography_name: source
+    nsamples: 0
+    zmin: 0.0
+    zmax: 2.0
+    nzbins: 150
+    point_estimate_key: DNF_ZN
+    hdf5_groupname: tomography
+    summarizer: PointEstHistMaskedSummarizer
+    module: "rail.estimation.algos.point_est_hist"
+    chunk_size: 100_000
+
+TXPhotozPlotSource:
+    name: TXPhotozPlotSource
+
+TXPhotozPlotLens:
+    name: TXPhotozPlotLens
+
+PZRealizationsPlotSource:
+    name: PZRealizationsPlotSource
+
+
+TXCustomLensSelector:
+    name: TXCustomLensSelector
+    lens_zbin_edges: [0.20, 0.40, 0.55, 0.70, 0.85, 0.95, 1.05]
+    zcol: DNF_Z
+    selection_type: DESmaglim
+    maglim_band: i
+    bright_limit: 17.5
+    a: 4
+    b: 18
+    extra_cols: [EXTENDED_CLASS_SOF, FLAGS_GOLD] #extra columns that are used in the sample selection #TO DO: make this automatic
+    apply_mask: True
+
+
+TXLSSWeightsLinBinned:
+    name: TXLSSWeightsLinBinned
+    supreme_path_root: data/desy3/inputs/patch_survey_property_maps/
+    nbin: 10 
+    outlier_fraction: 0.05
+    pvalue_threshold: 0.05  # max p-value for maps to be included in the corrected (a very simple form of regularization)
+    allow_weighted_input: False
+    nside_coverage: 32
+
+TXLSSDensityNullTests:
+    name: TXLSSDensityNullTests
+    supreme_path_root: data/desy3/inputs/patch_survey_property_maps/
+    nbin: 10 
+    outlier_fraction: 0.05
+    pvalue_threshold: 0.05  # max p-value for maps to be included in the corrected (a very simple form of regularization)
+    equal_area_bins: True  # if you are using binned 1d correlations, should the bins have equal area (set False for equal spacing)
+    simple_cov: True  # if True will use a diagonal shot noise only covariance for the 1d relations
+    diag_blocks_only: True  # If True, will compute only the diagonal blocks of the 1D covariance matrix (no correlation between SP maps)
+    b0: [1.5, 1.8, 1.8, 1.9, 2.3, 2.3]
+    allow_weighted_input: False
+    nside_coverage: 32
+
+TXLensCatalogSplitter:
+    name: TXLensCatalogSplitter
+    redshift_column: DNF_Z
+
+PZPrepareEstimatorSource:
+    name: PZPrepareEstimatorSource
+    classname: DNFInformer
+    hdf5_groupname: photometry
+    redshift_col: redshift
+    bands: ['mag_r', 'mag_i', 'mag_z']
+    err_bands: ['mag_err_r', 'mag_err_i', 'mag_err_z' ] 
+    mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6}
+
+PZEstimatorSource:
+    name: PZEstimatorSource
+    classname: DNFEstimator
+    hdf5_groupname: shear
+    redshift_col: redshift
+    bands: ['mag_r', 'mag_i', 'mag_z']
+    err_bands: ['mag_err_r', 'mag_err_i', 'mag_err_z' ] 
+    mag_limits: {'mag_g': 24.3, 'mag_r': 24.0, 'mag_i': 23.3, 'mag_z': 22.6}
+    selection_mode: 2    #0: ENF, 1: ANF, 2: DNF see https://github.com/LSSTDESC/rail_dnf
+    nondetect_val: .inf
+    chunk_size: 10000 # low to avoid memory issues
+
+TXSourceTomography:
+    bands: ["r", "i", "z"]
+    source_zbin_edges: [0.0, 0.2, 0.4, 0.6, 0.8]
+    seed: 6548756
+    spec_mag_column_format: "photometry/mag_{band}"
+
 
 TXSourceSelectorMetacal:
-    input_pz: True
+    input_pz: False
     true_z: False
-    bands: riz #used for selection
+    bands: ["r", "i", "z"]
     T_cut: 0.5
     s2n_cut: 10.0
     max_rows: 10000000000
     delta_gamma: 0.02
+    use_diagonal_response : False
     source_zbin_edges: [0.0, 0.2, 0.4, 0.6, 0.8]
-    resp_mean_diag : True
-
-TXSourceSelectorLensfit:
-    bands: ri #used for selection
-    T_cut: 0.0
-    s2n_cut: 0.0
-    max_rows: 1000
-    delta_gamma: 0.02
-    source_zbin_edges: [0.1, 0.3, 0.5, 0.7, 0.9, 1.2]
-    nsrc: 5
-    nlens: 0
-    shear_catalog_type: 'lensfit'
-    input_pz: True
-    # may also need one for r_cpar_cut
-    input_m_is_weighted: True
-
-TXShearCalibration: {}
-TXTruthLensCatalogSplitter: {}
-TXStarCatalogSplitter: {} 
-TXPhotozPlots: {}
-
-TXSourceMaps:
-    sparse: True  # Generate sparse maps - faster if using small areas
-
-TXLensMaps:
-    sparse: True  # Generate sparse maps - faster if using small areas
-
-TXAuxiliarySourceMaps:
-    flag_exponent_max: 8
-    nside: 128
-
-TXAuxiliaryLensMaps:
-    flag_exponent_max: 8
-    bright_obj_threshold: 10.0 # The magnitude threshold for a object to be counted as bright
-    depth_band : i
-    snr_threshold: 20.0  # The S/N value to generate maps for (e.g. 5 for 5-sigma depth)
-    snr_delta: 1.0  # The range threshold +/- delta is used for finding objects at the boundary
-
-TXSourceNoiseMaps: {}
-TXDensityMaps: {}
-TXMapPlots: {}
-TXTracerMetadata: {}
-
-TXJackknifeCenters:
-    npatch: 100
-    every_nth: 100
-
-TXTwoPoint:
-    calcs: [0]
-    bin_slop: 0.5
-    delta_gamma: 0.02
-    do_pos_pos: False
-    do_shear_shear: False
-    do_shear_pos: False
-    flip_g2: False  
-    min_sep: 2.5
-    max_sep: 250
-    nbins: 20
-    verbose: 0
-    subtract_mean_shear: True
-    source_bins: [0]
-    lens_bins: [0]
-
-TXBlinding:
-    seed: 1972  ## seed uniquely specifies the shift in parameters
-    Omega_b: [0.0485, 0.001] ## fiducial_model_value, shift_sigma
-    Omega_c: [0.2545, 0.01]
-    w0: [-1.0, 0.1]
-    h: [0.682, 0.02]
-    sigma8: [0.801, 0.01]
-    n_s: [0.971, 0.03]
-    b0: 0.95  ## we use bias of the form b0/g
-    delete_unblinded: True
-    
-TXTwoPointTheoryReal: {}
-TXTwoPointPlots: {}    
-    
-TXSourceDiagnosticPlots:
-    psfT_min: 0.04
-    psfT_max: 0.36
-    T_min: 0.04
-    T_max: 4.0
-    nbins: 20
-    bands: ri
-
-TXLensDiagnosticPlots: {}
-
-TXGammaTFieldCenters: {}
-
-TXGammaTStars: {}
-
-TXGammaTRandoms: {}
-
-TXRoweStatistics: 
-    psf_size_units: 'T'
-
-TXTauStatistics:
-    flip_g2: False
-
-TXFocalPlanePlot: {}
-
-TXGalaxyStarDensity: {}
-
-
-
-
-
 
-TXExposureInfo:
-    dc2_name: '1.2p'
 
-
-
-# Mock version of stacking:
-TXSourceTrueNumberDensity:
-    nz: 301
-    zmax: 3.0
-
-# Mock version of stacking:
-TXTrueNumberDensity:
-    nz: 301
-    zmax: 3.0
-
-
-
-
-TXGammaTBrightStars: {}
-
-TXGammaTDimStars: {}
-
-
-
-
-TXDiagnosticMaps:
-    sparse: True  # Generate sparse maps - faster if using small areas
-    snr_threshold: 10.0
-    snr_delta: 1.0
-    # pixelization: gnomonic
-    pixel_size: 0.2
-    ra_cent: 62.
-    dec_cent: -35.
-    npix_x: 60
-    npix_y: 60
-    depth_cut: 23.0
-
-
-TXRealGaussianCovariance:
-    min_sep: 2.5
-    max_sep: 60.
-    nbins: 10
-    pickled_wigner_transform: data/example/inputs/wigner.pkl
-
-
-
-TXTwoPointFourier:
-    flip_g2: True
-    bandwidth: 100
-
-
-
-TXMapCorrelations:
-    supreme_path_root: data/example/inputs/supreme
-    outlier_fraction: 0.05
-    nbin: 20
+TXRandomCat:
+    depth_band: i
+    density: 10 # gals per sq arcmin
diff --git a/examples/desy3/pipeline.yml b/examples/desy3/pipeline.yml
index cdc7eb949..26c376cd3 100644
--- a/examples/desy3/pipeline.yml
+++ b/examples/desy3/pipeline.yml
@@ -1,78 +1,187 @@
 
 # Stages to run
 stages:
+  # INGESTION
+  - name: TXIngestDESY3Gold
+  - name: TXIngestDESY3Footprint
+  - name: TXIngestDESY3SpeczCat
+  - name: TXIngestDESY3Shear
+  - name: TXCustomMask
 
-    - name: TXSourceSelectorMetacal # select and split objects into source bins
-    - name: TXShearCalibration   # Calibrate and split the source sample tomographically
-    #- name: TXPhotozPlots
-    #- name: TXPhotozSourceStack 
-    #- name: TXTruthLensCatalogSplitter
-    #- name: TXStarCatalogSplitter  # Split the star catalog into separate bins (psf/non-psf)    
-    #- name: TXSourceMaps
-      #nprocess: 6
-      #threads_per_process: 1
-      #nodes: 1
-    #- name: TXLensMaps
-    #- name: TXAuxiliarySourceMaps  # make PSF and flag maps
-    #- name: TXAuxiliaryLensMaps    # make depth and bright object maps
-    #- name: TXSourceNoiseMaps    # noise maps for sources using random rotations
-    # stage below is not working because TXAuxiliaryLensMaps is broken
-    #- name: TXLensNoiseMaps      # noise maps for lenses using density splits
-    #- name: TXDensityMaps        # turn mask and ngal maps into overdensity maps
-    #- name: TXMapPlots           # make pictures of all the maps
-    #- name: TXTracerMetadata     # collate metadata
-    #- name: TXRandomCat
-    #- name: TXJackknifeCenters   # Split the area into jackknife regions
-    #- name: TXTwoPoint           # Compute real-space 2-point correlations
-    #  threads_per_process: 128
-    #- name: TXBlinding           # Blind the data following Muir et al
-        #threads_per_process: 2
-    #- name: TXTwoPointTheoryReal # compute theory using CCL to save in sacc file and plot later
-    #- name: TXTwoPointPlots      # Make plots of 2pt correlations
-    #- name: TXSourceDiagnosticPlots    # Make a suite of diagnostic plots
-      #nprocess: 8 
-      #nodes: 2
-    #- name: TXLensDiagnosticPlots    # Make a suite of diagnostic plots
-    #- name: TXGammaTFieldCenters # Compute and plot gamma_t around center points # ERROR: EXPOSURE FILE
-      ##threads_per_process: 2
-    #- name: TXGammaTStars  # Compute and plot gamma_t around bright stars
-      #threads_per_process: 2
-    #- name: TXGammaTRandoms      # Compute and plot gamma_t around randoms
-      #threads_per_process: 2
-    #- name: TXRoweStatistics     # Compute and plot Rowe statistics
-    #  threads_per_process: 128
-    #- name: TXFocalPlanePlot      # mean PSF ellipticity and mean residual on the focal plane 
-      #- name: TXGalaxyStarDensity  # Compute and plot the star-galaxy density cross-correlation
-      #- name: TXGalaxyStarShear    # Compute and plot the star-galaxy shear cross-correlation
-    #- name: TXPSFDiagnostics     # Compute and plots other PSF diagnostics
-    #- name: TXBrighterFatterPlot # Make plots tracking the brighter-fatter effect
-    #- name: TXConvergenceMaps    # Make convergence kappa maps from g1, g2 maps #ERROR: WLMASSMAP
-    #- name: TXConvergenceMapPlots # Plot the convergence map
-    #- name: TXMapCorrelations    # plot the correlations between systematics and data # ERROR: NEEDS CONVERGENCE MAP
-    #- name: TXApertureMass        # Compute aperture-mass statistics
-      ##threads_per_process: 2
-    # Disabled since not yet synchronised with new Treecorr MPI
-    # - name: TXSelfCalibrationIA   # Self-calibration intrinsic alignments of galaxies
-    #
-    # Disabling these as they takes too long for a quick test.
-    # The latter three need NaMaster
-    
-      ##- name: TXRealGaussianCovariance   # Compute covariance of real-space correlations
-      ##- name: TXTwoPointFourier          # Compute power spectra C_ell
-      ##- name: TXFourierNamasterCovariance # Compute the C_ell covariance
-      ##- name: TXFourierTJPCovariance     # Compute the C_ell covariance with TJPCov 
+  # PER-OBJECT PHOTO-Z
+
+  # Train the DNF model on the spectroscopic sample
+  - name: PZPrepareEstimatorLens 
+    classname: DNFInformer 
+    aliases:
+        input: spectroscopic_catalog
+        model: lens_photoz_model
+
+  # Apply the trained DNF model to the photometric sample to get lens photo-z
+  - name: PZEstimatorLens 
+    classname: DNFEstimator
+    nprocess: 64
+    threads_per_process: 1
+    aliases:
+        model: lens_photoz_model
+        input: photometry_catalog
+        output: lens_photoz_pdfs
+
+  # Train the DNF model on the spectroscopic sample. This uses a restricted set of bands
+  # measured under metacalibration
+  - name: PZPrepareEstimatorSource
+    classname: DNFInformer 
+    aliases:
+        input: spectroscopic_catalog
+        model: source_photoz_model
+
+  # Apply the trained DNF model to the metacalibration photometric sample to get source photo-z
+  - name: PZEstimatorSource
+    nprocess: 64
+    threads_per_process: 1
+    classname: DNFEstimator
+    aliases:
+        model: source_photoz_model
+        input: shear_catalog
+        output: source_photoz_pdfs
+
+  # SELECTION & CALIBRATION
+
+  # DES-Y3 maglim selection
+  - name: TXCustomLensSelector
+    nprocess: 32
+
+  # Split the lens catalog into tomographic bins
+  - name: TXLensCatalogSplitter
+    nprocess: 8
+
+  # Compute correlations between survey properties and lens density
+  - name: TXLSSDensityNullTests
+    threads_per_process: 8
+
+  # Compute weights that mitigate correlations between survey properties and lens density
+  - name: TXLSSWeightsLinBinned
+    threads_per_process: 8
+
+  # Train a tomography model from the spectroscopic sample for the sources
+  - name: TXSourceTomography
+    threads_per_process: 8
+
+  # Compute metacalibration shear response and selection bias corrections
+  - name: TXSourceSelectorMetacal
+    nprocess: 32
+    threads_per_process: 4
+
+  # Generate calibrated tomographic source catalogs
+  - name: TXShearCalibration
+    nproces: 8
+    threads_per_process: 4
+
+  # PHOTO-Z SUMMARIZATION
+
+  # Compute the ensemble n(z) for the lens bins using point estimate histograms
+  - name: PZRailSummarizeLens
+    classname: PZRailPZSummarize
+    nprocess: 32
+    threads_per_process: 4
+    aliases:
+        tomography_catalog: lens_tomography_catalog_unweighted
+        photoz_pdfs: lens_photoz_pdfs
+        model: lens_photoz_model
+        photoz_stack: lens_photoz_stack
+        photoz_realizations: lens_photoz_realizations
+
+  # Compute the ensemble n(z) for the source bins using point estimate histograms
+  - name: PZRailSummarizeSource
+    classname: PZRailPZSummarize
+    nprocess: 32
+    threads_per_process: 4
+    aliases:
+        tomography_catalog: shear_tomography_catalog
+        photoz_pdfs: source_photoz_pdfs
+        model: source_photoz_model
+        photoz_stack: shear_photoz_stack
+        photoz_realizations: source_photoz_realizations
+
+  # Plot the source n(z) distribution
+  - name: TXPhotozPlotSource
+    classname: TXPhotozPlot
+    aliases:
+        photoz_stack: shear_photoz_stack
+        nz_plot: nz_source
+
+  # Plot the lens n(z) distribution
+  - name: TXPhotozPlotLens
+    classname: TXPhotozPlot
+    aliases:
+        photoz_stack: lens_photoz_stack
+        nz_plot: nz_lens
+
+  # Plot realizations of the source n(z) distribution
+  - name: PZRealizationsPlotSource
+    classname: PZRealizationsPlot
+    aliases:
+        photoz_realizations: source_photoz_realizations
+        photoz_realizations_plot: source_photoz_realizations_plot
+
+
+  # TWO-POINT ESTIMATION
+
+  # Generate random catalogs for the lens sample, for use in two-point estimation
+  - name: TXRandomCat
+    nprocess: 32
+    threads_per_process: 4
+
+  # Collate metadata for the samples
+  - name: TXTracerMetadata
+
+  # Choose jackknife centers for two-point estimation speed from randoms
+  - name: TXJackknifeCenters
+
+  # Measure the real-space two-point correlation functions
+  - name: TXTwoPoint
+    threads_per_process: 128
+
+  # Plot the correlations functions
+  - name: TXTwoPointPlots
+
+  # Pretend to blind the two-point functions
+  - name: TXNullBlinding
+  # - name: TXRealGaussianCovariance
+
+  # MAPPING
+  
+  # Make PSF and similar maps for the sources
+  - name: TXAuxiliarySourceMaps
+
+  # Make maps of the lens counts
+  - name: TXLensMaps
+
+  # Make maps of the lens over-density
+  - name: TXDensityMaps
+
+  # Make maps of the shear field
+  - name: TXSourceMaps
+
+  # Plot all the maps
+  - name: TXMapPlots  
 
-#===========
 
 launcher:
     name: mini
     interval: 1.0
     
 site:
-    name: local 
+    name: nersc-interactive
     max_threads: 128
 
-modules: txpipe
+modules: >
+    txpipe
+    rail.estimation.algos.dnf
+    rail.estimation.algos.naive_stack
+    rail.estimation.algos.point_est_hist
+    rail.estimation.algos.uniform_binning 
+
 
 # where to find any modules that are not in this repo,
 # and any other code we need.
@@ -88,22 +197,14 @@ config: examples/desy3/config.yml
 
 # These are overall inputs to the whole pipeline, not generated within it
 inputs:
-    shear_catalog: //global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/shear_catalog_desy3_unmasked_withfakez_v2.h5
-    photometry_catalog: 
-    #shear_tomography_catalog: /global/cfs/cdirs/desc-wl/projects/txpipe-sys-tests/des-y3/shear_tomography_desy3_unmasked_test.h5
-    lens_tomography_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/lens_tomography_catalog_desy1_RM.h5
-    lens_photoz_stack: 
-    mask: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/mask_desy1.h5
-    star_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y3/DES_psf_y3_catalog.h5
-    calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat
+    des_photometry_catalog: data/desy3/inputs/DESY3_GOLD_2_2.1.h5
+    des_shear_catalog: data/desy3/inputs/DESY3_metacal_v03-004.h5
+    des_specz_catalog: data/desy3/inputs/y6gold22_trainnov20_matchedtoy3.fits
     fiducial_cosmology: data/fiducial_cosmology.yml
-    random_cats_source: Null
-    shear_tomography_classifier: none
-    flow: data/example/inputs/example_flow.pkl
 
 # if supported by the launcher, restart the pipeline where it left off
 # if interrupted
-resume: False
+resume: resume
 
 # where to put output logs for individual stages
 log_dir: data/desy3/logs
diff --git a/examples/lssweights/des-y1/config_binned.yml b/examples/lssweights/des-y1/config_binned.yml
index f9724c0d2..dd88cd3d8 100755
--- a/examples/lssweights/des-y1/config_binned.yml
+++ b/examples/lssweights/des-y1/config_binned.yml
@@ -11,7 +11,7 @@ global:
     sparse: True  # Generate sparse maps - faster if using small areas
 
 TXLSSDensityNullTests:
-    supreme_path_root: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/spmaps/
+    supreme_path_root: data/desy1//spmaps/
     outlier_fraction: 0.01  # Will remove this fraction of the maps is most extreme regions when computing 1d trends
     nbin: 10                # number of bins per SP map to compute 1d trends
     simple_cov: True        # If True will use diagonal shot noise covariance
@@ -19,7 +19,7 @@ TXLSSDensityNullTests:
     b0: [1.4, 1.6, 1.6, 1.93, 1.98]
 
 TXLSSWeightsLinBinned:
-    supreme_path_root: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/spmaps/
+    supreme_path_root: data/desy1//spmaps/
     outlier_fraction: 0.01  # Will remove this fraction of the maps is most extreme regions when computing 1d trends
     nbin: 10                # number of bins per SP map to compute 1d trends
     pvalue_threshold: 0.05  # max p-value for maps to be corrected (0 is no maps, 1 is all maps)
@@ -30,7 +30,7 @@ TXTwoPoint:
     do_pos_pos: True
     do_shear_shear: False
     do_shear_pos: False
-    flip_g2: True  # use true when using metacal shears
+    flip_g2: True  # use true when using metacal sh ears
     min_sep: 2.5
     max_sep: 250.0
     nbins: 20
diff --git a/examples/lssweights/des-y1/pipeline_binned.yml b/examples/lssweights/des-y1/pipeline_binned.yml
index 9dc5a052c..d6c962c8a 100755
--- a/examples/lssweights/des-y1/pipeline_binned.yml
+++ b/examples/lssweights/des-y1/pipeline_binned.yml
@@ -23,7 +23,7 @@ stages:
 modules: txpipe 
 
 # Where to put outputs
-output_dir: examples/lssweights/des-y1/output_binned/
+output_dir: data/desy1/output
 
 # How to run the pipeline: mini, parsl, or cwl
 launcher:
@@ -32,20 +32,20 @@ launcher:
 
 # Where to run the pipeline: cori-interactive, cori-batch, or local
 site:
-    name: nersc-interactive
+    name: local
 
 # configuration settings
-config: examples/lssweights/des-y1//config_binned.yml
+config: examples/lssweights/des-y1/config_binned.yml
 
 inputs:
     # See README for paths to download these files
-    photometry_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/photometry_catalog_desy1_RM.h5
-    lens_tomography_catalog_unweighted: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/lens_tomography_catalog_desy1_RM_unweighted.h5
-    random_cats: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/randoms_desy1_RM.hdf5
-    binned_random_catalog: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/binned_randoms_desy1_RM.hdf5
-    binned_random_catalog_sub: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/binned_randoms_desy1_RM.hdf5
-    mask: /global/cfs/cdirs/lsst/groups/WL/users/jelvinpo/txpipe-weights/des-y1/mask_desy1_4096.h5
-    lens_photoz_stack: /global/cfs/cdirs/lsst/groups/WL/projects/txpipe-sys-tests/des-y1/lens_photoz_stack.hdf5
+    photometry_catalog: data/desy1/photometry_catalog_desy1_RM.h5
+    lens_tomography_catalog_unweighted: data/desy1/lens_tomography_catalog_desy1_RM_unweighted.h5
+    random_cats: data/desy1/randoms_desy1_RM.hdf5
+    binned_random_catalog: data/desy1/binned_randoms_desy1_RM.hdf5
+    binned_random_catalog_sub: data/desy1/binned_randoms_desy1_RM.hdf5
+    mask: data/desy1/mask_desy1_4096.h5
+    lens_photoz_stack: data/desy1/lens_photoz_stack.hdf5
     shear_catalog: none
     shear_tomography_catalog: none
     binned_shear_catalog: none
@@ -62,6 +62,6 @@ inputs:
 # if interrupted
 resume: False
 # where to put output logs for individual stages
-log_dir: examples/lssweights/des-y1/output_binned/logs/
+log_dir: lssweights_logs
 # where to put an overall parsl pipeline log
-pipeline_log: examples/lssweights/des-y1/output_binned/log.txt
+pipeline_log: ./log.txt
diff --git a/txpipe/base_stage.py b/txpipe/base_stage.py
index 8cb7dccbc..fb7f20db7 100755
--- a/txpipe/base_stage.py
+++ b/txpipe/base_stage.py
@@ -1,4 +1,4 @@
-from ceci import PipelineStage as PipelineStageBase
+from ceci import PipelineStage as PipelineStageBase, file_types
 from .data_types import HDFFile
 from textwrap import dedent
 from .utils.provenance import find_module_versions, git_diff, git_current_revision
@@ -183,13 +183,17 @@ def open_output(self, tag, wrapper=False, **kwargs):
         new_tag = self.get_aliased_tag(tag)
         path = self.get_output(new_tag)
 
-        # HDF files can be opened for parallel writing
+        # HDF files and directory outputs can be opened for parallel writing
         # under MPI.  This checks if:
         # - we have been told to open in parallel
         # - we are actually running under MPI
         # and adds the flags required if all these are true
         run_parallel = kwargs.pop("parallel", False) and self.is_mpi()
-        if run_parallel:
+        if run_parallel and issubclass(output_class, file_types.Directory):
+            kwargs["parallel"] = True
+            kwargs["comm"] = self.comm
+
+        elif run_parallel:
             if not output_class.supports_parallel_write:
                 raise ValueError(
                     f"Tried to open file for parallel output, but not"
diff --git a/txpipe/ingest/base.py b/txpipe/ingest/base.py
index 5dbc19519..4a6f4d2a1 100644
--- a/txpipe/ingest/base.py
+++ b/txpipe/ingest/base.py
@@ -282,7 +282,7 @@ def process_maps(self, input_filepaths, input_labels, output_name):
         metadata = {
             "pixelization": "healpix",
             "nside": self.config["input_nside"],
-            "nest": False,  # currently TXPipe defaults to ring format
+            "nest": True,  # currently TXPipe defaults to ring format
         }
         print(f"Input nside {self.config['input_nside']}")
 
diff --git a/txpipe/ingest/stage3/desy3.py b/txpipe/ingest/stage3/desy3.py
index 4f58c22ea..f76d9455d 100644
--- a/txpipe/ingest/stage3/desy3.py
+++ b/txpipe/ingest/stage3/desy3.py
@@ -1,6 +1,20 @@
-from ...data_types import HDFFile, MapsFile, FitsFile
-from ..base import TXIngestCatalogH5, TXIngestMapsHsp, TXIngestCatalogFits
+from ...data_types import HDFFile, MapsFile, FitsFile, ShearCatalog
+from ..base import TXIngestCatalogH5, TXIngestMapsHsp, TXIngestCatalogFits, PipelineStage
 from ceci.config import StageParameter
+import numpy as np
+
+def to_native_endian(arr: np.ndarray) -> np.ndarray:
+    """
+    Convert a NumPy array to the machine's native byte order.
+
+    Various downstream stages use NUMBA, which doesn't like
+    non-native endianness, and some of the DES Y3 data files seem
+    to be big-endian.
+    """
+    native_dtype = arr.dtype.newbyteorder('=')
+    # copy=False avoids copying if already correct
+    return arr.astype(native_dtype, copy=False)
+
 
 
 class TXIngestDESY3Gold(TXIngestCatalogH5):
@@ -146,3 +160,111 @@ def run(self):
             column_names,
             dummy_columns,
         )
+
+
+class TXIngestDESY3Shear(PipelineStage):
+    name = "TXIngestDESY3Shear"
+    parallel = False
+    inputs = [
+        ("des_shear_catalog", HDFFile),
+    ]
+
+    outputs = [
+        ("shear_catalog", ShearCatalog),
+    ]
+
+    config_options = {
+        "input_group_name": StageParameter(str, "catalog/", msg="Input group name in the HDF5 file."),
+        "chunk_rows": StageParameter(int, 100_000, msg="Number of rows to process in each chunk."),
+    }
+
+    def des_metacal_flux_to_mag(self, flux, flux_err):
+        # Seems to use a zero-point of 30 based on comparison
+        # to the gold catalog.
+        mag = -2.5 * np.log10(flux) + 30
+        mag_err = 2.5 / np.log(10) * (flux_err / flux)
+        return mag, mag_err
+
+
+    def run(self):
+        """
+        Run the analysis for this stage.
+        """
+        print("Ingesting DES Y3 Metacal catalog from ", self.get_input("des_shear_catalog"))
+
+
+        subgroups_to_suffixes = {
+            "unsheared": "",
+            "sheared_1p": "_1p",
+            "sheared_1m": "_1m",
+            "sheared_2p": "_2p",
+            "sheared_2m": "_2m",
+        }
+
+        single_columns = {
+            "unsheared/coadd_object_id": "id",
+            "unsheared/ra": "ra",
+            "unsheared/dec": "dec",
+            "unsheared/psf_T": "psf_T_mean",
+            "unsheared/psf_e1": "psf_e1",
+            "unsheared/psf_e2": "psf_e2",
+            "unsheared/flags": "flags",
+        }
+
+        sheared_columns = {
+            "T": "T",
+            "T_err": "T_err",
+            "e_1": "g1",
+            "e_2": "g2",
+            "snr": "s2n",  # we originally called the TXPipe version "s2n" instead of "snr" 
+                           # precisely to be more consistent with metacal, but now apparently
+                           # metacal uses "snr" again. After this PR we should update to use "snr" everywhere.
+            "weight": "weight",
+        }
+
+        fluxes = ["r", "i", "z"]
+
+        # The input and output sections in the files
+        fin = self.open_input("des_shear_catalog")
+        fout = self.open_output("shear_catalog")
+        gin = fin.file["catalog/"]
+        gout = fout.file.create_group("shear")
+        gout.attrs['catalog_type'] = 'metacal'
+
+        compression = None
+
+        # save all the columns that are only measured once, not on all the
+        # sheared versions. In metacal this includes positions and PSF properties.
+        for input_col, output_col in single_columns.items():
+            d = to_native_endian(gin[input_col][:])
+            print(f"Copying {input_col} -> {output_col}")
+            gout.create_dataset(output_col, data=d, chunks=True, compression=compression)
+        
+        # Now save the stuff that is different for each sheared version
+        for variant, suffix in subgroups_to_suffixes.items():
+            gin_sub = gin[variant]
+
+            # First handle the ones that are just renamed, including the shears
+            # themselves and the SNR and weight etc.
+            for input_col, output_col in sheared_columns.items():
+                print(f"Copying {variant}/{input_col} -> {output_col}{suffix}")
+                d = to_native_endian(gin_sub[input_col][:])
+                gout.create_dataset(output_col + suffix, data=d, chunks=True, compression=compression)
+            
+            # Now deal with fluxes, which we have to turn into magnitudes.
+            # Note that we might change our mind on this and at some point
+            # just start storing fluxes.
+            for band in fluxes:
+                # convert fluxes to mags
+                flux = to_native_endian(gin_sub[f"flux_{band}"][:])
+                flux_err = to_native_endian(gin_sub[f"flux_err_{band}"][:])
+
+                flux[flux < 0] = 0
+
+                mag, mag_err = self.des_metacal_flux_to_mag(flux, flux_err)
+                print(f"Copying fluxes and errs {variant}/flux_{band} -> mags mag_{band}{suffix}")
+
+                # save mags and mag_errs
+                gout.create_dataset(f"mag_{band}{suffix}", data=mag, chunks=True, compression=compression)
+                gout.create_dataset(f"mag_err_{band}{suffix}", data=mag_err, chunks=True, compression=compression)
+
diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py
index 0c264ec92..4b2ded81d 100755
--- a/txpipe/lens_selector.py
+++ b/txpipe/lens_selector.py
@@ -88,7 +88,11 @@ def run(self):
         # If we are going to remove lens galaxies that sit outside the mask, load the mask
         if self.config["apply_mask"]:
             with self.open_input("mask", wrapper=True) as f:
-                self.mask, self.mask_nside = f.read_healsparse("mask", return_all=True)
+                info = f.read_map_info("mask")
+                print(info)
+                self.mask_nside = info['nside']
+                self.mask_nest = info['nest']
+                self.mask = f.read_mask_healpix("mask")
 
         # We will collect the selection biases for each bin
         # as a matrix.  We will collect together the different
@@ -334,7 +338,7 @@ def select_in_footprint(self, phot_data):
             # with self.open_input('mask', wrapper=True) as f:
             # mask, nside= f.read_healsparse('mask', return_all=True)
 
-            pix = hp.ang2pix(self.mask_nside, phot_data["ra"], phot_data["dec"], lonlat=True)
+            pix = hp.ang2pix(self.mask_nside, phot_data["ra"], phot_data["dec"], lonlat=True, nest=self.mask_nest)
             s = np.where(self.mask[pix] == hp.UNSEEN, 0, 1)
             print(f"{len(s) - np.sum(s)}/{len(s)} objects removed because they are outside the mask")
             return s
diff --git a/txpipe/lsstools/lsstools.py b/txpipe/lsstools/lsstools.py
index 0c5af02f4..43b736287 100644
--- a/txpipe/lsstools/lsstools.py
+++ b/txpipe/lsstools/lsstools.py
@@ -552,6 +552,7 @@ def linear_model(
     sysmap_table=None,
     map_index=None,
     frac=None,
+    do_grid_hist=True,
 ):
     """
     linear contamination model:
@@ -605,6 +606,7 @@ def linear_model(
             map_input=True,
             use_precompute=True,
             frac=frac_vals,
+            do_grid_hist=do_grid_hist,
         )
 
     return F, F_density_corrs
diff --git a/txpipe/lssweights.py b/txpipe/lssweights.py
index a2bfa6d55..cf0468abb 100644
--- a/txpipe/lssweights.py
+++ b/txpipe/lssweights.py
@@ -156,6 +156,11 @@ def load_and_mask_sysmaps(self):
         nsys = len(sys_files)
         print(f"Found {nsys} total systematic maps")
 
+        if nsys == 0:
+            raise ValueError(f"No systematic maps found in {root} with extension .hs. "
+                             "Please check your config supreme_path_root setting and "
+                             "make sure it points to the right location")
+
         with self.open_input("mask", wrapper=True) as map_file:
             # load *boolean* mask
             mask = map_file.read_mask(
@@ -344,7 +349,7 @@ class TXLSSDensityNullTests(TXLSSDensityBase):
     """
 
     name = "TXLSSDensityNullTests"
-    parallel = False
+    parallel = True
     inputs = [
         (
             "binned_lens_catalog_unweighted",
@@ -405,15 +410,16 @@ def run(self):
             self.Ntomo = f["lens"].attrs["nbin_lens"]
 
         # output directory for the plots and summary stats
-        output_dir = self.open_output("lss_density_plots", wrapper=True)
+        # open in parallel so that each process can write to it as needed.
+        output_dir = self.open_output("lss_density_plots", wrapper=True, parallel=True)
 
-        # open outdir density correlation file
-        dens_output = self.open_output("unweighted_density_correlation", wrapper=False)
 
         # load the SP maps, apply the mask, normalize the maps (as needed by the method)
         self.sys_maps, self.sys_names, self.sys_meta = self.prepare_sys_maps()
 
-        for ibin in range(self.Ntomo):
+        results = []
+        for ibin in self.split_tasks_by_rank(range(self.Ntomo)):
+            print("Computing density correlations for lens bin {0}/{1}".format(ibin + 1, self.Ntomo))
             # compute density vs SP map data vector
             density_corrs = self.calculate_1d_density_correlations(ibin)
 
@@ -423,10 +429,20 @@ def run(self):
             # Add the null model n_dens/<n_dens> = 1 to the
             # DensityCorrelation object and compute its chi2
             density_corrs.add_null_model()
-
-            # make summary stats and plots
-            self.summarize_density(output_dir, dens_output, density_corrs)
-        dens_output.close()
+            results.append(density_corrs)
+
+        # gather all results on root process
+        if self.comm is not None:
+            results = self.comm.gather(results, root=0)
+
+        if self.rank == 0:
+            # flatten list of lists
+            results = [result for sublist in results for result in sublist]
+            # open outdir density correlation file.
+            # only the root process does any writing.
+            with self.open_output("unweighted_density_correlation", wrapper=False) as dens_output:
+                for density_corrs in results:
+                    self.summarize_density(output_dir, dens_output, density_corrs)
 
     def summarize_density(self, output_dir, dens_output, density_correlation):
         """
diff --git a/txpipe/masks.py b/txpipe/masks.py
index 10e53d72e..3d2077722 100644
--- a/txpipe/masks.py
+++ b/txpipe/masks.py
@@ -367,10 +367,10 @@ def make_binary_mask(self):
         import operator
 
         OP_MAP = {
-            ">": operator.gt,
             ">=": operator.ge,
-            "<": operator.lt,
             "<=": operator.le,
+            ">": operator.gt,
+            "<": operator.lt,
             "==": operator.eq,
             "!=": operator.ne,
         }
diff --git a/txpipe/metadata.py b/txpipe/metadata.py
index 3aff38b8a..c0f648d5b 100644
--- a/txpipe/metadata.py
+++ b/txpipe/metadata.py
@@ -60,10 +60,8 @@ def copy_source_metadata(self, meta_file, metadata, area, area_sq_arcmin):
             if shear_catalog_type == "metacal":
                 copy(shear_tomo_file, "response", "tracers", "R_gamma_mean", meta_file, metadata)
                 copy(shear_tomo_file, "response", "tracers", "R_S", meta_file, metadata)
-                copy(shear_tomo_file, "response", "tracers", "R_total", meta_file, metadata)
                 copy(shear_tomo_file, "response", "tracers", "R_gamma_mean_2d", meta_file, metadata)
                 copy(shear_tomo_file, "response", "tracers", "R_S_2d", meta_file, metadata)
-                copy(shear_tomo_file, "response", "tracers", "R_total_2d", meta_file, metadata)
             elif shear_catalog_type == "metadetect":
                 copy(shear_tomo_file, "response", "tracers", "R", meta_file, metadata)
                 copy(shear_tomo_file, "response", "tracers", "R_2d", meta_file, metadata)
diff --git a/txpipe/rail/summarize.py b/txpipe/rail/summarize.py
index f3e41d258..803d52b54 100644
--- a/txpipe/rail/summarize.py
+++ b/txpipe/rail/summarize.py
@@ -35,6 +35,7 @@ def run(self):
         # But there is also often a special bin_all
         # that is non-tomographic
         bins = self.get_bin_list()
+        print("Summarizing the following bins: ", bins)
 
         # These parameters are common to all the bins runs
         if "binned_catalog" in self.input_tags():
@@ -210,15 +211,19 @@ def get_bin_list(self):
             bins = [
                 f"bin_{bin_index}" for bin_index in unique_bins[unique_bins != -1]
             ]  # do not include the "unselected" bin, -1
+        bins.append("bin_all")  # add the non-tomographic bin at the end
         return bins
 
     def get_extra_sub_config(self, bin):
         """
         Additional config options needed by the PZSummarizers
         """
-        with self.open_input("tomography_catalog", wrapper=True) as f:
-            tomo_path = f.path
-        bin_index = int(bin.split("_")[-1])
+        from rail.core.common_params import TOMOGRAPHY_ALL
+        tomo_path = self.get_input("tomography_catalog")
+        if bin == "bin_all":
+            bin_index = TOMOGRAPHY_ALL
+        else:
+            bin_index = int(bin.split("_")[-1])
 
         extra_sub_config = {
             "selected_bin": bin_index,
diff --git a/txpipe/random_cats.py b/txpipe/random_cats.py
index 4e8d21fe8..360b9efd8 100644
--- a/txpipe/random_cats.py
+++ b/txpipe/random_cats.py
@@ -44,6 +44,7 @@ class TXRandomCat(PipelineStage):
         "sample_rate": StageParameter(
             float, 0.5, msg="Fraction of random catalog to be retained in the sub-sampled catalog."
         ),
+        "depth_band": StageParameter(str, "", msg="Band to use for depth map when calculating density. If not set just use 'depth'"),
     }
 
     def run(self):
@@ -56,11 +57,15 @@ def run(self):
         import healpix
 
         # Load the input depth map
+        depth_band = self.config["depth_band"]
+        if depth_band:
+            depth_band = "_" + depth_band
         with self.open_input("aux_lens_maps", wrapper=True) as maps_file:
-            depth = maps_file.read_map("depth/depth")
-            info = maps_file.read_map_info("depth/depth")
+            depth = maps_file.read_map(f"depth/depth{depth_band}")
+            info = maps_file.read_map_info(f"depth/depth{depth_band}")
             nside = info["nside"]
             scheme = choose_pixelization(**info)
+        print(scheme.nest, scheme.nside)
 
         # Load the input mask
         with self.open_input("mask", wrapper=True) as maps_file:
@@ -70,6 +75,7 @@ def run(self):
             # This is a QP object
             n_of_z_object = f.read_ensemble()
             Ntomo = n_of_z_object.npdf - 1  # ensemble includes the non-tomo 2D n(z)
+        print("Generating randoms for {0} tomographic bins".format(Ntomo))
 
         # We also generate comoving distances under a fiducial cosmology
         # for each random, for use in Rlens type metrics
@@ -81,6 +87,10 @@ def run(self):
         depth = depth[pixel]
         frac = mask[pixel]
         npix = depth.size
+        print("npix = ", npix)
+        print("frac = ", frac)
+        print("depth = ", depth)
+        print("pixel = ", pixel)
 
         if len(pixel) == 1:
             raise ValueError("Only one pixel in depth map!")
@@ -263,7 +273,8 @@ def run(self):
                 pix_catalog = np.repeat(pixel[start_vertex:end_vertex], numbers[j, :][start_vertex:end_vertex])
 
                 # generate a random location within the pixel using healpix
-                ra, dec = healpix.randang(nside, pix_catalog, lonlat=True)
+                ra, dec = healpix.randang(nside, pix_catalog, lonlat=True, nest=scheme.nest)
+                print(ra.min(), ra.max(), dec.min(), dec.max())
 
                 N = len(pix_catalog)
                 bin_index = np.repeat(j, N)
diff --git a/txpipe/source_selection/base.py b/txpipe/source_selection/base.py
index 86136fa86..7e82eb949 100644
--- a/txpipe/source_selection/base.py
+++ b/txpipe/source_selection/base.py
@@ -289,6 +289,8 @@ def write_global_values(self, outfile, calculators):
         calculators: list of Calculator objects, one for each tomographic bin and one for the 2D sample,
                      that have been fed all the data and are ready to have their results collected and written out.
         """
+        # hard link to bin column. needed for RAIL masked summarizer stages
+        outfile["tomography/class_id"] = outfile["tomography/bin"]
 
         for i, calculator in enumerate(calculators):
             stats = calculator.collect(self.comm, allgather=True)
diff --git a/txpipe/source_selection/metacal.py b/txpipe/source_selection/metacal.py
index 99130ff9c..ca008af52 100644
--- a/txpipe/source_selection/metacal.py
+++ b/txpipe/source_selection/metacal.py
@@ -40,8 +40,19 @@ def data_iterator(self):
         We call to a parent class method to do the main iteration; the work here is
         just choosing which columns to read.
         """
+        with self.open_input("shear_catalog") as f:
+            if "flags_1p" in f.file["shear"]:
+                flag_mode = "variant"
+            else:
+                flag_mode = "single"
+        if self.rank == 0:
+            print(f"Using {flag_mode} flags")
         bands = self.config["bands"]
-        shear_cols = metacal_variants("T", "s2n", "g1", "g2", "flags", "weight")
+        shear_cols = metacal_variants("T", "s2n", "g1", "g2", "weight")
+        if flag_mode == "variant":
+            shear_cols += metacal_variants("flags")
+        else:
+            shear_cols += ["flags"]
         shear_cols += ["ra", "dec", "psf_T_mean"]
         shear_cols += band_variants(bands, "mag", "mag_err", shear_catalog_type="metacal")
 
@@ -51,7 +62,13 @@ def data_iterator(self):
             shear_cols += ["redshift_true"]
 
         chunk_rows = self.config["chunk_rows"]
-        return self.iterate_hdf("shear_catalog", "shear", shear_cols, chunk_rows)
+        for s, e, data in  self.iterate_hdf("shear_catalog", "shear", shear_cols, chunk_rows):
+            if flag_mode == "single":
+                data["flags_1p"] = data["flags"]
+                data["flags_1m"] = data["flags"]
+                data["flags_2p"] = data["flags"]
+                data["flags_2m"] = data["flags"]
+            yield s, e, data
 
     def setup_output(self):
         """
@@ -61,7 +78,6 @@ def setup_output(self):
             R_gamma: the per-object estimator response
             R_S: the per-bin selection response
             R_gamma_mean: the mean per-bin estimator response
-            R_total: the complete per-bin response
 
         and the 2D versions of the per-bin values.
         """
@@ -75,10 +91,8 @@ def setup_output(self):
         group.create_dataset("R_gamma", (n, 2, 2), dtype="f")
         group.create_dataset("R_S", (nbin_source, 2, 2), dtype="f")
         group.create_dataset("R_gamma_mean", (nbin_source, 2, 2), dtype="f")
-        group.create_dataset("R_total", (nbin_source, 2, 2), dtype="f")
         group.create_dataset("R_S_2d", (2, 2), dtype="f")
         group.create_dataset("R_gamma_mean_2d", (2, 2), dtype="f")
-        group.create_dataset("R_total_2d", (2, 2), dtype="f")
         return outfile
 
     def setup_response_calculators(self, nbin_source):
diff --git a/txpipe/utils/pixel_schemes.py b/txpipe/utils/pixel_schemes.py
index 0a31586a7..b289fcffd 100755
--- a/txpipe/utils/pixel_schemes.py
+++ b/txpipe/utils/pixel_schemes.py
@@ -159,7 +159,7 @@ def read_map(HealpixScheme, fname_map, i_map=0):
         return p, maps
 
     def vertices(self, pix):
-        return self.healpy.boundaries(self.nside, pix)
+        return self.healpy.boundaries(self.nside, pix, nest=self.nest)
 
 
 class GnomonicPixelScheme: