Draft
75 commits
7d7c8a1
Add calibration package checkpointing, target config, and hyperparame…
baogorek Feb 17, 2026
1e40eb4
Ignore all calibration run outputs in storage/calibration/
baogorek Feb 17, 2026
efc7321
Add --lambda-l0 to Modal runner, fix load_dataset dict handling
baogorek Feb 18, 2026
df989a5
Add --package-path support to Modal runner
baogorek Feb 18, 2026
87d7325
Add --log-freq for per-epoch calibration logging, fix output dir
baogorek Feb 18, 2026
9747731
Create log directory before writing calibration log
baogorek Feb 18, 2026
09b5227
Add debug logging for CLI args and command in package path
baogorek Feb 18, 2026
2fd56dc
Fix chunked epoch display and rename Modal output files
baogorek Feb 18, 2026
d6bfc3f
Replace per-clone Microsimulation with per-state precomputation
baogorek Feb 18, 2026
d9b3efe
Add Modal Volume support and fix CUDA OOM fragmentation
baogorek Feb 19, 2026
b035301
Restrict targets to age demographics only for debugging
baogorek Feb 19, 2026
e0469e9
Add include mode to target config, switch to age-only
baogorek Feb 20, 2026
3963949
Switch target config to finest-grain include (~18K targets)
baogorek Feb 20, 2026
ac1db97
Fix at-large district geoid mismatch (7 districts had 0 estimates)
baogorek Feb 20, 2026
5a447c3
Add CLI package validator, drop impossible roth_ira_contributions target
baogorek Feb 20, 2026
5f8734f
Add population-based initial weights for L0 calibration
baogorek Feb 20, 2026
0b4343e
Drop inflated dollar targets, add ACA PTC, save full package
baogorek Feb 20, 2026
8c46871
Remove redundant --puf-dataset flag, add national targets
baogorek Feb 20, 2026
dc94c2e
fixing the stacked dataset builder
baogorek Feb 20, 2026
476acc6
Derive cds_ordered from cd_geoid array instead of database query
baogorek Feb 20, 2026
be449bc
Update notebook outputs from successful calibration pipeline run
baogorek Feb 21, 2026
8da6095
Fix takeup draw ordering mismatch between matrix builder and stacked …
baogorek Feb 24, 2026
aea8103
checkpoint with aca_ptc randomness working
baogorek Feb 24, 2026
9296d9f
verify script
baogorek Feb 24, 2026
daf140b
Prevent clone-to-CD collisions in geography assignment
baogorek Feb 24, 2026
e41aa4b
checkpoint
baogorek Feb 25, 2026
558dfd2
Fix cross-state cache pollution in matrix builder precomputation
baogorek Feb 25, 2026
402417d
bens work on feb 25
baogorek Feb 26, 2026
ecb8dd7
Selective county-level precomputation via COUNTY_DEPENDENT_VARS
juaristi22 Feb 26, 2026
1097ec0
minor fixes
juaristi22 Feb 26, 2026
0be90c9
small optimizations
juaristi22 Feb 26, 2026
11df17a
Parallelize clone loop in build_matrix() via ProcessPoolExecutor
juaristi22 Feb 26, 2026
ac1b3ab
Migrate from changelog_entry.yaml to towncrier fragments (#550)
MaxGhenis Feb 24, 2026
25c098f
Add end-to-end test for calibration database build pipeline (#556)
MaxGhenis Feb 26, 2026
9a6cfad
Parallelize clone loop in build_matrix() via ProcessPoolExecutor
juaristi22 Feb 26, 2026
81c48b9
add target config
baogorek Feb 27, 2026
a888637
Reorganize calibration modules from local_area_calibration to calibra…
baogorek Feb 27, 2026
538595f
Fix modal run command to specify ::main entrypoint
baogorek Feb 27, 2026
f494f16
Fix worker stdout pollution breaking JSON result parsing
baogorek Feb 27, 2026
1ab6915
Add volume-based verification after worker builds
baogorek Feb 27, 2026
d0484d9
Fix at-large district GEOID round-trip conversion
baogorek Feb 27, 2026
51fa6fc
Always fresh-download calibration inputs, clear stale builds
baogorek Feb 27, 2026
6997fe9
Normalize at-large district naming: 00 and 98 both map to 01
baogorek Feb 27, 2026
44fd0ce
Enable takeup re-randomization in stacked dataset H5 builds
baogorek Feb 27, 2026
9438496
Streamline calibration pipeline: rename, upload, auto-trigger
baogorek Feb 27, 2026
0e89818
Add make pipeline: data → upload → calibrate → stage in one command
baogorek Feb 28, 2026
534952d
documentation
baogorek Feb 28, 2026
8b47e9e
flag
baogorek Feb 28, 2026
3852e76
changes to remote calibration runner
baogorek Mar 1, 2026
0b4bfb7
Script cleanup, validation gating workflow, sanity checks, docs
baogorek Mar 2, 2026
fde09df
Use source-imputed dataset for H5 staging, upload it from calibration
baogorek Mar 2, 2026
d241dbd
Fix sanity_checks H5 key lookup for group/period structure
baogorek Mar 2, 2026
4ec43bf
Fix stage-h5s: add ::main entrypoint to modal run
baogorek Mar 2, 2026
2565734
Add validation job to publish workflow, promote target, fix OOM
baogorek Mar 2, 2026
eee2979
after county acknowledgement
baogorek Mar 3, 2026
f8f9a51
Add provenance tracking, fix takeup rerandomization order, improve Mo…
baogorek Mar 3, 2026
68cb787
Add national H5 pipeline, remove --prebuilt-matrices flag
baogorek Mar 3, 2026
2b2a08d
Fix JSON serialization crash: __version__ resolved to module
baogorek Mar 3, 2026
2c8c2e2
Update upload_local_area_file docstring to list all subdirectories
baogorek Mar 3, 2026
3972dce
Age-only target config for national H5 experiment, fix national builder
baogorek Mar 4, 2026
a3decaa
late night work
baogorek Mar 4, 2026
c9a9761
calibrated populaiton counts logging
juaristi22 Mar 4, 2026
bb65c33
saving column sums
juaristi22 Mar 4, 2026
10bda51
removing debugging logs
juaristi22 Mar 4, 2026
1e27e6e
unify build_h5 with wrappers
juaristi22 Mar 4, 2026
b7ffadb
removing wrappers
juaristi22 Mar 4, 2026
f359774
Full targets calibration + national staging support
baogorek Mar 4, 2026
ab4c0bc
Add back save_geo_labels/load_geo_labels lost in rebase
baogorek Mar 5, 2026
b9dd456
Add missing json import for save_geo_labels/load_geo_labels
baogorek Mar 5, 2026
4f04625
Add volume reload before checking national H5 exists
baogorek Mar 5, 2026
df11816
Consolidate takeup draws to shared compute_block_takeup_for_entities
juaristi22 Mar 5, 2026
bd9f747
Consolidate calibration pipeline duplications
juaristi22 Mar 5, 2026
4650193
Regenerate uv.lock after rebase on main
juaristi22 Mar 5, 2026
fd0dee2
Extract source imputation into standalone make data step
baogorek Mar 5, 2026
b218ba5
Update HF download and H5 staging to use source-imputed dataset
baogorek Mar 5, 2026
65 changes: 60 additions & 5 deletions .github/workflows/local_area_publish.yaml
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ on:
push:
branches: [main]
paths:
- 'policyengine_us_data/datasets/cps/local_area_calibration/**'
- 'policyengine_us_data/calibration/**'
- '.github/workflows/local_area_publish.yaml'
- 'modal_app/**'
repository_dispatch:
@@ -23,7 +23,7 @@ on:
type: boolean

# Trigger strategy:
# 1. Automatic: Code changes to local_area_calibration/ pushed to main
# 1. Automatic: Code changes to calibration/ pushed to main
# 2. repository_dispatch: Calibration workflow triggers after uploading new weights
# 3. workflow_dispatch: Manual trigger with optional parameters

@@ -55,7 +55,7 @@ jobs:
SKIP_UPLOAD="${{ github.event.inputs.skip_upload || 'false' }}"
BRANCH="${{ github.head_ref || github.ref_name }}"

CMD="modal run modal_app/local_area.py --branch=${BRANCH} --num-workers=${NUM_WORKERS}"
CMD="modal run modal_app/local_area.py::main --branch=${BRANCH} --num-workers=${NUM_WORKERS}"

if [ "$SKIP_UPLOAD" = "true" ]; then
CMD="${CMD} --skip-upload"
@@ -71,5 +71,60 @@ jobs:
echo "" >> $GITHUB_STEP_SUMMARY
echo "Files have been uploaded to GCS and staged on HuggingFace." >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "### Next step: Promote to production" >> $GITHUB_STEP_SUMMARY
echo "Trigger the **Promote Local Area H5 Files** workflow with the version from the build output." >> $GITHUB_STEP_SUMMARY
echo "### Next step: Validation runs automatically" >> $GITHUB_STEP_SUMMARY
echo "The validate-staging job will now check all staged H5s." >> $GITHUB_STEP_SUMMARY

validate-staging:
needs: publish-local-area
runs-on: ubuntu-latest
env:
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
steps:
- name: Checkout repo
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.13'

- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Install dependencies
run: uv sync

- name: Validate staged H5s
run: |
uv run python -m policyengine_us_data.calibration.validate_staging \
--area-type states --output validation_results.csv

- name: Upload validation results to HF
run: |
uv run python -c "
from policyengine_us_data.utils.huggingface import upload
upload('validation_results.csv',
'policyengine/policyengine-us-data',
'calibration/logs/validation_results.csv')
"

- name: Post validation summary
if: always()
run: |
echo "## Validation Results" >> $GITHUB_STEP_SUMMARY
if [ -f validation_results.csv ]; then
TOTAL=$(tail -n +2 validation_results.csv | wc -l)
FAILS=$(grep -c ',FAIL,' validation_results.csv || true)
echo "- **${TOTAL}** targets validated" >> $GITHUB_STEP_SUMMARY
echo "- **${FAILS}** sanity failures" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "Review in dashboard, then trigger **Promote** workflow." >> $GITHUB_STEP_SUMMARY
else
echo "Validation did not produce output." >> $GITHUB_STEP_SUMMARY
fi

- name: Upload validation artifact
uses: actions/upload-artifact@v4
with:
name: validation-results
path: validation_results.csv
6 changes: 3 additions & 3 deletions .gitignore
@@ -30,12 +30,12 @@ docs/.ipynb_checkpoints/
## ACA PTC state-level uprating factors
!policyengine_us_data/storage/aca_ptc_multipliers_2022_2024.csv

## Raw input cache for database pipeline
policyengine_us_data/storage/calibration/raw_inputs/
## Calibration run outputs (weights, diagnostics, packages, config)
policyengine_us_data/storage/calibration/

## Batch processing checkpoints
completed_*.txt

## Test fixtures
!policyengine_us_data/tests/test_local_area_calibration/test_fixture_50hh.h5
!policyengine_us_data/tests/test_calibration/test_fixture_50hh.h5
oregon_ctc_analysis.py
111 changes: 105 additions & 6 deletions Makefile
@@ -1,4 +1,12 @@
.PHONY: all format test install download upload docker documentation data validate-data calibrate publish-local-area clean build paper clean-paper presentations database database-refresh promote-database promote-dataset
.PHONY: all format test install download upload docker documentation data validate-data calibrate calibrate-build publish-local-area upload-calibration upload-dataset upload-database build-matrices calibrate-modal calibrate-modal-national calibrate-both stage-h5s stage-national-h5 stage-all-h5s pipeline validate-staging validate-staging-full upload-validation check-staging check-sanity clean build paper clean-paper presentations database database-refresh promote-database promote-dataset promote

GPU ?= A100-80GB
EPOCHS ?= 200
NATIONAL_GPU ?= T4
NATIONAL_EPOCHS ?= 200
BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD)
NUM_WORKERS ?= 8
VERSION ?=

HF_CLONE_DIR ?= $(HOME)/huggingface/policyengine-us-data

@@ -79,8 +87,8 @@ promote-database:
@echo "Copied DB and raw_inputs to HF clone. Now cd to HF repo, commit, and push."

promote-dataset:
cp policyengine_us_data/storage/stratified_extended_cps_2024.h5 \
$(HF_CLONE_DIR)/calibration/stratified_extended_cps.h5
cp policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5 \
$(HF_CLONE_DIR)/calibration/source_imputed_stratified_extended_cps.h5
@echo "Copied dataset to HF clone. Now cd to HF repo, commit, and push."

data: download
@@ -90,20 +98,111 @@ data: download
python policyengine_us_data/datasets/puf/irs_puf.py
python policyengine_us_data/datasets/puf/puf.py
python policyengine_us_data/datasets/cps/extended_cps.py
python policyengine_us_data/calibration/create_stratified_cps.py
python policyengine_us_data/calibration/create_source_imputed_cps.py

data-legacy: data
python policyengine_us_data/datasets/cps/enhanced_cps.py
python policyengine_us_data/datasets/cps/small_enhanced_cps.py
python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py

calibrate: data
python -m policyengine_us_data.calibration.unified_calibration \
--puf-dataset policyengine_us_data/storage/puf_2024.h5
--target-config policyengine_us_data/calibration/target_config.yaml

calibrate-build: data
python -m policyengine_us_data.calibration.unified_calibration \
--target-config policyengine_us_data/calibration/target_config.yaml \
--build-only

validate-package:
python -m policyengine_us_data.calibration.validate_package

publish-local-area:
python policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py
python policyengine_us_data/calibration/publish_local_area.py

validate-data:
python -c "from policyengine_us_data.storage.upload_completed_datasets import validate_all_datasets; validate_all_datasets()"

upload-calibration:
python -c "from policyengine_us_data.utils.huggingface import upload_calibration_artifacts; \
upload_calibration_artifacts()"

upload-dataset:
python -c "from policyengine_us_data.utils.huggingface import upload; \
upload('policyengine_us_data/storage/source_imputed_stratified_extended_cps_2024.h5', \
'policyengine/policyengine-us-data', \
'calibration/source_imputed_stratified_extended_cps.h5')"
@echo "Dataset uploaded to HF."

upload-database:
python -c "from policyengine_us_data.utils.huggingface import upload; \
upload('policyengine_us_data/storage/calibration/policy_data.db', \
'policyengine/policyengine-us-data', \
'calibration/policy_data.db')"
@echo "Database uploaded to HF."

build-matrices:
modal run modal_app/remote_calibration_runner.py::build_package \
--branch $(BRANCH)

calibrate-modal:
modal run modal_app/remote_calibration_runner.py::main \
--branch $(BRANCH) --gpu $(GPU) --epochs $(EPOCHS) \
--push-results

calibrate-modal-national:
modal run modal_app/remote_calibration_runner.py::main \
--branch $(BRANCH) --gpu $(NATIONAL_GPU) \
--epochs $(NATIONAL_EPOCHS) \
--push-results --national

calibrate-both:
$(MAKE) calibrate-modal & $(MAKE) calibrate-modal-national & wait

stage-h5s:
modal run modal_app/local_area.py::main \
--branch $(BRANCH) --num-workers $(NUM_WORKERS)

stage-national-h5:
modal run modal_app/local_area.py::main_national \
--branch $(BRANCH)

stage-all-h5s:
$(MAKE) stage-h5s & $(MAKE) stage-national-h5 & wait

promote:
$(eval VERSION := $(or $(VERSION),$(shell python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")))
modal run modal_app/local_area.py::main_promote \
--branch $(BRANCH) --version $(VERSION)

validate-staging:
python -m policyengine_us_data.calibration.validate_staging \
--area-type states --output validation_results.csv

validate-staging-full:
python -m policyengine_us_data.calibration.validate_staging \
--area-type states,districts --output validation_results.csv

upload-validation:
python -c "from policyengine_us_data.utils.huggingface import upload; \
upload('validation_results.csv', \
'policyengine/policyengine-us-data', \
'calibration/logs/validation_results.csv')"

check-staging:
python -m policyengine_us_data.calibration.check_staging_sums

check-sanity:
python -m policyengine_us_data.calibration.validate_staging \
--sanity-only --area-type states --areas NC

pipeline: data upload-dataset build-matrices calibrate-both stage-all-h5s
@echo ""
@echo "========================================"
@echo "Pipeline complete. H5s are in HF staging."
@echo "Run 'Promote Local Area H5 Files' workflow in GitHub to publish."
@echo "========================================"

clean:
rm -f policyengine_us_data/storage/*.h5
rm -f policyengine_us_data/storage/*.db
1 change: 1 addition & 0 deletions changelog.d/add-database-build-test.added.md
@@ -0,0 +1 @@
Add end-to-end test for calibration database build pipeline.
8 changes: 8 additions & 0 deletions changelog.d/calibration-pipeline-improvements.added.md
@@ -0,0 +1,8 @@
Unified calibration pipeline with GPU-accelerated L1/L0 solver, target config YAML, and CLI package validator.
Per-state and per-county precomputation replacing per-clone Microsimulation (51 sims instead of 436).
Parallel state, county, and clone loop processing via ProcessPoolExecutor.
Block-level takeup re-randomization with deterministic seeded draws.
Hierarchical uprating with ACA PTC state-level CSV factors and CD reconciliation.
Modal remote runner with Volume support, CUDA OOM fixes, and checkpointing.
Stacked dataset builder with sparse CD subsets and calibration block propagation.
Staging validation script (validate_staging.py) with sim.calculate() comparison and sanity checks.
3 changes: 3 additions & 0 deletions changelog.d/calibration-pipeline-improvements.changed.md
@@ -0,0 +1,3 @@
Geography assignment now prevents clone-to-CD collisions.
County-dependent vars (aca_ptc) selectively precomputed per county; other vars use state-only path.
Target config switched to finest-grain include mode (~18K targets).
3 changes: 3 additions & 0 deletions changelog.d/calibration-pipeline-improvements.fixed.md
@@ -0,0 +1,3 @@
Cross-state cache pollution in matrix builder precomputation.
Takeup draw ordering mismatch between matrix builder and stacked builder.
At-large district geoid mismatch (7 districts had 0 estimates).
1 change: 1 addition & 0 deletions changelog.d/migrate-to-towncrier.changed.md
@@ -0,0 +1 @@
Migrated from changelog_entry.yaml to towncrier fragments to eliminate merge conflicts.
123 changes: 123 additions & 0 deletions docs/build_h5.md
@@ -0,0 +1,123 @@
# build_h5 — Unified H5 Builder

`build_h5` is the single function that produces all local-area H5 datasets (national, state, district, city). It lives in `policyengine_us_data/calibration/publish_local_area.py`.

## Signature

```python
def build_h5(
weights: np.ndarray,
blocks: np.ndarray,
dataset_path: Path,
output_path: Path,
cds_to_calibrate: List[str],
cd_subset: List[str] = None,
county_filter: set = None,
rerandomize_takeup: bool = False,
takeup_filter: List[str] = None,
) -> Path:
```

## Parameter Semantics

| Parameter | Type | Purpose |
|---|---|---|
| `weights` | `np.ndarray` | Stacked weight vector, shape `(n_geo * n_hh,)` |
| `blocks` | `np.ndarray` | Block GEOID per weight entry (same shape). If `None`, generated from CD assignments. |
| `dataset_path` | `Path` | Path to base dataset H5 file |
| `output_path` | `Path` | Where to write the output H5 file |
| `cds_to_calibrate` | `List[str]` | Ordered list of CD GEOIDs defining weight matrix row ordering |
| `cd_subset` | `List[str]` | If provided, only include rows for these CDs |
| `county_filter` | `set` | If provided, scale weights by P(target counties \| CD) for city datasets |
| `rerandomize_takeup` | `bool` | Re-draw takeup using block-level seeds |
| `takeup_filter` | `List[str]` | List of takeup variables to re-randomize |

## How `cd_subset` Controls Output Level

The `cd_subset` parameter determines what geographic level the output represents:

- **National** (`cd_subset=None`): All CDs included — produces a full national dataset.
- **State** (`cd_subset=[CDs in state]`): Filter to CDs whose FIPS prefix matches the state — produces a state dataset.
- **District** (`cd_subset=[single_cd]`): Single CD — produces a district dataset.
- **City** (`cd_subset=[NYC CDs]` + `county_filter=NYC_COUNTIES`): Multiple CDs with county filtering — produces a city dataset. The `county_filter` scales weights by the probability that a household in each CD falls within the target counties.
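The county-filter scaling can be pictured with a minimal numpy sketch. This is illustrative only: inside `build_h5` the probabilities come from `get_county_filter_probability()`, while the dict-based helper below is hypothetical.

```python
import numpy as np

def scale_by_county_filter(W, cds_to_calibrate, county_prob_by_cd):
    """Scale each CD row of W (shape (n_geo, n_hh)) by P(target counties | CD)."""
    W = W.copy()
    for i, cd in enumerate(cds_to_calibrate):
        # CDs entirely outside the target counties get probability 0,
        # which zeroes their households out of the city dataset.
        W[i] *= county_prob_by_cd.get(cd, 0.0)
    return W

W = np.ones((3, 4))
# Hypothetical values: one CD fully inside the city, one partially, one outside.
probs = {"3610": 1.0, "3612": 0.4}
scaled = scale_by_county_filter(W, ["3610", "3612", "0601"], probs)
```

A CD that straddles the city boundary keeps only the expected in-city share of its weight, so city totals are not inflated by out-of-city households.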

## Internal Pipeline

1. **Load base simulation** — One `Microsimulation` loaded from `dataset_path`. Entity arrays and membership mappings extracted.

2. **Reshape weights** — The flat weight vector is reshaped to `(n_geo, n_hh)`.

3. **CD subset filtering** — Rows for CDs not in `cd_subset` are zeroed out.

4. **County filtering** — If `county_filter` is set, each row is scaled by `P(target_counties | CD)` via `get_county_filter_probability()`.

5. **Identify active clones** — `np.where(W > 0)` finds all nonzero entries. Each represents a distinct household clone.

6. **Clone entity arrays** — Entity arrays (household, person, tax_unit, spm_unit, family, marital_unit) are cloned using fancy indexing on the base simulation arrays.

7. **Reindex entity IDs** — All entity IDs are reassigned to be globally unique. Cross-reference arrays (e.g., `person_household_id`) are updated accordingly.

8. **Derive geography** — Block GEOIDs are mapped to state FIPS, county, tract, CBSA, etc. via `derive_geography_from_blocks()`. Unique blocks are deduplicated for efficiency.

9. **Recalculate SPM thresholds** — SPM thresholds are recomputed using `calculate_spm_thresholds_vectorized()` with the clone's CD-level geographic adjustment factor.

10. **Rerandomize takeup** (optional) — If enabled, takeup booleans are redrawn per census block using `apply_block_takeup_to_arrays()`.

11. **Write H5** — All variable arrays are written to the output file.
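Steps 2, 3, and 5 can be sketched in a few lines of numpy (shapes and GEOIDs here are invented for illustration, not taken from the real pipeline):

```python
import numpy as np

n_geo, n_hh = 3, 4
cds_to_calibrate = ["0601", "0612", "3601"]

flat_weights = np.arange(n_geo * n_hh, dtype=float) + 1.0

# Step 2: reshape the flat weight vector to (n_geo, n_hh).
W = flat_weights.reshape(n_geo, n_hh)

# Step 3: zero out rows for CDs not in cd_subset.
cd_subset = {"0612"}
keep = np.array([cd in cd_subset for cd in cds_to_calibrate])
W[~keep] = 0.0

# Step 5: every remaining nonzero entry is one household clone.
geo_idx, hh_idx = np.nonzero(W)  # 4 clones: all households in CD 0612
```

Because cloning happens only for nonzero entries, a district build touches a single row of `W` and stays cheap relative to the national build.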

## Usage Examples

### National
```python
build_h5(
weights=w,
blocks=blocks,
dataset_path=Path("base.h5"),
output_path=Path("national/US.h5"),
cds_to_calibrate=cds,
)
```

### State
```python
state_fips = 6 # California
cd_subset = [cd for cd in cds if int(cd) // 100 == state_fips]
build_h5(
weights=w,
blocks=blocks,
dataset_path=Path("base.h5"),
output_path=Path("states/CA.h5"),
cds_to_calibrate=cds,
cd_subset=cd_subset,
)
```

### District
```python
build_h5(
weights=w,
blocks=blocks,
dataset_path=Path("base.h5"),
output_path=Path("districts/CA-12.h5"),
cds_to_calibrate=cds,
cd_subset=["0612"],
)
```

### City (NYC)
```python
from policyengine_us_data.calibration.publish_local_area import (
NYC_COUNTIES, NYC_CDS,
)

cd_subset = [cd for cd in cds if cd in NYC_CDS]
build_h5(
weights=w,
blocks=blocks,
dataset_path=Path("base.h5"),
output_path=Path("cities/NYC.h5"),
cds_to_calibrate=cds,
cd_subset=cd_subset,
county_filter=NYC_COUNTIES,
)
```