diff --git a/.coveragerc b/.coveragerc
index fe9ca39c..fa06a372 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,17 +1,20 @@
 [run]
 source = src
+parallel = True
+concurrency = multiprocessing
+sigterm = True
 omit = 
     src/pybella/data_assimilation/*
     src/pybella/inputs/*
     src/pybella/utils/debug_helpers.py
     src/pybella/utils/slices.py
-    */tests/*
-    */test_*
+    src/pybella/interfaces/postprocessing/strip_target_file.py
+    src/pybella/utils/operators/laplacian/lap3D.py
+    src/pybella/utils/operators/laplacian/lap2D_numba.py
     setup.py
 
 [report]
 exclude_lines =
-    pragma: no cover
     def __repr__
     raise AssertionError
     raise NotImplementedError
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 45d1ab20..2e5a916f 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -1,6 +1,10 @@
 name: deploy
 
-on: [push, pull_request, workflow_dispatch]
+on:
+  push:
+    branches: [develop]
+  pull_request:
+  workflow_dispatch:
 
 permissions:
   contents: write
@@ -36,6 +40,22 @@ jobs:
           # pytest --cov=src --cov-report=xml --cov-report=html --cov-report=term
           pytest ./test_scripts/test_blending.py
           pytest ./test_scripts/test_flow_solver.py
+          pytest ./test_scripts/test_3d_elliptic_oracle.py
+          pytest ./test_scripts/test_igw_analytic.py
+          pytest ./test_scripts/test_3d_coriolis_oracle.py
+          pytest ./test_scripts/test_axes.py
+          pytest ./test_scripts/test_permutation_oracle.py
+          pytest ./test_scripts/test_zvert_smoke.py
+          pytest ./test_scripts/test_terrain_transform.py
+          pytest ./test_scripts/test_terrain_identity_oracle.py
+          pytest ./test_scripts/test_terrain_elliptic_oracle.py
+          pytest ./test_scripts/test_terrain_2d_oracle.py
+          pytest ./test_scripts/test_terrain_resting_atmosphere.py
+          pytest ./test_scripts/test_agnesi_smoke.py
+          pytest ./test_scripts/test_agnesi_analytic.py
+          pytest ./test_scripts/test_agnesi_2d_equivalence.py
+          pytest ./test_scripts/test_schaer_smoke.py
+          pytest ./test_scripts/test_schaer_analytic.py
 
       # - name: Upload coverage HTML report
       #   if: ${{ !env.ACT }}
diff --git a/changelog.d/agnesi_2d_equivalence.added.md b/changelog.d/agnesi_2d_equivalence.added.md
new file mode 100644
index 00000000..9b6b298e
--- /dev/null
+++ b/changelog.d/agnesi_2d_equivalence.added.md
@@ -0,0 +1,6 @@
+Native-2D Agnesi equivalence proof: the 2D terrain path reproduces the
+quasi-2D 3D golden-master run to the pre-existing lap2D/lap3D wall-convention
+floor (~10% on the small wave fields, solver-tolerance independent, same gap
+as a flat wall-bounded impulse) and — the strong gate — passes the Smith
+(1980) analytic oracle with the same thresholds and near-identical
+calibration (w 0.397, u' 0.432, drag 0.981, flux 3.9%) at ~5x less runtime.
diff --git a/changelog.d/axial_phase0_axes_module.added.md b/changelog.d/axial_phase0_axes_module.added.md
new file mode 100644
index 00000000..d472e9f3
--- /dev/null
+++ b/changelog.d/axial_phase0_axes_module.added.md
@@ -0,0 +1,5 @@
+Added the axis-geometry module `pybella/utils/axes.py` (role-space convention:
+cyclic permutation mapping (h1, v, h2) roles onto array axes, vertical-axis
+accessors, slab/profile/permutation helpers) with unit tests, plus the
+bit-for-bit H5 run comparator `test_scripts/compare_h5_runs.py` used to gate
+the pure-refactor phases of the axial-agnosticity work. No behaviour change.
diff --git a/changelog.d/axial_phase1_gravity_config.changed.md b/changelog.d/axial_phase1_gravity_config.changed.md
new file mode 100644
index 00000000..9f90c459
--- /dev/null
+++ b/changelog.d/axial_phase1_gravity_config.changed.md
@@ -0,0 +1,5 @@
+Axial-agnosticity Phase 1: `gravity_direction` is now a configuration input
+(default 1 = y-vertical, validated; 2D runs require 1), `gravity_strength` and
+`coriolis_strength` are computed role-based from it, and all seven hardcoded
+`ud.gravity_strength[1]` reads route through `axes.vertical_axis(ud)`.
+Bit-identical for all existing cases (verified at tol=0 on full run outputs).
diff --git a/changelog.d/axial_phase2_role_canonical.changed.md b/changelog.d/axial_phase2_role_canonical.changed.md
new file mode 100644
index 00000000..47547a91
--- /dev/null
+++ b/changelog.d/axial_phase2_role_canonical.changed.md
@@ -0,0 +1,9 @@
+Axial-agnosticity Phase 2: Coriolis and explicit dynamics are now
+role-canonical — `multiply_inverse_terms` binds (wh1, wv, wh2) and the
+momentum components in (h1, vertical, h2) role order via the cyclic axis
+permutation (njit kernels unchanged); new `compute_inverse_coefficients`
+exposes the cached H^-1 fields for the upcoming tensor elliptic operator;
+buoyancy and the rhoX stratification coupling act on the configured vertical
+momentum; the explicit momentum rows are written in role symbols with
+expression trees preserved; the advection pwchi special case keys on the
+vertical axis. Bit-identical for all existing cases (verified at tol=0).
diff --git a/changelog.d/axial_phase3_boundaries_hydrostates.changed.md b/changelog.d/axial_phase3_boundaries_hydrostates.changed.md
new file mode 100644
index 00000000..d11c2333
--- /dev/null
+++ b/changelog.d/axial_phase3_boundaries_hydrostates.changed.md
@@ -0,0 +1,10 @@
+Axial-agnosticity Phase 3: hydrostatic state, vertical profiles, and
+boundary handling are axis-generic — `States` profiles carry their vertical
+axis (`expand_profile`), `integrated_state`/`analytical_state` integrate
+along the configured vertical, the nodal-divergence wall zeroing loops over
+all WALL/RAYLEIGH axes (fixing the previously broken x-WALL elliptic path),
+the gravity ghost-cell handler threads the physical vertical axis (also
+fixing a latent 3D bug where it read `gravity_strength[2] = 0` during
+advection sweeps), `_set_boundary` mirrors the wall-normal momentum of the
+actual wall axis, and the quasi-2D nodal broadcast generalises to any
+degenerate axis. Bit-identical for all existing cases (verified at tol=0).
diff --git a/changelog.d/axial_phase4_lap3d_full_tensor.changed.md b/changelog.d/axial_phase4_lap3d_full_tensor.changed.md
new file mode 100644
index 00000000..1ced1e3c
--- /dev/null
+++ b/changelog.d/axial_phase4_lap3d_full_tensor.changed.md
@@ -0,0 +1,13 @@
+Axial-agnosticity Phase 4: the 3D elliptic operator now carries the full
+H^-1 tensor coefficients (C_ij = (Gamma^-1 P Theta) h[role(i),role(j)], the
+same H^-1 the momentum correction applies), replacing the legacy ad-hoc x-z
+`corrf` cross terms; the 3D preconditioner uses the C_ii diagonals. With
+identity H^-1 the operator reduces bit-exactly to the previous one (Oracle
+A), and with full Coriolis a y-uniform 3D solve now matches the trusted 2D
+path under the pseudovector axis mapping to ~1e-6 (new
+test_scripts/test_3d_coriolis_oracle.py). That oracle also exposed and
+fixed the implicit-side sibling of the 2D out-of-plane Coriolis defect (the
+pressure-correction w-row was ndim==3-only). Regenerated golden masters:
+travelling_vortex_3d_coriolis (operator upgrade), igw_baldauf_brdar,
+internal_long_wave, unstable_lamb (w-row fix; the igw analytic-oracle
+out-of-plane error improves 0.060 -> 0.045). All other cases bit-identical.
diff --git a/changelog.d/axial_phase5_permutation_oracle.added.md b/changelog.d/axial_phase5_permutation_oracle.added.md
new file mode 100644
index 00000000..3277f6f9
--- /dev/null
+++ b/changelog.d/axial_phase5_permutation_oracle.added.md
@@ -0,0 +1,11 @@
+Axial-agnosticity Phase 5: the permutation oracle
+(`test_scripts/test_permutation_oracle.py`, CI-wired) proves the endgame —
+the 2D internal-long-wave reference (gravity, stratification, walls, full
+Coriolis) embedded as z-vertical (gravity_direction=2) and x-vertical
+(gravity_direction=0) quasi-2D 3D twins reproduces the sigma-mapped 2D
+fields through full solver steps to <=1e-6, with exact uniformity along
+the degenerate axis and the Coriolis pseudovector mapping produced
+automatically by the role-based configuration. Also fixes a latent bug the
+oracle exposed: SpaceDiscr stored `dxyz/ig/ic/stride` as class-level shared
+arrays, so two grids coexisting in one process corrupted each other; they
+are now per-instance. Bit-identical for all existing cases.
diff --git a/changelog.d/axial_phase6_straka_walls_smoke.changed.md b/changelog.d/axial_phase6_straka_walls_smoke.changed.md
new file mode 100644
index 00000000..b5ec1d42
--- /dev/null
+++ b/changelog.d/axial_phase6_straka_walls_smoke.changed.md
@@ -0,0 +1,8 @@
+Axial-agnosticity Phase 6 wrap-up: Straka now runs its faithful free-slip
+wall configuration on all boundaries, exercising the repaired x-WALL
+elliptic path (target regenerated for the wall config); a z-vertical
+plumbing smoke case (`smoke_zvert` + `test_scripts/test_zvert_smoke.py`)
+pushes `gravity_direction = 2` through the production `-ic` entry path; the
+axis conventions, proof obligations, defects fixed, and known limitations
+are documented in `dev_notes/axial_agnosticity.md`. A permanent z-vertical
+golden-master case is deferred to the terrain-following work.
diff --git a/changelog.d/cross_platform_tolerances.fixed.md b/changelog.d/cross_platform_tolerances.fixed.md
new file mode 100644
index 00000000..f8fd2148
--- /dev/null
+++ b/changelog.d/cross_platform_tolerances.fixed.md
@@ -0,0 +1,6 @@
+Golden-master tolerances recalibrated for cross-platform CI: the first GitHub
+runner pass deviated from locally generated targets by 2.3e-6 (igw rhou) to
+7.0e-5 (Agnesi rhou) — different CPU/BLAS/numba reorder the bicgstab
+reductions, ~100x the same-machine scatter the old gates were tuned to. igw
+returns to the 1e-5 default; the two terrain cases (long elliptic iteration
+chains) gate at 5e-4. Physics remains guarded by the analytic oracles.
diff --git a/changelog.d/fix_2d_coriolis_w_row.fixed.md b/changelog.d/fix_2d_coriolis_w_row.fixed.md
new file mode 100644
index 00000000..c4c88cca
--- /dev/null
+++ b/changelog.d/fix_2d_coriolis_w_row.fixed.md
@@ -0,0 +1,12 @@
+Fixed the 2D out-of-plane Coriolis defect: the `rhow` momentum row in
+`explicit_euler.do_forward_step` was guarded by `if ndim == 3`, so 2D runs
+applied only the implicit half of the out-of-plane Coriolis rotation. Found
+by the Baldauf-Brdar analytic oracle (out-of-plane velocity error pinned at
+~0.44 rel-L2 independent of dt, sim/ref amplitude ratio ~0.6, with O(f t)
+feedback into u); after the fix the error drops to 0.06 (dt-convergent to
+0.03) and the amplitude ratio to 1.03. Affects only 2D runs with Coriolis
+components in the x/y slots: the `igw_baldauf_brdar` and
+`internal_long_wave` golden-master targets were deliberately regenerated;
+all other cases are bit-identical (Lamb cases use only `strength[2]`, which
+does not enter the w-row). Oracle gates tightened accordingly
+(`test_igw_analytic.GATES`: vo 0.60 -> 0.10).
diff --git a/changelog.d/fix_3d_elliptic_path.fixed.md b/changelog.d/fix_3d_elliptic_path.fixed.md
new file mode 100644
index 00000000..d8e1f77e
--- /dev/null
+++ b/changelog.d/fix_3d_elliptic_path.fixed.md
@@ -0,0 +1,12 @@
+Fixed the full-3D (`inz > 1`) implicit/elliptic solver path, broken since the
+package restructure: re-wired `lap3D` to the interior-sized `npf` arrays
+(missing imports, coefficient slicing, 3D preconditioner via
+`preconditioner.prepare_diag`), fixed the flat-vector memory layout
+(C-order `[x, y, z]`; the old reshape silently transposed x and z on
+non-cubic grids), fixed the 3D `rhs` shape mismatch and a sign error on the
+x-component of the 3D nodal divergence (legacy, dating to Oct 2021), and made
+the pressure diagnostic kernel dimension-agnostic. The 3D path is validated
+against the 2D solver on a y-uniform quasi-2D problem to ~1e-10
+(`test_scripts/test_3d_elliptic_oracle.py`), and the
+`test_travelling_vortex_3d_coriolis` regression case now runs with a
+committed golden-master target.
diff --git a/changelog.d/igw_analytic_oracle.added.md b/changelog.d/igw_analytic_oracle.added.md
new file mode 100644
index 00000000..cbcc91ea
--- /dev/null
+++ b/changelog.d/igw_analytic_oracle.added.md
@@ -0,0 +1,14 @@
+Added a physics oracle for the Baldauf & Brdar (2013) internal-gravity-wave
+case: `pybella/tests/baldauf_brdar_analytic.py` builds a numerically-exact
+solution of the linearised compressible Euler equations about the isothermal
+background (Bretherton-transformed constant-coefficient system, staggered
+vertical collocation, exact-in-time eigenpropagation per x-Fourier mode;
+energy drift ~1e-13) and evolves the simulation's own initial condition.
+`test_scripts/test_igw_analytic.py` gates the regression configuration
+against it (catching sign/dispersion/amplitude *wrongness*, complementing
+the golden masters which catch *change*) and writes ref/sim/diff PNGs.
+Refinement studies (dt 500->125 s, dx 20->10 km, f on/off) decompose the
+measured sim-vs-linear residual and surface a known solver limitation: in 2D
+runs the out-of-plane momentum receives only the implicit half of the
+Coriolis rotation (`explicit_euler.do_forward_step` skips the `rhow` row for
+`ndim == 2`), pinning that component's error at ~0.44 rel-L2.
diff --git a/changelog.d/lap2d_rayleigh_wall.fixed.md b/changelog.d/lap2d_rayleigh_wall.fixed.md
new file mode 100644
index 00000000..a65ef987
--- /dev/null
+++ b/changelog.d/lap2d_rayleigh_wall.fixed.md
@@ -0,0 +1,6 @@
+`lap2D_manual` now treats RAYLEIGH boundaries as walls, matching `lap3D`,
+`lap2D_numba` and the divergence slab-zeroing. Previously a sponged top in a
+native-2D run got a periodic-in-y elliptic stencil. No-op for the existing 2D
+golden masters (their vertical handling goes through the atmospheric-extension
+branch — verified bit-identical, and the stable lamb wave still propagates at
+0.99 Cs with stable amplitude at twice the regression horizon).
diff --git a/changelog.d/schaer_ridge_case.added.md b/changelog.d/schaer_ridge_case.added.md
new file mode 100644
index 00000000..fa148080
--- /dev/null
+++ b/changelog.d/schaer_ridge_case.added.md
@@ -0,0 +1,8 @@
+Schär (2002) ridge golden-master case (`test_schaer_ridge`): two-scale
+orography (h0 250 m, a 5 km, lambda 4 km) under SLEVE coordinates, native 2D,
+with analytic gradients and the exact cos^2 smooth/residual split. New linear
+FFT mountain-wave oracle (`schaer_linear_analytic.py`, self-tested against
+Smith's closed-form drag) gates the wave field, drag and flux constancy, and
+the Gal-Chen-vs-SLEVE discriminator: spurious small-scale w aloft collapses
+~17x under SLEVE (E_ss 0.004 vs 0.070) where the true lambda-scale response
+is evanescent-dead. Smoke + analytic tests wired into CI.
diff --git a/changelog.d/sleve_transform.added.md b/changelog.d/sleve_transform.added.md
new file mode 100644
index 00000000..c03b2df5
--- /dev/null
+++ b/changelog.d/sleve_transform.added.md
@@ -0,0 +1,8 @@
+SLEVE vertical transform (Schär et al. 2002; Leuenberger et al. 2010
+exponent n): two-scale orography split `ud.orography_smooth` (large-scale)
++ residual against the total `ud.orography`, per-component sinh decay,
+analytic Jacobian — the first eta-dependent J through the operators.
+Selected via `ud.vertical_transform = SLEVETransform(s1, s2, n)`; the
+activation contract and the Gal-Chen path are untouched. Proven by
+transform identities, scale-separation property, and SLEVE-parametrized
+elliptic-composition + resting-atmosphere oracles (3D and native 2D).
diff --git a/changelog.d/straka_density_current.added.md b/changelog.d/straka_density_current.added.md
new file mode 100644
index 00000000..6dc2714a
--- /dev/null
+++ b/changelog.d/straka_density_current.added.md
@@ -0,0 +1,11 @@
+Added the Straka density current (Straka et al. 1993) as regression case
+`test_straka` — the suite's first nonlinear, advection-dominated gravity case
+(256x32 at 200 m, dt = 4 s to t = 900 s; front position, peak winds and
+symmetry verified against the published benchmark). Includes a new explicit
+constant-coefficient diffusion module (`flow_solver/numerics/diffusion.py`,
+enabled per-case via `ud.diffusion` / `ud.diffusion_coeff`, off by default)
+implementing the benchmark's fixed K = 75 m^2/s on velocity and potential
+temperature. The case runs periodic in x: the x-WALL elliptic path is
+currently broken/untested (wall momentum zeroing in the nodal divergence
+handles the vertical axis only) — known limitation, to be fixed with the
+axial-agnosticity refactor.
diff --git a/changelog.d/terrain_2d_native.changed.md b/changelog.d/terrain_2d_native.changed.md
new file mode 100644
index 00000000..e86b9c51
--- /dev/null
+++ b/changelog.d/terrain_2d_native.changed.md
@@ -0,0 +1,6 @@
+Native 2D terrain-following runs: the 2D metric divergence (contravariant/
+J-weighted fluxes), the 2x2 elliptic tensor `J A^T H^-1 A` folded into the
+lap2D cross-term coefficient slots, and a geometry-aware 2D preconditioner
+diagonal. Proven by a 2D div∘correction composition oracle (flat/terrain,
+with/without Coriolis), forced-flat bit-identity, and the native-2D
+resting-atmosphere / conservation gates.
diff --git a/changelog.d/terrain_phase0_transform_scaffolding.added.md b/changelog.d/terrain_phase0_transform_scaffolding.added.md
new file mode 100644
index 00000000..607dfa75
--- /dev/null
+++ b/changelog.d/terrain_phase0_transform_scaffolding.added.md
@@ -0,0 +1,6 @@
+Terrain-following coordinates, phase 0: `VerticalTransform`/`GalChenTransform` +
+`MetricFields` scaffolding (`flow_solver/discretisation/terrain.py`), built in
+`grid_init` and attached as `elem.metric`/`node.metric` (`None` without
+`ud.orography` — uniform-Cartesian path untouched). Transform unit tests, h≡0
+identity oracle, and a quasi-2D mountain-wave smoke case (`smoke_agnesi`)
+through the full-tensor 3D elliptic path.
diff --git a/changelog.d/terrain_phase1_metric_divergence.changed.md b/changelog.d/terrain_phase1_metric_divergence.changed.md
new file mode 100644
index 00000000..84699dae
--- /dev/null
+++ b/changelog.d/terrain_phase1_metric_divergence.changed.md
@@ -0,0 +1,5 @@
+Terrain phase 1: the nodal divergence computes `J∇·F` when terrain is active —
+J-weighted horizontal fluxes and the contravariant vertical flux
+`θ(mom_v − G1·mom_h1 − G2·mom_h2)` in role space, with unchanged differencing
+stencils. Ghost-slab wall zeroing covers the contravariant fluxes automatically.
+Forced-flat oracle (≤1e-13 vs plain path) and role-wiring checks added.
diff --git a/changelog.d/terrain_phase2_metric_gradients.changed.md b/changelog.d/terrain_phase2_metric_gradients.changed.md
new file mode 100644
index 00000000..baecfb5b
--- /dev/null
+++ b/changelog.d/terrain_phase2_metric_gradients.changed.md
@@ -0,0 +1,5 @@
+Terrain phase 2: pressure gradients map to physical space via the terrain
+gradient matrix A (slope correction of the horizontal rows, 1/J on the
+vertical) in both the explicit forward step and the implicit pressure
+correction; the explicit π update divides the J-weighted divergence by the
+node Jacobian. Bypassed entirely without terrain.
diff --git a/changelog.d/terrain_phase3_elliptic_tensor.changed.md b/changelog.d/terrain_phase3_elliptic_tensor.changed.md
new file mode 100644
index 00000000..e795e8d2
--- /dev/null
+++ b/changelog.d/terrain_phase3_elliptic_tensor.changed.md
@@ -0,0 +1,5 @@
+Terrain phase 3: the 3D elliptic operator folds the metric into its tensor —
+`C_ij = wplus ⊙ (J Aᵀ H⁻¹ A)` in role space (lap3D cross terms self-activate)
+and the Helmholtz centre term is J-weighted at nodes. New elliptic oracle
+proves the operator equals the discrete divergence∘momentum-correction
+composition, flat and over an Agnesi hill, to ~1e-12.
diff --git a/changelog.d/terrain_phase4_hydrostates_fields.changed.md b/changelog.d/terrain_phase4_hydrostates_fields.changed.md
new file mode 100644
index 00000000..9611ab0d
--- /dev/null
+++ b/changelog.d/terrain_phase4_hydrostates_fields.changed.md
@@ -0,0 +1,6 @@
+Terrain phase 4: hydrostates at physical height. `States` gains a field mode
+(full per-column fields when terrain is active); `analytical_state` evaluates
+its closed form at z(ξ,η) with local dz = J·dη, `integrated_state` gains a
+fine-grid quadrature branch. Resting-atmosphere oracle: balanced atmosphere
+over a 400 m Agnesi hill stays at rest to ~1e-10 m/s (the solve floor),
+identical to flat. `column`/`initial_pressure` assert no terrain.
diff --git a/changelog.d/terrain_phase5_bottom_bc.changed.md b/changelog.d/terrain_phase5_bottom_bc.changed.md
new file mode 100644
index 00000000..509e00d9
--- /dev/null
+++ b/changelog.d/terrain_phase5_bottom_bc.changed.md
@@ -0,0 +1,6 @@
+Terrain phase 5: gravity-axis ghost cells respect the terrain — the
+CONTRAVARIANT momentum (mom_v − G·mom_h) is reflected oddly and the
+Cartesian vertical momentum rebuilt with the ghost cell's slope terms;
+ghost hydrostatics use the physical image height and local dz = J·dη.
+Advection sweeps flip the metric alongside the solution arrays. Gates:
+uniform flow over a forced-flat metric matches the plain path to 1e-12.
diff --git a/changelog.d/terrain_phase6_advection_cfl.changed.md b/changelog.d/terrain_phase6_advection_cfl.changed.md
new file mode 100644
index 00000000..7934af47
--- /dev/null
+++ b/changelog.d/terrain_phase6_advection_cfl.changed.md
@@ -0,0 +1,9 @@
+Terrain phase 6: advection uses contravariant vertical / J-weighted horizontal
+mass fluxes (HLL upwinding follows automatically), the cell update divides by
+J, recovery's Courant velocity divides J back out, and the CFL gains the
+metric vertical signal speed. Orography is evaluated on periodic-wrapped
+coordinates — a non-periodic hill on a periodic axis otherwise makes the
+elliptic system inconsistent at the duplicated nodes (found via dense
+null-space analysis; bicgstab diverged). smoke_agnesi now runs the 400 m
+witch-of-Agnesi hill end-to-end: J-weighted mass/P conserved to 1e-10,
+max |w| ≈ 0.44 m/s vs the ~0.5 m/s linear estimate.
diff --git a/changelog.d/terrain_phase7_sponge_generalisation.changed.md b/changelog.d/terrain_phase7_sponge_generalisation.changed.md
new file mode 100644
index 00000000..9c89ecdb
--- /dev/null
+++ b/changelog.d/terrain_phase7_sponge_generalisation.changed.md
@@ -0,0 +1,6 @@
+Terrain phase 7: Rayleigh sponge generalised — profile builders read the
+configured vertical axis via `axes.coords_along` (η-based taper, correct in
+terrain-following coordinates), damping broadcasts axis-aware for 3D fields,
+the background Y uses field-mode hydrostates under terrain, and the third
+velocity component is damped in 3D. Lifts the documented vertical=1
+limitation; the 2D path (lamb golden masters) is bit-identical.
diff --git a/changelog.d/terrain_phase8_agnesi_case_oracle.added.md b/changelog.d/terrain_phase8_agnesi_case_oracle.added.md
new file mode 100644
index 00000000..70739ad9
--- /dev/null
+++ b/changelog.d/terrain_phase8_agnesi_case_oracle.added.md
@@ -0,0 +1,7 @@
+Agnesi hydrostatic mountain-wave case (`test_agnesi_hydrostatic`): N=0.01 1/s,
+U=10 m/s over a 100 m witch-of-Agnesi hill with a=10 km in Gal-Chen
+terrain-following coordinates, RAYLEIGH sponge above 12 km. Golden-master
+regression run (15 steps) plus a Smith-1980 analytic oracle
+(`test_agnesi_analytic`): quasi-steady wave field vs the linear hydrostatic
+solution — momentum flux matches the analytic wave drag to 2%, constant with
+height to 4% below the sponge.
diff --git a/changelog.d/terrain_phase9_ci_wiring.infrastructure.md b/changelog.d/terrain_phase9_ci_wiring.infrastructure.md
new file mode 100644
index 00000000..af78082e
--- /dev/null
+++ b/changelog.d/terrain_phase9_ci_wiring.infrastructure.md
@@ -0,0 +1,4 @@
+CI runs the terrain test battery: transform units, h≡0 identity oracle,
+elliptic composition oracle, resting-atmosphere/uniform-flow/mountain-wave
+gates, the Agnesi smoke case, the Agnesi-vs-Smith analytic oracle, and the
+`test_agnesi_hydrostatic` golden master in the flow-solver suite.
diff --git a/outputs/target_agnesi_hydrostatic/target_agnesi_hydrostatic_128_64_stripped.h5 b/outputs/target_agnesi_hydrostatic/target_agnesi_hydrostatic_128_64_stripped.h5
new file mode 100644
index 00000000..aef3f69d
Binary files /dev/null and b/outputs/target_agnesi_hydrostatic/target_agnesi_hydrostatic_128_64_stripped.h5 differ
diff --git a/outputs/target_igw_baldauf_brdar/target_igw_baldauf_brdar_301_20_stripped.h5 b/outputs/target_igw_baldauf_brdar/target_igw_baldauf_brdar_301_20_stripped.h5
new file mode 100644
index 00000000..9557d806
Binary files /dev/null and b/outputs/target_igw_baldauf_brdar/target_igw_baldauf_brdar_301_20_stripped.h5 differ
diff --git a/outputs/target_internal_long_wave/target_internal_long_wave_301_10_stripped.h5 b/outputs/target_internal_long_wave/target_internal_long_wave_301_10_stripped.h5
index 119ed571..194fe409 100644
Binary files a/outputs/target_internal_long_wave/target_internal_long_wave_301_10_stripped.h5 and b/outputs/target_internal_long_wave/target_internal_long_wave_301_10_stripped.h5 differ
diff --git a/outputs/target_schaer_ridge/target_schaer_ridge_256_64_stripped.h5 b/outputs/target_schaer_ridge/target_schaer_ridge_256_64_stripped.h5
new file mode 100644
index 00000000..c9c8c4ce
Binary files /dev/null and b/outputs/target_schaer_ridge/target_schaer_ridge_256_64_stripped.h5 differ
diff --git a/outputs/target_straka/target_straka_256_32_stripped.h5 b/outputs/target_straka/target_straka_256_32_stripped.h5
new file mode 100644
index 00000000..8e4899c8
Binary files /dev/null and b/outputs/target_straka/target_straka_256_32_stripped.h5 differ
diff --git a/outputs/target_swe_vortex/target_swe_vortex_64_64_stripped.h5 b/outputs/target_swe_vortex/target_swe_vortex_64_64_stripped.h5
new file mode 100644
index 00000000..ae9785cd
Binary files /dev/null and b/outputs/target_swe_vortex/target_swe_vortex_64_64_stripped.h5 differ
diff --git a/outputs/target_travelling_vortex_3d_coriolis/target_travelling_vortex_3d_coriolis_64_1_stripped.h5 b/outputs/target_travelling_vortex_3d_coriolis/target_travelling_vortex_3d_coriolis_64_1_stripped.h5
new file mode 100644
index 00000000..7f2171d1
Binary files /dev/null and b/outputs/target_travelling_vortex_3d_coriolis/target_travelling_vortex_3d_coriolis_64_1_stripped.h5 differ
diff --git a/outputs/target_unstable_lamb/target_unstable_lamb_301_30_stripped.h5 b/outputs/target_unstable_lamb/target_unstable_lamb_301_30_stripped.h5
index 3caef678..a36084ea 100644
Binary files a/outputs/target_unstable_lamb/target_unstable_lamb_301_30_stripped.h5 and b/outputs/target_unstable_lamb/target_unstable_lamb_301_30_stripped.h5 differ
diff --git a/src/pybella/flow_solver/discretisation/grid.py b/src/pybella/flow_solver/discretisation/grid.py
index 0e4f82ab..4717619b 100644
--- a/src/pybella/flow_solver/discretisation/grid.py
+++ b/src/pybella/flow_solver/discretisation/grid.py
@@ -1,6 +1,7 @@
 import numpy as np
 
 from ...utils import options as opts
+from . import terrain
 
 
 def grid_init(ud):
@@ -35,6 +36,11 @@ def grid_init(ud):
     elem = ElemSpaceDiscr(grid, ud)
     node = NodeSpaceDiscr(grid, ud)
 
+    # terrain metric fields (None when ud has no orography: the uniform-
+    # Cartesian path must stay bit-identical and pay no overhead)
+    elem.metric = terrain.build_metric_fields(elem, ud)
+    node.metric = terrain.build_metric_fields(node, ud)
+
     return elem, node
 
 
@@ -138,9 +144,21 @@ def __init__(self, g):
         assert g.iny >= 1
         assert g.inz >= 1
 
+        # per-instance arrays: the class-level definitions above are shared
+        # buffers, and two grids coexisting in one process (e.g. a 2D
+        # reference and its 3D permutation twin) would corrupt each other
+        self.ig = np.zeros((3))
+        self.ic = np.zeros((3))
+        self.stride = np.zeros((3))
+        self.dxyz = np.zeros((3))
+
         self.ndim = g.ndim
         self.normal = big
 
+        # terrain metric fields (discretisation.terrain.MetricFields);
+        # None == uniform Cartesian — every consumer branches on this
+        self.metric: terrain.MetricFields | None = None
+
         self.igx = self.ig[0] = 2
         self.igy = self.ig[1] = 2 if g.iny > 1 else 0
         self.igz = self.ig[2] = 2 if g.inz > 1 else 0
diff --git a/src/pybella/flow_solver/discretisation/terrain.py b/src/pybella/flow_solver/discretisation/terrain.py
new file mode 100644
index 00000000..9a7cabe4
--- /dev/null
+++ b/src/pybella/flow_solver/discretisation/terrain.py
@@ -0,0 +1,434 @@
+"""Terrain-following vertical coordinates — transform + metric fields.
+
+The solver integrates in computational coordinates ``(xi_h1, eta, xi_h2)``
+(role order: first horizontal, vertical, second horizontal) on the existing
+uniform grid; terrain enters only through precomputed metric fields. With
+terrain height ``h(xi_h1, xi_h2)`` and the flat domain top at ``eta = etat``,
+a :class:`VerticalTransform` defines the physical height ``z(xi, eta)`` and
+the two metric quantities every operator consumes:
+
+    J   = dz/deta                  (Jacobian; cell "thickness" weight)
+    G_i = dz/dxi_i at fixed eta    (slope terms, i in {h1, h2})
+
+Gal-Chen--Somerville (:class:`GalChenTransform`) is the first concrete
+transform; SLEVE drops in later as another subclass — all metric arrays are
+stored as full grid-shaped fields even where Gal-Chen makes them separable,
+so no operator changes are needed for an eta-dependent Jacobian.
+
+Activation contract (h == 0 bypass): terrain is active iff ``ud.orography``
+is defined (a callable ``h(xi_h1, xi_h2)`` in nondimensional units, mirroring
+the ``ud.stratification`` convention). Without it :func:`build_metric_fields`
+returns ``None``, ``elem.metric``/``node.metric`` are ``None``, and every
+consumer takes the uniform-Cartesian code path untouched — bit-identity with
+the pre-terrain solver by construction.
+
+Slopes prefer an analytic gradient ``ud.orography_grad = (dh_dxi1, dh_dxi2)``
+(role-ordered callables); otherwise they are central differences of the
+orography callable evaluated at shifted coordinates (exact at ghost cells,
+no array stenciling).
+
+Everything is role-oriented through :mod:`pybella.utils.axes` — the metric
+machinery works for any ``ud.gravity_direction`` in {0, 1, 2}.
+"""
+
+import numpy as np
+
+from ...utils import axes
+from ...utils import options as opts
+
+
+class VerticalTransform:
+    """Abstract map from computational vertical eta in [eta0, etat] to z.
+
+    Subclasses implement elementwise, broadcastable methods; the builder
+    materialises full grid-shaped arrays from them.
+
+    ``n_components`` declares how many orography fields the transform
+    consumes. Single-component transforms (the default) receive plain
+    arrays for ``h``/``dh``; two-component transforms (SLEVE) receive
+    tuples ``h = (h_smooth, h_residual)`` / ``dh = (dh_smooth,
+    dh_residual)`` — the split is built from ``ud.orography_smooth``
+    against the total ``ud.orography`` by :func:`build_metric_fields`.
+    """
+
+    n_components = 1
+
+    def z(self, eta, h, eta0, etat):
+        """Physical height z(eta, h)."""
+        raise NotImplementedError
+
+    def jacobian(self, eta, h, eta0, etat):
+        """dz/deta at fixed horizontal position."""
+        raise NotImplementedError
+
+    def decay(self, eta, eta0, etat):
+        """Weight b(eta) by which terrain influence decays with height."""
+        raise NotImplementedError
+
+    def slope(self, eta, dh, eta0, etat):
+        """G = dz/dxi at fixed eta, given the terrain slope dh = dh/dxi.
+
+        Default assumes ``z = eta + h * b(eta)``; transforms with a
+        different structure (e.g. SLEVE's two-scale split) override this.
+        """
+        return dh * self.decay(eta, eta0, etat)
+
+
+class GalChenTransform(VerticalTransform):
+    """Gal-Chen & Somerville (1975): linear decay of terrain with height.
+
+    z = eta + h * b(eta),  b(eta) = (etat - eta) / (etat - eta0)
+
+    so z(eta0) = eta0 + h (surface follows the terrain) and z(etat) = etat
+    (flat top). The Jacobian J = 1 - h / (etat - eta0) is eta-independent.
+    """
+
+    def decay(self, eta, eta0, etat):
+        return (etat - eta) / (etat - eta0)
+
+    def z(self, eta, h, eta0, etat):
+        return eta + h * self.decay(eta, eta0, etat)
+
+    def jacobian(self, eta, h, eta0, etat):
+        # broadcast against eta so the builder always gets a full field
+        return (1.0 - h / (etat - eta0)) + 0.0 * eta
+
+
+class SLEVETransform(VerticalTransform):
+    """SLEVE (Schär et al. 2002; Leuenberger et al. 2010 exponent n).
+
+    Two-scale split z = eta + h1 b1(eta) + h2 b2(eta) with per-component
+    decay (zeta = eta - eta0, H = etat - eta0)
+
+        b_i(zeta) = sinh((H/s_i)^n - (zeta/s_i)^n) / sinh((H/s_i)^n)
+
+    so b_i(0) = 1 (terrain-following surface), b_i(H) = 0 (flat top) and
+    the small-scale part h2 decays on its own scale s2 << s1 — the point
+    of SLEVE: small-scale terrain distortion leaves the grid quickly with
+    height instead of propagating to every level as under Gal-Chen.
+
+    ``s1``/``s2`` are nondimensional decay heights (same units as eta);
+    ``n > 1`` (e.g. Leuenberger's 1.35) gives db_i(0) = 0, i.e. an exactly
+    uniform Jacobian at the surface — the steep-terrain fallback. Below
+    the surface (ghost rows, zeta < 0) the decay continues linearly with
+    its surface slope, which keeps fractional ``n`` well-defined and J
+    smooth across the bottom boundary.
+
+    The first transform with an eta-dependent Jacobian — all operators
+    consume full-field J(eta) so nothing downstream changes.
+    """
+
+    n_components = 2
+
+    def __init__(self, s1, s2, n=1.0):
+        self.s1 = float(s1)
+        self.s2 = float(s2)
+        self.n = float(n)
+
+    def _b_db(self, eta, eta0, etat, s):
+        """Decay b and its eta-derivative db, linearly extended below eta0."""
+        n = self.n
+        H = etat - eta0
+        zeta = eta - eta0 + 0.0 * np.asarray(eta)
+        zc = np.maximum(zeta, 0.0)
+        arg_top = (H / s) ** n
+        oosinh = 1.0 / np.sinh(arg_top)
+        inner = arg_top - (zc / s) ** n
+        b = np.sinh(inner) * oosinh
+        db = -(n * zc ** (n - 1.0) / s**n) * np.cosh(inner) * oosinh
+        if n == 1.0:
+            db0 = -(1.0 / s) * np.cosh(arg_top) * oosinh
+        else:
+            db0 = 0.0  # n > 1: zero surface slope of the decay
+        b = np.where(zeta < 0.0, 1.0 + db0 * zeta, b)
+        db = np.where(zeta < 0.0, db0 + 0.0 * zeta, db)
+        return b, db
+
+    def z(self, eta, h, eta0, etat):
+        h1, h2 = h
+        b1, _ = self._b_db(eta, eta0, etat, self.s1)
+        b2, _ = self._b_db(eta, eta0, etat, self.s2)
+        return eta + h1 * b1 + h2 * b2
+
+    def jacobian(self, eta, h, eta0, etat):
+        h1, h2 = h
+        _, db1 = self._b_db(eta, eta0, etat, self.s1)
+        _, db2 = self._b_db(eta, eta0, etat, self.s2)
+        return 1.0 + h1 * db1 + h2 * db2
+
+    def slope(self, eta, dh, eta0, etat):
+        # two decays — the single-decay base default cannot express this
+        dh1, dh2 = dh
+        b1, _ = self._b_db(eta, eta0, etat, self.s1)
+        b2, _ = self._b_db(eta, eta0, etat, self.s2)
+        return dh1 * b1 + dh2 * b2
+
+
+class MetricFields:
+    """Precomputed terrain metric arrays on one grid (cells or nodes).
+
+    Arrays are array-axis oriented (same layout as the solution fields,
+    squeezed like :class:`CellSolField`); the slope arrays ``G1``/``G2``
+    are the dz/dxi terms along the horizontal role axes ``haxes`` mapped
+    by ``axes.role_perm``. ``G2`` is ``None`` in 2D. Plain float64 arrays
+    only, safe to pass straight into numba kernels.
+    """
+
+    _ARRAYS = ("J", "ooJ", "G1", "G2", "z")
+
+    def __init__(self, J, G1, G2, z, vaxis, haxes):
+        self.J = J
+        self.ooJ = 1.0 / J
+        self.G1 = G1
+        self.G2 = G2
+        self.z = z
+        self.vaxis = vaxis
+        self.haxes = haxes
+
+    def flip_forward(self):
+        """Mirror CellSolField.flip_forward for the advection sweeps."""
+        for key in self._ARRAYS:
+            value = getattr(self, key)
+            if value is not None:
+                setattr(self, key, np.moveaxis(value, 0, -1))
+        self.vaxis, self.haxes = self._shift_axes(-1)
+
+    def flip_backward(self):
+        for key in self._ARRAYS:
+            value = getattr(self, key)
+            if value is not None:
+                setattr(self, key, np.moveaxis(value, -1, 0))
+        self.vaxis, self.haxes = self._shift_axes(+1)
+
+    def _shift_axes(self, step):
+        ndim = self.J.ndim
+        shift = lambda a: (a + step) % ndim if a is not None else None
+        return shift(self.vaxis), tuple(shift(a) for a in self.haxes)
+
+
+def apply_gradient_map(metric, dp):
+    """Physical gradients from computational ones: dp <- A @ dp (in place).
+
+    ``dp`` is the axis-indexed list of the three cell-gradient arrays. The
+    chain rule for z = z(xi, eta) gives, in role order (h1, v, h2),
+
+        d/dx_h|z = d/dxi_h - (G_h / J) d/deta,   d/dz = (1/J) d/deta
+
+    i.e. the matrix A = [[1, -G1/J, 0], [0, 1/J, 0], [0, -G2/J, 1]]. The
+    horizontal rows are corrected before the vertical row is scaled.
+    """
+    a_h1, a_h2 = metric.haxes
+    dp_v = dp[metric.vaxis]
+    dp[a_h1] = dp[a_h1] - metric.G1 * metric.ooJ * dp_v
+    if a_h2 is not None:
+        dp[a_h2] = dp[a_h2] - metric.G2 * metric.ooJ * dp_v
+    dp[metric.vaxis] = dp_v * metric.ooJ
+    return dp
+
+
+def elliptic_tensor(metric, h_role):
+    """Fold the terrain metric into the role-indexed H^-1 tensor.
+
+    Returns M = J A^T H^-1 A (role order h1, v, h2), the coefficient
+    tensor of the elliptic operator: the rhs divergence measures
+    D_i((J A^T F)_i) and the momentum correction applies H^-1 A grad p,
+    so their composition carries exactly this tensor. With H^-1 == I it
+    is the classic terrain-following tensor
+
+        [[J, -G1, 0], [-G1, (1 + G1^2 + G2^2)/J, -G2], [0, -G2, J]]
+
+    and with h == 0 (J == 1, G == 0) it reduces bit-exactly to ``h_role``.
+    """
+    J, ooJ, G1, G2 = metric.J, metric.ooJ, metric.G1, metric.G2
+    h = h_role
+    M00 = J * h[0][0]
+    M02 = J * h[0][2]
+    M20 = J * h[2][0]
+    M22 = J * h[2][2]
+    M01 = -G1 * h[0][0] + h[0][1] - G2 * h[0][2]
+    M10 = -G1 * h[0][0] + h[1][0] - G2 * h[2][0]
+    M12 = -G1 * h[0][2] + h[1][2] - G2 * h[2][2]
+    M21 = -G1 * h[2][0] + h[2][1] - G2 * h[2][2]
+    M11 = ooJ * (
+        G1 * G1 * h[0][0]
+        - G1 * (h[0][1] + h[1][0])
+        + G1 * G2 * (h[0][2] + h[2][0])
+        + h[1][1]
+        - G2 * (h[1][2] + h[2][1])
+        + G2 * G2 * h[2][2]
+    )
+    return ((M00, M01, M02), (M10, M11, M12), (M20, M21, M22))
+
+
+def elliptic_tensor_2d(metric, h2x2):
+    """2D restriction of :func:`elliptic_tensor` to the (h1, v) block.
+
+    ``h2x2 = ((h11, h12), (h21, h22))`` is the in-plane H^-1 block (in 2D
+    the out-of-plane H^-1 rotation never enters the divergence, so the
+    composition carries exactly this 2x2 tensor). G2-free form of the same
+    M = J A^T H^-1 A; with h == 0 (J == 1, G1 == 0) it reduces bit-exactly
+    to ``h2x2``.
+    """
+    J, ooJ, G1 = metric.J, metric.ooJ, metric.G1
+    h = h2x2
+    M00 = J * h[0][0]
+    M01 = -G1 * h[0][0] + h[0][1]
+    M10 = -G1 * h[0][0] + h[1][0]
+    M11 = ooJ * (G1 * G1 * h[0][0] - G1 * (h[0][1] + h[1][0]) + h[1][1])
+    return ((M00, M01), (M10, M11))
+
+
+def terrain_is_active(ud):
+    return getattr(ud, "orography", None) is not None
+
+
+def get_transform(ud):
+    transform = getattr(ud, "vertical_transform", None)
+    return transform if transform is not None else GalChenTransform()
+
+
+def vertical_extent(ud, v):
+    """(eta0, etat): domain extent along the vertical axis v."""
+    return ((ud.xmin, ud.xmax), (ud.ymin, ud.ymax), (ud.zmin, ud.zmax))[v]
+
+
+def _coordinate_wrap(ud, axis):
+    """Identity, or periodic wrap into the domain for PERIODIC axes.
+
+    Ghost coordinates lie outside the domain; every other field sees its
+    periodic image there (ghost-cell wrap), so the orography must too —
+    otherwise the metric is discontinuous across the periodic seam and the
+    elliptic system becomes inconsistent at the duplicated nodes.
+    """
+    if axis is None or ud.bdry_type[axis] != opts.BdryType.PERIODIC:
+        return lambda c: c
+    lo, hi = vertical_extent(ud, axis)
+    length = hi - lo
+    return lambda c: lo + np.mod(c - lo, length)
+
+
+def _effective_callable(ud, a_h1, a_h2, fn):
+    """``fn`` with periodic-wrapped arguments (h1, h2 role order)."""
+    wrap1 = _coordinate_wrap(ud, a_h1)
+    wrap2 = _coordinate_wrap(ud, a_h2)
+    return lambda xi1, xi2: fn(wrap1(xi1), wrap2(xi2))
+
+
+def _effective_orography(ud, a_h1, a_h2):
+    """ud.orography with periodic-wrapped arguments (h1, h2 role order)."""
+    return _effective_callable(ud, a_h1, a_h2, ud.orography)
+
+
+def _coord_view(grid_obj, axis, ndim):
+    """Coordinate array of `axis`, shaped to broadcast over an ndim field."""
+    shape = [1] * ndim
+    shape[axis] = -1
+    return axes.coords_along(grid_obj, axis).reshape(shape)
+
+
+def _terrain_slope(ud, heff, grad, a_h1, a_h2, xi1, xi2, which, spacing):
+    """dh/dxi_which (role index 0 or 1): analytic if provided, else FD.
+
+    ``grad`` is the role-ordered tuple of analytic gradient callables for
+    the orography ``heff`` wraps (or None for FD). Both paths wrap
+    periodic coordinates: the analytic gradient is evaluated at the
+    wrapped points, the central difference differentiates the wrapped
+    (periodic) effective orography so the seam is consistent.
+    """
+    if grad is not None:
+        wrap1 = _coordinate_wrap(ud, a_h1)
+        wrap2 = _coordinate_wrap(ud, a_h2)
+        return grad[which](wrap1(xi1), wrap2(xi2))
+    d = spacing
+    if which == 0:
+        return (heff(xi1 + d, xi2) - heff(xi1 - d, xi2)) / (2.0 * d)
+    return (heff(xi1, xi2 + d) - heff(xi1, xi2 - d)) / (2.0 * d)
+
+
+def build_metric_fields(grid_obj, ud):
+    """Build MetricFields for one grid (ElemSpaceDiscr or NodeSpaceDiscr).
+
+    Returns None when terrain is inactive — callers branch on that and the
+    uniform-Cartesian path stays untouched.
+    """
+    if not terrain_is_active(ud):
+        return None
+
+    ndim = grid_obj.ndim
+    v = axes.vertical_axis(ud)
+    transform = get_transform(ud)
+    eta0, etat = vertical_extent(ud, v)
+
+    if ndim == 2:
+        # axes.validate enforces v == 1 in 2D: x horizontal, no second
+        # horizontal (terrain runs are quasi-2D 3D for now, but the metric
+        # build supports native 2D for the planned lap2D cross-term work)
+        a_h1, a_h2 = 0, None
+    else:
+        a_h1, a_h2 = axes.horizontal_axes(v)
+
+    shape = tuple(int(grid_obj.sc[dim]) for dim in range(ndim))
+    eta = _coord_view(grid_obj, v, ndim)
+    xi1 = _coord_view(grid_obj, a_h1, ndim)
+    xi2 = _coord_view(grid_obj, a_h2, ndim) if a_h2 is not None else 0.0
+
+    heff = _effective_orography(ud, a_h1, a_h2)
+    h = heff(xi1, xi2)
+
+    grad = getattr(ud, "orography_grad", None)
+
+    def slope_of(which, spacing):
+        return _terrain_slope(ud, heff, grad, a_h1, a_h2, xi1, xi2, which, spacing)
+
+    n_comp = getattr(transform, "n_components", 1)
+    if n_comp == 2:
+        # two-scale transforms (SLEVE): h splits into a smooth part and the
+        # residual. The smooth part MUST be wrapped with the same coordinate
+        # map as the total, or the residual breaks the periodic-seam
+        # consistency of the metric. No silent default: a missing split
+        # would degenerate SLEVE into something else entirely.
+        smooth = getattr(ud, "orography_smooth", None)
+        if smooth is None:
+            raise ValueError(
+                "two-component vertical transform requires ud.orography_smooth "
+                "(the large-scale part of ud.orography; the residual is the "
+                "small-scale component)"
+            )
+        heff_s = _effective_callable(ud, a_h1, a_h2, smooth)
+        h_s = heff_s(xi1, xi2)
+        grad_s = getattr(ud, "orography_smooth_grad", None)
+
+        h_arg = (h_s, h - h_s)
+
+        def dh_of(which, spacing):
+            dh_tot = slope_of(which, spacing)
+            dh_s = _terrain_slope(
+                ud, heff_s, grad_s, a_h1, a_h2, xi1, xi2, which, spacing
+            )
+            return (dh_s, dh_tot - dh_s)
+
+    else:
+        h_arg = h
+        dh_of = slope_of
+
+    def full(expr):
+        return np.ascontiguousarray(
+            np.broadcast_to(expr, shape).astype(np.float64, copy=False)
+        )
+
+    J = full(transform.jacobian(eta, h_arg, eta0, etat))
+    if np.any(J <= 0.0):
+        raise ValueError(
+            "terrain transform produced non-positive Jacobian: "
+            "orography reaches or exceeds the domain top"
+        )
+
+    z = full(transform.z(eta, h_arg, eta0, etat))
+
+    G1 = full(transform.slope(eta, dh_of(0, grid_obj.dxyz[a_h1]), eta0, etat))
+    if a_h2 is not None:
+        G2 = full(transform.slope(eta, dh_of(1, grid_obj.dxyz[a_h2]), eta0, etat))
+    else:
+        G2 = None
+
+    return MetricFields(J=J, G1=G1, G2=G2, z=z, vaxis=v, haxes=(a_h1, a_h2))
diff --git a/src/pybella/flow_solver/discretisation/time_update.py b/src/pybella/flow_solver/discretisation/time_update.py
index 2a3d76a4..303d78d1 100644
--- a/src/pybella/flow_solver/discretisation/time_update.py
+++ b/src/pybella/flow_solver/discretisation/time_update.py
@@ -3,13 +3,14 @@
 import numpy as np
 
 # dependencies from pybella common
+from ...utils import axes
 from ...utils import options as opts
 
 # dependencies of the flow solver subpackage
 from ..utils.boundary import rayleigh_boundary as bdry_r
 from ..physics import cfl, eos
 from ..numerics.explicit_advection import advective_flux, compute_advection
-from ..numerics import explicit_euler, implicit_euler
+from ..numerics import diffusion, explicit_euler, implicit_euler
 
 # for blending module
 from ...interfaces.dynamics_blending import schemes
@@ -69,14 +70,12 @@ def do(
                 f"step = {mem.time.step}, window_step = {mem.time.window_step}"
             )
 
-        logging.info(
-            f"""
+        logging.info(f"""
                     -------
                     is_compressible = {ud.is_compressible}, is_nonhydrostatic = {ud.is_nonhydrostatic}
                     compressibility = {ud.compressibility:.3f}, nonhydrostasy = {ud.nonhydrostasy:.3f}
                     -------
-                    """
-        )
+                    """)
 
         sol0 = copy.deepcopy(mem.sol)
 
@@ -113,7 +112,7 @@ def do(
             writer=writer,
         )
 
-        if ud.bdry_type[1] == opts.BdryType.RAYLEIGH:
+        if ud.bdry_type[axes.vertical_axis(ud)] == opts.BdryType.RAYLEIGH:
             # top rayleight damping
             bdry_r.rayleigh_damping(mem.sol, mem.npf, ud)
 
@@ -170,7 +169,7 @@ def do(
             label=str(label) + "_after_full_step",
         )
 
-        if ud.bdry_type[1] == opts.BdryType.RAYLEIGH:
+        if ud.bdry_type[axes.vertical_axis(ud)] == opts.BdryType.RAYLEIGH:
             # top rayleight damping
             bdry_r.rayleigh_damping(mem.sol, mem.npf, ud)
 
@@ -184,6 +183,9 @@ def do(
             npf_half_new=npf_half_new,
         )
 
+        if ud.diffusion:
+            diffusion.apply(mem, ud, dt)
+
         ######################################################
         # Blending : Do blending after timestep
         ######################################################
diff --git a/src/pybella/flow_solver/numerics/coriolis.py b/src/pybella/flow_solver/numerics/coriolis.py
index 955112cf..784dafcf 100644
--- a/src/pybella/flow_solver/numerics/coriolis.py
+++ b/src/pybella/flow_solver/numerics/coriolis.py
@@ -1,16 +1,26 @@
 import numba as nb
 
+from ...utils import axes
+
 
 # Refactored main function
 def multiply_inverse_terms(
     Vec, mem, ud, dt, attrs=("rhou", "rhov", "rhow"), get_coeffs=False
 ):
-    """Coriolis matrix multiplication."""
+    """Apply H^-1 (Coriolis/buoyancy coupling matrix inverse) to a vector field.
+
+    ``attrs`` is AXIS-indexed (u, v, w component names); the role binding
+    (h1, vertical, h2) happens here via the cyclic axis permutation, so the
+    njit kernels below — written in role symbols (wh1, wv, wh2) — stay
+    unchanged for any vertical axis.
+    """
     nonhydro = ud.nonhydrostasy
-    g = ud.gravity_strength[1]
+    g = ud.gravity_strength[axes.vertical_axis(ud)]
     Msq = ud.Msq
 
-    wh1, wv, wh2 = dt * ud.coriolis_strength
+    ax_h1, ax_v, ax_h2 = axes.role_perm(axes.vertical_axis(ud))
+    wdt = dt * ud.coriolis_strength
+    wh1, wv, wh2 = wdt[ax_h1], wdt[ax_v], wdt[ax_h2]
     strat = mem.npf.HydroState_n.get_dSdy(mem.elem, mem.node)
     Y = mem.sol.rhoY / mem.sol.rho
     nu = -(dt**2) * (g / Msq) * strat * Y
@@ -21,10 +31,10 @@ def multiply_inverse_terms(
         mem.cache.get_coriolis_array_views(shp)
     )
 
-    # Get vector components by view
-    VecU = getattr(Vec, attrs[0])
-    VecV = getattr(Vec, attrs[1])
-    VecW = getattr(Vec, attrs[2])
+    # Get vector components by view, in role order
+    VecU = getattr(Vec, attrs[ax_h1])
+    VecV = getattr(Vec, attrs[ax_v])
+    VecW = getattr(Vec, attrs[ax_h2])
 
     U, V, W = mem.cache.get_velocity_array_views(VecU.shape)
 
@@ -54,11 +64,39 @@ def multiply_inverse_terms(
 
     # Return coefficients
     if get_coeffs:
-        # For 2D only
+        # 2D-only path (the (h1, v) block); 2D runs force vertical = axis 1
         h11, h12, _, h21, h22, _, _, _, _, _ = mem.cache.get_coriolis_array_views(shp)
         return (h11.T, h22.T, h12.T, h21.T)
 
 
+def compute_inverse_coefficients(mem, ud, dt):
+    """Fill and return the cached role-indexed H^-1 coefficient fields.
+
+    Exactly the coefficients the apply path uses (eq. C11), exposed for the
+    full-tensor elliptic operator, which consumes them as stencil
+    coefficient fields. Returns the 10 cached views
+    (h11, h12, h13, h21, h22, h23, h31, h32, h33, denom), role-indexed,
+    shaped like the buoyancy field nu.
+    """
+    nonhydro = ud.nonhydrostasy
+    g = ud.gravity_strength[axes.vertical_axis(ud)]
+    Msq = ud.Msq
+
+    ax_h1, ax_v, ax_h2 = axes.role_perm(axes.vertical_axis(ud))
+    wdt = dt * ud.coriolis_strength
+    wh1, wv, wh2 = wdt[ax_h1], wdt[ax_v], wdt[ax_h2]
+    strat = mem.npf.HydroState_n.get_dSdy(mem.elem, mem.node)
+    Y = mem.sol.rhoY / mem.sol.rho
+    nu = -(dt**2) * (g / Msq) * strat * Y
+
+    views = mem.cache.get_coriolis_array_views(nu.shape)
+    h11, h12, h13, h21, h22, h23, h31, h32, h33, denom = views
+    _compute_coriolis_coefficients(
+        h11, h12, h13, h21, h22, h23, h31, h32, h33, denom, wh1, wh2, wv, nu, nonhydro
+    )
+    return views
+
+
 @nb.njit(cache=True)
 def _compute_coriolis_coefficients(
     h11, h12, h13, h21, h22, h23, h31, h32, h33, denom, wh1, wh2, wv, nu, nonhydro
diff --git a/src/pybella/flow_solver/numerics/diffusion.py b/src/pybella/flow_solver/numerics/diffusion.py
new file mode 100644
index 00000000..a37e97cf
--- /dev/null
+++ b/src/pybella/flow_solver/numerics/diffusion.py
@@ -0,0 +1,90 @@
+"""Explicit constant-coefficient diffusion (e.g. for the Straka density current).
+
+Applies forward-Euler diffusion with a fixed kinematic coefficient K to the
+velocity components and to the potential-temperature *perturbation*
+theta' = theta - theta_bar(y), once per full time step:
+
+    u     <- u     + dt * K * lap(u)
+    v     <- v     + dt * K * lap(v)
+    theta <- theta + dt * K * lap(theta - theta_bar)
+
+Thermal diffusion acts at fixed P = rho*theta (the pressure-like prognostic
+rhoY is untouched), so the density is updated as rho = rhoY / theta — the
+anomaly construction used by the bubble initial conditions.
+
+Boundary behaviour follows from the existing ghost-cell conventions: WALL
+ghosts mirror tangential velocity and scalars (zero-flux / free-slip) and
+negate the normal momentum (impermeability), which is exactly the classic
+Straka setup.
+
+Stability: explicit Euler needs K*dt*(1/dx^2 + 1/dy^2 [+ 1/dz^2]) <= 1/2;
+the Straka regression case sits two orders of magnitude below this.
+
+Enabled per-case via ``ud.diffusion = True`` + ``ud.diffusion_coeff = K``
+(non-dimensional: K_phys * t_ref / h_ref**2). Off by default — existing
+cases are untouched.
+"""
+
+import numba as nb
+import numpy as np
+
+from ..utils.boundary import cell_boundary as bdry_c
+
+
+@nb.njit(cache=True)
+def _laplacian_2d(f, dx, dy):
+    lap = np.zeros_like(f)
+    lap[1:-1, 1:-1] = (f[2:, 1:-1] - 2.0 * f[1:-1, 1:-1] + f[:-2, 1:-1]) / (dx * dx) + (
+        f[1:-1, 2:] - 2.0 * f[1:-1, 1:-1] + f[1:-1, :-2]
+    ) / (dy * dy)
+    return lap
+
+
+@nb.njit(cache=True)
+def _laplacian_3d(f, dx, dy, dz):
+    lap = np.zeros_like(f)
+    lap[1:-1, 1:-1, 1:-1] = (
+        (f[2:, 1:-1, 1:-1] - 2.0 * f[1:-1, 1:-1, 1:-1] + f[:-2, 1:-1, 1:-1]) / (dx * dx)
+        + (f[1:-1, 2:, 1:-1] - 2.0 * f[1:-1, 1:-1, 1:-1] + f[1:-1, :-2, 1:-1])
+        / (dy * dy)
+        + (f[1:-1, 1:-1, 2:] - 2.0 * f[1:-1, 1:-1, 1:-1] + f[1:-1, 1:-1, :-2])
+        / (dz * dz)
+    )
+    return lap
+
+
+def _laplacian(f, elem):
+    if elem.ndim == 2:
+        return _laplacian_2d(f, elem.dx, elem.dy)
+    return _laplacian_3d(f, elem.dx, elem.dy, elem.dz)
+
+
+def apply(mem, ud, dt):
+    """One explicit diffusion step on velocity and theta'; updates mem.sol in place."""
+    K = ud.diffusion_coeff
+    sol = mem.sol
+    elem = mem.elem
+
+    u = sol.rhou / sol.rho
+    v = sol.rhov / sol.rho
+    theta = sol.rhoY / sol.rho
+
+    # background theta profile on cells (S0 = 1/theta_bar)
+    theta_bar = 1.0 / mem.npf.HydroState.get_S0c(elem)
+
+    u = u + dt * K * _laplacian(u, elem)
+    v = v + dt * K * _laplacian(v, elem)
+    theta = theta + dt * K * _laplacian(theta - theta_bar, elem)
+
+    if elem.ndim == 3:
+        w = sol.rhow / sol.rho
+        w = w + dt * K * _laplacian(w, elem)
+
+    # thermal diffusion at fixed P = rhoY: fold the new theta into rho
+    sol.rho[...] = sol.rhoY / theta
+    sol.rhou[...] = sol.rho * u
+    sol.rhov[...] = sol.rho * v
+    if elem.ndim == 3:
+        sol.rhow[...] = sol.rho * w
+
+    bdry_c.set_ghost_cells(mem, ud)
diff --git a/src/pybella/flow_solver/numerics/explicit_advection/advective_flux.py b/src/pybella/flow_solver/numerics/explicit_advection/advective_flux.py
index ecc7ef89..6e11ef81 100644
--- a/src/pybella/flow_solver/numerics/explicit_advection/advective_flux.py
+++ b/src/pybella/flow_solver/numerics/explicit_advection/advective_flux.py
@@ -26,13 +26,31 @@ def recompute(mem, **kwargs):
 
     flux = mem.cache.get_flux_containers(mem.elem)
 
+    # terrain metric; recompute is only called in the unflipped orientation
+    # (time_update, between sweeps), so component i matches array axis i
+    metric = mem.elem.metric
+
     for i, (comp, rho_comp) in enumerate(zip(components, rho_components)):
         # Use provided velocity or compute from momentum
         if comp in kwargs:
             rhoY_vel = kwargs[comp]
         else:
             momentum = getattr(mem.sol, rho_comp)
-            rhoY_vel = mem.sol.rhoY * momentum / mem.sol.rho
+            if metric is not None and i == metric.vaxis:
+                # contravariant vertical mass flux rhoY*(w - G.u_h)/rho
+                # (J * eta_dot — what actually crosses an eta-face)
+                a_h1, a_h2 = metric.haxes
+                momentum = momentum - metric.G1 * getattr(mem.sol, rho_components[a_h1])
+                if metric.G2 is not None:
+                    momentum = momentum - metric.G2 * getattr(
+                        mem.sol, rho_components[a_h2]
+                    )
+                rhoY_vel = mem.sol.rhoY * momentum / mem.sol.rho
+            elif metric is not None:
+                # horizontal mass fluxes carry the Jacobian (face-area weight)
+                rhoY_vel = metric.J * mem.sol.rhoY * momentum / mem.sol.rho
+            else:
+                rhoY_vel = mem.sol.rhoY * momentum / mem.sol.rho
 
         # Apply directional convolution
         flux[i].rhoY[inner_idx] = convolution.apply_directional_convolution(
diff --git a/src/pybella/flow_solver/numerics/explicit_advection/compute_advection.py b/src/pybella/flow_solver/numerics/explicit_advection/compute_advection.py
index 09f7dfe9..323092d0 100644
--- a/src/pybella/flow_solver/numerics/explicit_advection/compute_advection.py
+++ b/src/pybella/flow_solver/numerics/explicit_advection/compute_advection.py
@@ -1,10 +1,25 @@
 import numba as nb
 
+from ....utils import axes
 from ....utils.slices import get_neighbor_indices
 from ...utils.boundary import cell_boundary as bdry_c
 from . import recovery, riemann_solver
 
 
+def _flip_forward(mem):
+    """Flip the solution AND the terrain metric so ghost-cell fills and
+    flux kernels inside a sweep see consistently oriented arrays."""
+    mem.sol.flip_forward()
+    if mem.elem.metric is not None:
+        mem.elem.metric.flip_forward()
+
+
+def _flip_backward(mem):
+    mem.sol.flip_backward()
+    if mem.elem.metric is not None:
+        mem.elem.metric.flip_backward()
+
+
 def strange_splitting(mem, ud, dt, odd, label, writer=None):
     """
     Concise implementation of Strang-splitting advection.
@@ -44,7 +59,7 @@ def first_order_runge_kutta(mem, ud, dt):
     # Compute fluxes for all dimensions
     for split in range(ndim):
         lmbda = time_step / mem.elem.dxyz[split]
-        mem.sol.flip_forward()
+        _flip_forward(mem)
         if mem.elem.iisc[split] > 1:
             flux[split] = _explicit_step_and_flux(mem, ud, lmbda, split, tag="rk")
 
@@ -53,21 +68,27 @@ def first_order_runge_kutta(mem, ud, dt):
 
     # Apply flux updates for all dimensions
     for dim in range(ndim):
-        _apply_dimensional_flux_update(mem, dim, time_step, left_idx, right_idx)
+        _apply_dimensional_flux_update(mem, ud, dim, time_step, left_idx, right_idx)
 
     bdry_c.set_ghost_cells(mem, ud)
 
 
-def _update_solution_variables(sol, flux, lmbda, left_idx, right_idx, variables=None):
+def _update_solution_variables(
+    sol, flux, lmbda, left_idx, right_idx, variables=None, ooJ=None
+):
     """
     Helper function to update solution variables with flux differences.
 
+    ooJ: terrain inverse Jacobian (sweep-oriented); the finite-volume cell
+    measure is J * dxi, so metric flux differences are divided by J.
     """
     if variables is None:
         variables = ["rho", "rhou", "rhov", "rhow", "rhoX", "rhoY"]
 
     for var in variables:
         flux_diff = getattr(flux, var)[left_idx] - getattr(flux, var)[right_idx]
+        if ooJ is not None:
+            flux_diff = ooJ * flux_diff
         current_val = getattr(sol, var)
         setattr(sol, var, current_val + lmbda * flux_diff)
 
@@ -88,7 +109,8 @@ def _explicit_step_and_flux(mem, ud, lmbda, split_step, tag=None):
     left_idx, right_idx = get_neighbor_indices(mem.elem.ndim)
 
     if tag != "rk":
-        _update_solution_variables(mem.sol, flux, lmbda, left_idx, right_idx)
+        ooJ = mem.elem.metric.ooJ if mem.elem.metric is not None else None
+        _update_solution_variables(mem.sol, flux, lmbda, left_idx, right_idx, ooJ=ooJ)
 
     if tag == "rk":
         return flux
@@ -109,18 +131,19 @@ def _compute_flux_and_recovery(mem, flux, ud, lmbda, split_step, tag=None):
     return flux
 
 
-def _apply_dimensional_flux_update(mem, dim, time_step, left_idx, right_idx):
+def _apply_dimensional_flux_update(mem, ud, dim, time_step, left_idx, right_idx):
     """
     Apply flux update for a specific dimension.
     """
     lmbda = time_step / mem.elem.dxyz[dim]
-    mem.sol.flip_forward()
+    _flip_forward(mem)
     flux = mem.cache.get_flux_containers(mem.elem)[dim]
 
-    _update_solution_variables(mem.sol, flux, lmbda, left_idx, right_idx)
+    ooJ = mem.elem.metric.ooJ if mem.elem.metric is not None else None
+    _update_solution_variables(mem.sol, flux, lmbda, left_idx, right_idx, ooJ=ooJ)
 
-    # Handle special case for vertical axis
-    if dim == 1:
+    # Handle special case for the vertical axis
+    if dim == axes.vertical_axis(ud):
         updt = lmbda * (flux.rhoX[left_idx] - flux.rhoX[right_idx])
         setattr(mem.sol, "pwchi", updt)
 
@@ -143,9 +166,9 @@ def _perform_dimensional_sweep(mem, ud, time_step, reverse=False, diagnostics=No
         if reverse:
             if elem.iisc[split] > 1:
                 _explicit_step_and_flux(mem, ud, lmbda, split, diagnostics)
-            Sol.flip_backward()
+            _flip_backward(mem)
         else:
-            Sol.flip_forward()
+            _flip_forward(mem)
             if elem.iisc[split] > 1:
                 _explicit_step_and_flux(mem, ud, lmbda, split, diagnostics)
 
diff --git a/src/pybella/flow_solver/numerics/explicit_advection/recovery.py b/src/pybella/flow_solver/numerics/explicit_advection/recovery.py
index 9e2b82ae..8587f26b 100644
--- a/src/pybella/flow_solver/numerics/explicit_advection/recovery.py
+++ b/src/pybella/flow_solver/numerics/explicit_advection/recovery.py
@@ -30,6 +30,11 @@ def compute(mem, flux, ud, lmbda, split_step, tag=None):
         * (flux.rhoY[face_inner_idx][lefts_idx] + flux.rhoY[face_inner_idx][rights_idx])
         / mem.sol.rhoY[face_inner_idx]
     )
+    if mem.elem.metric is not None:
+        # the metric mass fluxes carry J (horizontal) / J*eta_dot (vertical);
+        # the slope-transport Courant velocity is the coordinate velocity,
+        # so divide J back out (metric is sweep-oriented alongside sol)
+        u *= mem.elem.metric.ooJ
 
     shape = mem.sol.u.shape
 
diff --git a/src/pybella/flow_solver/numerics/explicit_euler.py b/src/pybella/flow_solver/numerics/explicit_euler.py
index 44f79d14..0793ab0f 100644
--- a/src/pybella/flow_solver/numerics/explicit_euler.py
+++ b/src/pybella/flow_solver/numerics/explicit_euler.py
@@ -1,6 +1,8 @@
 import numpy as np
 
+from ...utils import axes
 from ...utils.operators import convolution, divergence, gradient
+from ..discretisation import terrain
 from ..utils.boundary import cell_boundary as bdry_c
 from ..utils.boundary import node_boundary as bdry_n
 from ..utils.boundary import common as bdry
@@ -12,9 +14,15 @@ def do_forward_step(mem, ud, dt, writer=None, label=None, debug=False):
     ndim = elem.ndim
 
     nonhydro = ud.nonhydrostasy
-    g, Msq = ud.gravity_strength[1], ud.Msq
+    g, Msq = ud.gravity_strength[axes.vertical_axis(ud)], ud.Msq
     Ginv = th.Gammainv
-    corr_h1, corr_v, corr_h2 = ud.coriolis_strength
+    # role-ordered Coriolis components (h1, v, h2); identity for vertical = 1
+    ax_h1, ax_v, ax_h2 = axes.role_perm(axes.vertical_axis(ud))
+    corr_h1, corr_v, corr_h2 = (
+        ud.coriolis_strength[ax_h1],
+        ud.coriolis_strength[ax_v],
+        ud.coriolis_strength[ax_h2],
+    )
     u0, v0, w0 = ud.u_wind_speed, ud.v_wind_speed, ud.w_wind_speed
 
     # Reusable derived quantities
@@ -45,36 +53,57 @@ def do_forward_step(mem, ud, dt, writer=None, label=None, debug=False):
     rhoYovG = Ginv * rhoY
     dbuoy = rhoY * (rhoX / rho)
 
-    # Pressure gradients
+    # Pressure gradients (physical: terrain slope/Jacobian correction via A)
     dpdx, dpdy, dpdz = gradient.compute_at_nodes(p2n, ndim, node.dxyz)
+    if elem.metric is not None:
+        dpdx, dpdy, dpdz = terrain.apply_gradient_map(elem.metric, [dpdx, dpdy, dpdz])
 
     # Wind perturbations
     drhou = rhou - u0 * rho
     drhov = rhov - v0 * rho
     drhow = rhow - w0 * rho
-    v = rhov / rho
 
-    # Momentum update (u, v, w)
-    rhou -= dt * (rhoYovG * dpdx - corr_h2 * drhov + corr_v * drhow)
-    rhov -= (
+    # role-ordered views (h1, v, h2) of the axis-indexed component tuples;
+    # in-place updates below mutate the underlying sol arrays
+    mom = (rhou, rhov, rhow)
+    dmom = (drhou, drhov, drhow)
+    dpd = (dpdx, dpdy, dpdz)
+    mom_h1, mom_v, mom_h2 = mom[ax_h1], mom[ax_v], mom[ax_h2]
+    dm_h1, dm_v, dm_h2 = dmom[ax_h1], dmom[ax_v], dmom[ax_h2]
+    dp_h1, dp_v, dp_h2 = dpd[ax_h1], dpd[ax_v], dpd[ax_h2]
+
+    vel_v = mom_v / rho
+
+    # Momentum update in role space: gravity/buoyancy acts on the vertical
+    # row, Coriolis couples the rows pairwise (cross-product structure)
+    mom_h1 -= dt * (rhoYovG * dp_h1 - corr_h2 * dm_v + corr_v * dm_h2)
+    mom_v -= (
         dt
         * (
-            rhoYovG * dpdy
+            rhoYovG * dp_v
             + (g / Msq) * dbuoy * nonhydro
-            - corr_h1 * drhow
-            + corr_h2 * drhou
+            - corr_h1 * dm_h2
+            + corr_h2 * dm_h1
         )
         * (1 - ud.is_ArakawaKonor)
     )
 
-    if ndim == 3:
-        rhow -= dt * (rhoYovG * dpdz - corr_v * drhou + corr_h1 * drhov)
-
-    # Scalar update (rhoX)
-    sol.rhoX[...] = (rho * (rho / rhoY - S0c)) - dt * (v * dSdy) * rho
-
-    # Compressibility correction to p2
-    dp2n[node.i1] -= dt * dpidP * npf.rhs
+    # the h2-row applies in 2D too: its pressure gradient is zero there, but
+    # the Coriolis terms are not. Restricting it to ndim == 3 gave 2D runs
+    # only the implicit half of the out-of-plane Coriolis rotation — found
+    # 2026-06-09 by the Baldauf-Brdar analytic oracle (w_out error pinned at
+    # ~0.44 rel-L2 with a sim/ref amplitude ratio ~0.6, independent of dt).
+    mom_h2 -= dt * (rhoYovG * dp_h2 - corr_v * dm_h1 + corr_h1 * dm_v)
+
+    # Scalar update (rhoX): stratification couples to the vertical velocity
+    sol.rhoX[...] = (rho * (rho / rhoY - S0c)) - dt * (vel_v * dSdy) * rho
+
+    # Compressibility correction to p2; with terrain npf.rhs carries J*div F,
+    # so the pointwise pi update needs the plain divergence back (1/J_n)
+    if node.metric is not None:
+        dp2n[node.i1] -= dt * dpidP * (npf.rhs * node.metric.ooJ[node.i1])
+    else:
+        dp2n[node.i1] -= dt * dpidP * npf.rhs
     npf.p2_nodes += ud.compressibility * dp2n
 
     # Boundary conditions
diff --git a/src/pybella/flow_solver/numerics/implicit_euler.py b/src/pybella/flow_solver/numerics/implicit_euler.py
index 7fa9ba2b..bca75e7f 100644
--- a/src/pybella/flow_solver/numerics/implicit_euler.py
+++ b/src/pybella/flow_solver/numerics/implicit_euler.py
@@ -1,8 +1,10 @@
 import numpy as np
 import scipy as sp
 
+from ...utils import axes
 from ...utils.operators import convolution, divergence, gradient
 from ...utils.operators.laplacian import preconditioner, lap2D_manual, lap3D
+from ..discretisation import terrain
 from ..utils.boundary import cell_boundary as bdry_c
 from ..utils.boundary import node_boundary as bdry_n
 from ..utils.boundary import common as bdry
@@ -25,11 +27,12 @@ def __call__(self, rk=None):
 
 def do_explicit_part(mem, ud, dt):
     nonhydro = ud.nonhydrostasy
-    g = ud.gravity_strength[1]
+    g = ud.gravity_strength[axes.vertical_axis(ud)]
     Msq = ud.Msq
 
     dbuoy = mem.sol.rhoY * (mem.sol.rhoX / mem.sol.rho)
-    mem.sol.rhov = (nonhydro * mem.sol.rhov) - dt * (g / Msq) * dbuoy
+    vmom = axes.vertical_momentum(ud)
+    setattr(mem.sol, vmom, (nonhydro * getattr(mem.sol, vmom)) - dt * (g / Msq) * dbuoy)
 
     mem.sol.mod_bg_wind(ud, -1.0)
 
@@ -120,12 +123,16 @@ def write_debug_data(key, data):
 
 
 def _correction_nodes(mem, ud, dt, p, updt_chi):
-    ndim = mem.node.ndim
     Gammainv = mem.th.Gammainv
 
     dSdy = mem.npf.HydroState_n.get_dSdy(mem.elem, mem.node)
 
     Dpx, Dpy, Dpz = gradient.compute_at_nodes(p, mem.elem.ndim, mem.node.dxyz)
+    if mem.elem.metric is not None:
+        # physical gradients via the terrain map A — the same correction the
+        # elliptic operator's C_ij coefficients encode, so the projection
+        # annihilates exactly the divergence it measures
+        Dpx, Dpy, Dpz = terrain.apply_gradient_map(mem.elem.metric, [Dpx, Dpy, Dpz])
 
     thinv = mem.sol.rho / mem.sol.rhoY
 
@@ -140,8 +147,13 @@ def _correction_nodes(mem, ud, dt, p, updt_chi):
 
     mem.sol.rhou += thinv * mem.npf.u
     mem.sol.rhov += thinv * mem.npf.v
-    mem.sol.rhow += thinv * mem.npf.w if ndim == 3 else 0.0
-    mem.sol.rhoX += -updt_chi * dt * dSdy * mem.sol.rhov
+    # the w-row applies in 2D too: H^-1 rotates the pressure correction into
+    # the out-of-plane momentum whenever Coriolis is active. Restricting it
+    # to ndim == 3 dropped that component in 2D runs (implicit-side sibling
+    # of the explicit-step defect fixed 2026-06-09; quantified at 1.4e-4 by
+    # the 3D-vs-2D full-Coriolis oracle).
+    mem.sol.rhow += thinv * mem.npf.w
+    mem.sol.rhoX += -updt_chi * dt * dSdy * getattr(mem.sol, axes.vertical_momentum(ud))
 
 
 def operator_coefficients_nodes(mem, ud, dt):
@@ -163,6 +175,12 @@ def operator_coefficients_nodes(mem, ud, dt):
         mem.sol.rhoY**cexp, kernel
     )
 
+    if mem.node.metric is not None:
+        # with terrain the solved equation is J * (Helmholtz): the rhs
+        # carries J*div F, the C_ij tensor carries J — the pointwise center
+        # term needs its J too (node-exact, not averaged)
+        mem.npf.wcenter *= mem.node.metric.J[mem.node.i1]
+
     if not hasattr(ud, "ATMOSPHERIC_EXTENSION"):
         bdry.scale_wall_node_values(mem.npf.wcenter, mem.node, ud)
 
@@ -193,13 +211,42 @@ def _prepare_linear_system(mem, ud, dt):
 
 
 def _prepare_2d_system(mem, ud, dt):
-    """Prepare 2D linear system."""
+    """Prepare 2D linear system.
+
+    The lap2D kernel's effective coefficients are wplus ⊙ c (c = the four
+    coefficient arrays passed as ``coriolis_params``), so with terrain the
+    2x2 tensor M = J A^T H^-1 A is folded into c — mirroring the 3D
+    ``cij = wplus[i] * h_role[i][j]`` — while wplus stays separate to keep
+    the kernel's wall coefficient-zeroing intact. ``multiply_inverse_terms``
+    returns the (h1, v) block TRANSPOSED (C-ravel of the transpose == the
+    F-ravel the wplus arrays get), so the fold un-transposes around
+    ``elliptic_tensor_2d``.
+    """
     Vec = mem.npf
     coriolis_params = coriolis.multiply_inverse_terms(
         Vec, mem, ud, dt, attrs=("u", "v", "w"), get_coeffs=True
     )
 
-    diag_inv = preconditioner.prepare_diag(mem.npf, mem.node)
+    if mem.elem.metric is not None:
+        h11_t, h22_t, h12_t, h21_t = coriolis_params
+        h2x2 = ((h11_t.T, h12_t.T), (h21_t.T, h22_t.T))
+        M = terrain.elliptic_tensor_2d(mem.elem.metric, h2x2)
+        coriolis_params = (M[0][0].T, M[1][1].T, M[0][1].T, M[1][0].T)
+        # diag: fold only the geometric factors (the legacy 2D preconditioner
+        # keeps H^-1 out of the diagonal — preserved here so a forced-flat
+        # metric preconditions bit-identically to the plain path)
+        met = mem.elem.metric
+        diag_inv = preconditioner.prepare_diag(
+            mem.npf,
+            mem.node,
+            cii=(
+                mem.npf.wplus[0] * met.J,
+                mem.npf.wplus[1] * (1.0 + met.G1 * met.G1) * met.ooJ,
+                None,
+            ),
+        )
+    else:
+        diag_inv = preconditioner.prepare_diag(mem.npf, mem.node)
     mem.npf.rhs *= diag_inv
 
     p2 = mem.npf.p2_nodes[mem.node.i2].T
@@ -213,19 +260,45 @@ def _prepare_2d_system(mem, ud, dt):
 
 
 def _prepare_3d_system(mem, ud, dt):
-    """Prepare 3D linear system."""
-    # Note: diag_inv appears to be used but not defined in 3D case
-    # This might be a bug in the original code
-    diag_inv = None  # TODO: Verify if this should be computed for 3D
+    """Prepare 3D linear system.
+
+    The solve vector is the full node.isc box (interior nodes plus one
+    ghost layer per side) in C order. The ghost ring carries zero operator
+    rows and zero rhs entries, so it stays exactly zero through BiCGSTAB.
+
+    The operator carries the full H^-1 tensor coefficients C_ij =
+    (Gamma^-1 P Theta) * h[role(i), role(j)] — the same H^-1 applied by
+    _correction_nodes — so the elliptic solve is consistent with the
+    momentum correction (the legacy operator had only ad-hoc x-z cross
+    terms). With no rotation and no buoyancy H^-1 is the identity and the
+    operator reduces bit-exactly to the plain Laplacian.
+    """
+    hv = coriolis.compute_inverse_coefficients(mem, ud, dt)
+    h_role = ((hv[0], hv[1], hv[2]), (hv[3], hv[4], hv[5]), (hv[6], hv[7], hv[8]))
+    if mem.elem.metric is not None:
+        # terrain: the operator tensor is J A^T H^-1 A — the metric map A of
+        # the momentum correction composed with the J-weighted divergence
+        h_role = terrain.elliptic_tensor(mem.elem.metric, h_role)
+    rho_of = axes.role_of_axis(axes.vertical_axis(ud))
+    cij = [
+        [mem.npf.wplus[i] * h_role[rho_of[i]][rho_of[j]] for j in range(3)]
+        for i in range(3)
+    ]
+
+    diag_inv = preconditioner.prepare_diag(
+        mem.npf, mem.node, cii=(cij[0][0], cij[1][1], cij[2][2])
+    )
+    mem.npf.rhs *= diag_inv
 
-    lap = lap3D.get_linop(mem.elem, mem.node, mem.npf, ud, diag_inv, dt)
-    p2 = mem.npf.p2_nodes  # Define p2 for 3D case
-    sh = p2.reshape(-1).shape[0]
+    lap = lap3D.get_linop(mem.elem, mem.node, mem.npf, ud, diag_inv, dt, cij)
+    sh = mem.npf.rhs.size
 
-    lap = sp.sparse.linalg.LinearOperator((sh, sh), lap)
-    rhs_inner = mem.npf.rhs[mem.node.i1].ravel()
+    lap = sp.sparse.linalg.LinearOperator((sh, sh), lap, dtype=np.float64)
 
-    return lap, rhs_inner, sh
+    rhs_inner = np.zeros_like(mem.npf.rhs)
+    rhs_inner[mem.node.i1] = mem.npf.rhs[mem.node.i1]
+
+    return lap, rhs_inner.ravel()
 
 
 def _reshape_solution(p2, mem, ud, nc):
@@ -236,7 +309,7 @@ def _reshape_solution(p2, mem, ud, nc):
 
     if mem.elem.ndim == 2:
         p2_full[mem.node.i2] = p2.reshape(mem.npf.rhs[mem.node.i1].T.shape).T
-    else:  # 3D case
-        p2_full[mem.node.i1] = p2.reshape(ud.inx + 2, ud.iny + 2, ud.inz + 2)
+    else:  # 3D case: solution vector is the C-ordered node.isc box
+        p2_full[mem.node.i1] = p2.reshape(mem.npf.rhs.shape)
 
     return p2_full
diff --git a/src/pybella/flow_solver/physics/cfl.py b/src/pybella/flow_solver/physics/cfl.py
index 2274e72f..e4f7037d 100644
--- a/src/pybella/flow_solver/physics/cfl.py
+++ b/src/pybella/flow_solver/physics/cfl.py
@@ -31,15 +31,35 @@ def dynamic_timestep(Sol, time, time_output, elem, ud, th, step):
     v = np.abs(Sol.rhov / Sol.rho)
     w = np.abs(Sol.rhow / Sol.rho)
 
+    # terrain: the vertical coordinate velocity is eta_dot = (w - G.u_h)/J
+    # and the vertical signal speed gains the slope/Jacobian factor; the
+    # metric is in the unflipped orientation here (called between steps)
+    c_vert = c
+    if elem.metric is not None:
+        m = elem.metric
+        moms = (Sol.rhou, Sol.rhov, Sol.rhow)
+        contra = moms[m.vaxis] - m.G1 * moms[m.haxes[0]]
+        slope_sq = m.G1**2
+        if m.G2 is not None:
+            contra = contra - m.G2 * moms[m.haxes[1]]
+            slope_sq = slope_sq + m.G2**2
+        vels = [u, v, w]
+        vels[m.vaxis] = np.abs(contra / Sol.rho) * m.ooJ
+        u, v, w = vels
+        c_vert = c * np.sqrt(1.0 + slope_sq) * m.ooJ
+
     # Find maximum velocities (with minimum threshold)
     u_max = max(u.max(), machine_epsilon)
     v_max = max(v.max(), machine_epsilon)
     w_max = max(w.max(), machine_epsilon)
 
     # Calculate acoustic velocities
-    upc_max = max((u + c).max(), machine_epsilon)
-    vpc_max = max((v + c).max(), machine_epsilon)
-    wpc_max = max((w + c).max(), machine_epsilon)
+    cs = [c, c, c]
+    if elem.metric is not None:
+        cs[elem.metric.vaxis] = c_vert
+    upc_max = max((u + cs[0]).max(), machine_epsilon)
+    vpc_max = max((v + cs[1]).max(), machine_epsilon)
+    wpc_max = max((w + cs[2]).max(), machine_epsilon)
 
     if ud.acoustic_timestep == 1:
         return _calculate_acoustic_timestep(
diff --git a/src/pybella/flow_solver/physics/hydrostatics.py b/src/pybella/flow_solver/physics/hydrostatics.py
index b45a92f5..ea901979 100644
--- a/src/pybella/flow_solver/physics/hydrostatics.py
+++ b/src/pybella/flow_solver/physics/hydrostatics.py
@@ -1,10 +1,14 @@
 import numpy as np
 import numba as nb
 
+from ...utils import axes
 from ..utils.boundary import node_boundary as bdry_n
 
 
 def column(HydroState, HydroState_n, Y, Y_n, elem, node, th, ud):
+    """2D x-y initial-condition helper (vertical = axis 1 by convention)."""
+    assert elem.ndim == 2, "column() is a 2D x-y IC helper"
+    assert elem.metric is None, "column() does not support terrain"
     Gamma = th.gm1 / th.gamm
     gamm = th.gamm
     gm1 = th.gm1
@@ -21,7 +25,7 @@ def column(HydroState, HydroState_n, Y, Y_n, elem, node, th, ud):
 
     rhoY0 = 1.0
 
-    g = ud.gravity_strength[1]
+    g = ud.gravity_strength[axes.vertical_axis(ud)]
 
     p0 = rhoY0**gamm
     pi0 = rhoY0**gm1
@@ -92,16 +96,24 @@ def integrated_state(npf, elem, node, th, ud):
     Gamma_inv = 1.0 / Gamma
     gm1_inv = 1.0 / gm1
 
-    # Grid parameters
-    icy = elem.icy
-    igy = elem.igy
+    # Grid parameters along the vertical axis
+    vv = axes.vertical_axis(ud)
+    icy = elem.sc[vv]
+    igy = elem.igs[vv]
+    dyv = elem.dxyz[vv]
+    y_c = axes.coords_along(elem, vv)
+    y_n = axes.coords_along(node, vv)
 
     # Reference state at y=0
     rhoY0 = 1.0
-    g = ud.gravity_strength[1]
+    g = ud.gravity_strength[axes.vertical_axis(ud)]
     p0 = rhoY0**gamm
     pi0 = rhoY0**gm1
 
+    if g != 0.0 and elem.metric is not None:
+        _integrated_state_fields(npf, elem, node, th, ud)
+        return
+
     if g != 0.0:
         ###########################
         # Update cell hydrostates
@@ -110,16 +122,16 @@ def integrated_state(npf, elem, node, th, ud):
         # Define midpoint quadrature along vertical (y-axis)
         dys = np.hstack(
             (
-                np.ones(igy - 1) * -elem.dy,
-                [-elem.dy / 2],
-                [elem.dy / 2],
-                np.ones(icy - 3) * elem.dy,
+                np.ones(igy - 1) * -dyv,
+                [-dyv / 2],
+                [dyv / 2],
+                np.ones(icy - 3) * dyv,
             )
         )
 
         # Cell centers and midpoints for integration
-        y_ps = elem.y
-        y_ms = np.hstack((elem.y[1:igy], node.y[igy], node.y[igy], elem.y[igy:-1]))
+        y_ps = y_c
+        y_ms = np.hstack((y_c[1:igy], y_n[igy], y_n[igy], y_c[igy:-1]))
 
         # Get inverse stratification at each point
         S_ps = 1.0 / ud.stratification(y_ps)
@@ -160,19 +172,19 @@ def integrated_state(npf, elem, node, th, ud):
 
         # Ghost cells below bottom (negative heights)
         Sn_integral_p = np.zeros(igy)
-        yn_p = node.y[:igy] - node.dy
-        yn_m = node.y[1 : igy + 1] - node.dy
+        yn_p = y_n[:igy] - dyv
+        yn_m = y_n[1 : igy + 1] - dyv
 
-        Sn_integral_p[:] = -node.dy * 1.0 / ud.stratification(0.5 * (yn_p + yn_m))
+        Sn_integral_p[:] = -dyv * 1.0 / ud.stratification(0.5 * (yn_p + yn_m))
         Sn_integral_p = np.cumsum(Sn_integral_p[:igy][::-1])[::-1]
 
         # Bulk domain above reference level
-        yn_p = node.y[igy + 1 :]
+        yn_p = y_n[igy + 1 :]
         yn_m = np.zeros_like(yn_p)
         yn_m[1:] = yn_p[:-1]
 
         Sn_p = 1.0 / ud.stratification(0.5 * (yn_p + yn_m))
-        Sn_integral_p = np.hstack((Sn_integral_p, np.cumsum(elem.dy * Sn_p)))
+        Sn_integral_p = np.hstack((Sn_integral_p, np.cumsum(dyv * Sn_p)))
 
         # Calculate nodal hydrostatic fields
         pi_hydro_n = pi0 - Gamma * g * Sn_integral_p
@@ -181,7 +193,7 @@ def integrated_state(npf, elem, node, th, ud):
         # Update node solutions - below reference
         npf.HydroState_n.rhoY0[:igy] = rhoY_hydro_n[:igy]
         npf.HydroState_n.Y0[: igy + 1] = ud.stratification(
-            0.5 * (y_ps[: igy + 1] + y_ps[: igy + 1] - elem.dy)
+            0.5 * (y_ps[: igy + 1] + y_ps[: igy + 1] - dyv)
         )
         npf.HydroState_n.rho0[:igy] = rhoY_hydro_n[:igy] / npf.HydroState_n.Y0[:igy]
         npf.HydroState_n.S0[:igy] = 1.0 / npf.HydroState_n.Y0[:igy]
@@ -191,7 +203,7 @@ def integrated_state(npf, elem, node, th, ud):
         # Update node solutions - above reference
         npf.HydroState_n.rhoY0[igy + 1 :] = rhoY_hydro_n[igy:]
         npf.HydroState_n.Y0[igy + 1 :] = ud.stratification(
-            0.5 * (y_ps[igy:] + y_ps[igy:] + elem.dy)
+            0.5 * (y_ps[igy:] + y_ps[igy:] + dyv)
         )
         npf.HydroState_n.rho0[igy + 1 :] = (
             rhoY_hydro_n[igy:] / npf.HydroState_n.Y0[igy + 1 :]
@@ -218,17 +230,77 @@ def integrated_state(npf, elem, node, th, ud):
         npf.HydroState_n.S0[:] = 1.0
 
 
+def _integrated_state_fields(npf, elem, node, th, ud):
+    """Terrain branch of integrated_state: hydrostates as per-column fields.
+
+    The Exner pressure follows from quadrature of the inverse stratification
+    on a fine auxiliary 1D z-grid (the background depends on physical height
+    only), evaluated at the cell/node height fields z(xi, eta) by linear
+    interpolation. The pi reference (rhoY = 1) sits at z = 0, matching the
+    profile branch.
+    """
+    Gamma = th.gm1 / th.gamm
+    Gamma_inv = 1.0 / Gamma
+    gm1_inv = 1.0 / th.gm1
+
+    vv = axes.vertical_axis(ud)
+    g = ud.gravity_strength[vv]
+
+    z_c = elem.metric.z
+    z_n = node.metric.z
+    z_lo = min(z_c.min(), z_n.min(), 0.0)
+    z_hi = max(z_c.max(), z_n.max(), 0.0)
+    nfine = max(2048, 16 * int(elem.sc[vv]))
+    zf = np.linspace(z_lo, z_hi, nfine)
+
+    Sf = 1.0 / ud.stratification(zf)
+    integral = np.concatenate(
+        ([0.0], np.cumsum(0.5 * (Sf[1:] + Sf[:-1]) * np.diff(zf)))
+    )
+    integral -= np.interp(0.0, zf, integral)
+
+    rhoY0 = 1.0
+    pi0 = rhoY0**th.gm1
+
+    for states, z in ((npf.HydroState, z_c), (npf.HydroState_n, z_n)):
+        pi = pi0 - Gamma * g * np.interp(z, zf, integral)
+        S = 1.0 / ud.stratification(z)
+        states.rhoY0[...] = pi**gm1_inv
+        states.p0[...] = pi**Gamma_inv
+        states.p20[...] = pi / ud.Msq
+        states.S0[...] = S
+        states.S10[...] = 0.0
+        states.Y0[...] = 1.0 / S
+        states.rho0[...] = states.rhoY0 * S
+
+
 def analytical_state(npf, elem, node, th, ud):
-    g = ud.gravity_strength[1]
+    """Isothermal hydrostatic background, discrete-exact per cell.
+
+    With terrain the same closed form is evaluated at the physical heights
+    z(xi, eta) with the local vertical cell extent dz = J * deta, so the
+    hydrostates become full per-column fields (States in field mode);
+    without terrain the expressions reduce to the legacy 1D profiles
+    bit-identically.
+    """
+    vv = axes.vertical_axis(ud)
+    g = ud.gravity_strength[vv]
     Gamma = th.Gamma
     Hex = 1.0 / (th.Gamma * g)
-    dy = elem.dy
+    dy = elem.dxyz[vv]
+
+    if elem.metric is not None:
+        z_n, dz_n = node.metric.z, node.metric.J * dy
+        z_c, dz_c = elem.metric.z, elem.metric.J * dy
+    else:
+        z_n, dz_n = axes.coords_along(node, vv), dy
+        z_c, dz_c = axes.coords_along(elem, vv), dy
 
-    pi_np = np.exp(-(node.y + 0.5 * dy) / Hex)
-    pi_nm = np.exp(-(node.y - 0.5 * dy) / Hex)
-    pi_n = np.exp(-(node.y) / Hex)
+    pi_np = np.exp(-(z_n + 0.5 * dz_n) / Hex)
+    pi_nm = np.exp(-(z_n - 0.5 * dz_n) / Hex)
+    pi_n = np.exp(-(z_n) / Hex)
 
-    Y_n = -Gamma * g * dy / (pi_np - pi_nm)
+    Y_n = -Gamma * g * dz_n / (pi_np - pi_nm)
     P_n = pi_n**th.gm1inv
     p_n = pi_n**th.Gammainv
     rho_n = P_n / Y_n
@@ -240,11 +312,11 @@ def analytical_state(npf, elem, node, th, ud):
     npf.HydroState_n.Y0[...] = Y_n
     npf.HydroState_n.S0[...] = 1.0 / Y_n
 
-    pi_cp = np.exp(-(elem.y + 0.5 * dy) / Hex)
-    pi_cm = np.exp(-(elem.y - 0.5 * dy) / Hex)
-    pi_c = np.exp(-(elem.y) / Hex)
+    pi_cp = np.exp(-(z_c + 0.5 * dz_c) / Hex)
+    pi_cm = np.exp(-(z_c - 0.5 * dz_c) / Hex)
+    pi_c = np.exp(-(z_c) / Hex)
 
-    Y_c = -Gamma * g * dy / (pi_cp - pi_cm)
+    Y_c = -Gamma * g * dz_c / (pi_cp - pi_cm)
     P_c = pi_c**th.gm1inv
     p_c = pi_c**th.Gammainv
     rho_c = P_c / Y_c
@@ -258,6 +330,9 @@ def analytical_state(npf, elem, node, th, ud):
 
 
 def initial_pressure(Sol, npf, elem, node, ud, th):
+    """2D x-y initial-condition helper (vertical = axis 1 by convention)."""
+    assert elem.ndim == 2, "initial_pressure() is a 2D x-y IC helper"
+    assert elem.metric is None, "initial_pressure() does not support terrain"
     Gammainv = th.Gammainv
     igy = node.igy
     igx = node.igx
diff --git a/src/pybella/flow_solver/utils/boundary/cell_boundary.py b/src/pybella/flow_solver/utils/boundary/cell_boundary.py
index 3ca35c60..3f2df998 100644
--- a/src/pybella/flow_solver/utils/boundary/cell_boundary.py
+++ b/src/pybella/flow_solver/utils/boundary/cell_boundary.py
@@ -3,6 +3,7 @@
 """
 
 import numpy as np
+from ....utils import axes
 from ....utils import options as opts
 from .common import get_ghost_padding
 
@@ -15,6 +16,25 @@ def __init__(self, mem, ud):
         self.ud = ud
         self.igs = mem.elem.igs
         self.ndim = mem.elem.ndim
+        # physical vertical axis and the (axis-named) momentum components
+        self.v_phys = axes.vertical_axis(ud)
+        self.vert_mom = axes.MOMENTA[self.v_phys]
+        self.hor_moms = tuple(m for i, m in enumerate(axes.MOMENTA) if i != self.v_phys)
+        # terrain metric (None on uniform-Cartesian runs); during advection
+        # sweeps it is flipped alongside the solution arrays
+        self.metric = mem.elem.metric
+        if self.metric is not None:
+            a_h1, a_h2 = axes.horizontal_axes(self.v_phys)
+            # physical-component momentum names matching G1/G2
+            self.slope_moms = (axes.MOMENTA[a_h1], axes.MOMENTA[a_h2])
+
+    def _slope_terms(self, sol, idx):
+        """G1*mom_h1 + G2*mom_h2 at the given index (terrain only)."""
+        m = self.metric
+        out = m.G1[idx] * getattr(sol, self.slope_moms[0])[idx]
+        if m.G2 is not None:
+            out = out + m.G2[idx] * getattr(sol, self.slope_moms[1])[idx]
+        return out
 
     def apply_no_gravity_boundary(self, sol, current_step, ghost_padding, idx):
         """Apply boundary conditions for axes without gravity."""
@@ -23,14 +43,26 @@ def apply_no_gravity_boundary(self, sol, current_step, ghost_padding, idx):
         if bdry_type == opts.BdryType.PERIODIC:
             _set_boundary(sol, ghost_padding, "wrap", idx)
         elif bdry_type == opts.BdryType.WALL:
-            _set_boundary(sol, ghost_padding, "symmetric", idx)
+            # the wall-normal momentum is the component along the wall axis
+            _set_boundary(
+                sol,
+                ghost_padding,
+                "symmetric",
+                idx,
+                normal_mom=axes.MOMENTA[current_step],
+            )
         elif bdry_type == opts.BdryType.RAYLEIGH:
-            raise AssertionError("Rayleigh boundary not defined on x-direction.")
+            raise AssertionError("Rayleigh boundary only defined on the gravity axis.")
 
     def apply_gravity_boundary(self, sol, dim, ghost_padding, step):
-        """Apply boundary conditions for axes with gravity."""
+        """Apply boundary conditions for axes with gravity.
+
+        ``dim`` is the ARRAY axis of the boundary (during advection sweeps
+        the data is flipped, so it differs from the physical vertical);
+        gravity_strength is indexed by the PHYSICAL axis.
+        """
         gravity_axis = dim
-        g = self.ud.gravity_strength[gravity_axis]
+        g = self.ud.gravity_strength[self.v_phys]
         direction = -1.0
         offset = 0
 
@@ -41,7 +73,7 @@ def apply_gravity_boundary(self, sol, dim, ghost_padding, step):
 
     def _process_ghost_cells_side(self, sol, side, dim, direction, offset, step, g):
         """Process ghost cells for one side of the boundary."""
-        y_axs = self.ndim - 1 if step is not None else 1
+        y_axs = self.ndim - 1 if step is not None else self.v_phys
 
         for current_idx in np.arange(side)[::-1]:
             indices = self._get_gravity_indices(current_idx, direction, offset, y_axs)
@@ -53,7 +85,13 @@ def _process_ghost_cells_side(self, sol, side, dim, direction, offset, step, g):
     def _get_gravity_indices(self, current_idx, direction, offset, y_axs):
         """Get the indices for last, source, and image cells."""
         nlast, nsource, nimage = _get_gravity_padding(
-            self.ndim, current_idx, direction, offset, self.mem.elem, y_axs=y_axs
+            self.ndim,
+            current_idx,
+            direction,
+            offset,
+            self.mem.elem.sc[self.v_phys],
+            self.mem.elem.igs[self.v_phys],
+            y_axs=y_axs,
         )
         return {"last": nlast, "source": nsource, "image": nimage}
 
@@ -65,8 +103,21 @@ def _calculate_ghost_values(self, sol, indices, direction, g, y_axs):
         Y_last = sol.rhoY[nlast] / sol.rho[nlast]
         Y_source = sol.rhoY[nsource] / sol.rho[nsource]
 
-        rhoYv_image = -sol.rhov[nsource] * sol.rhoY[nsource] / sol.rho[nsource]
-        S = 1.0 / self.ud.stratification(self.mem.elem.y[nimage[y_axs]])
+        vert = getattr(sol, self.vert_mom)
+        if self.metric is not None:
+            # the metric must be oriented like the (possibly sweep-flipped)
+            # solution arrays — compute_advection flips them together
+            assert self.metric.vaxis == y_axs, "metric not sweep-oriented"
+            # free slip through the terrain surface: reflect the
+            # CONTRAVARIANT momentum (mom_v - G.mom_h), not the Cartesian one
+            contra_source = vert[nsource] - self._slope_terms(sol, nsource)
+            rhoYv_image = -contra_source * sol.rhoY[nsource] / sol.rho[nsource]
+            # stratification at the PHYSICAL height of the image cell
+            S = 1.0 / self.ud.stratification(self.metric.z[nimage])
+        else:
+            rhoYv_image = -vert[nsource] * sol.rhoY[nsource] / sol.rho[nsource]
+            y_coords = axes.coords_along(self.mem.elem, self.v_phys)
+            S = 1.0 / self.ud.stratification(y_coords[nimage[y_axs]])
 
         # Calculate pressure difference
         dpi = self._calculate_pressure_difference(
@@ -87,9 +138,10 @@ def _calculate_ghost_values(self, sol, indices, direction, g, y_axs):
         return {
             "rho": rho,
             "rhoY": rhoY,
-            "u": sol.rhou[nsource] / sol.rho[nsource],
+            "hor": {
+                m: getattr(sol, m)[nsource] / sol.rho[nsource] for m in self.hor_moms
+            },
             "v": velocities["v"],
-            "w": sol.rhow[nsource] / sol.rho[nsource],
             "X": sol.rhoX[nsource] / sol.rho[nsource],
             "Th_slc": velocities.get("Th_slc", 1.0),
         }
@@ -104,13 +156,14 @@ def _calculate_pressure_difference(
                 - self.mem.npf.HydroState.p20[nlast[y_axs]]
             ) * self.ud.Msq
         else:
-            return (
-                direction
-                * (self.mem.th.Gamma * g)
-                * 0.5
-                * self.mem.elem.dy
-                * (1.0 / Y_last + S)
-            )
+            deta = self.mem.elem.dxyz[self.v_phys]
+            if self.metric is not None:
+                # local vertical cell extent dz = J * deta across the
+                # last -> image interval
+                dz = 0.5 * (self.metric.J[nimage] + self.metric.J[nlast]) * deta
+            else:
+                dz = deta
+            return direction * (self.mem.th.Gamma * g) * 0.5 * dz * (1.0 / Y_last + S)
 
     def _calculate_density_and_mass_fraction(self, sol, nlast, nimage, dpi, S, y_axs):
         """Calculate density and mass fraction for ghost cells."""
@@ -128,13 +181,14 @@ def _calculate_velocities(
         """Calculate velocity components for ghost cells."""
         result = {}
 
+        vert = getattr(sol, self.vert_mom)
         if hasattr(self.ud, "ATMOSPHERIC_EXTENSION"):
             if direction > 0:  # bottom boundary
                 result["v"] = (
-                    sol.rhov[nsource] * Y_source / sol.rho[nsource] * rhoY / Y_image
+                    vert[nsource] * Y_source / sol.rho[nsource] * rhoY / Y_image
                 )
             else:  # top boundary
-                result["v"] = sol.rhov[nsource] * Y_source
+                result["v"] = vert[nsource] * Y_source
             result["Th_slc"] = (
                 rhoY / (rhoY / Y_image) / (sol.rhoY[nsource] / sol.rho[nsource])
             )
@@ -147,22 +201,26 @@ def _calculate_velocities(
     def _assign_ghost_values(self, sol, nimage, ghost_values):
         """Assign calculated values to ghost cells."""
         sol.rho[nimage] = ghost_values["rho"]
-        sol.rhou[nimage] = (
-            ghost_values["rho"] * ghost_values["u"] * ghost_values["Th_slc"]
-        )
-        sol.rhow[nimage] = (
-            ghost_values["rho"] * ghost_values["w"] * ghost_values["Th_slc"]
-        )
+        for m, val in ghost_values["hor"].items():
+            getattr(sol, m)[nimage] = ghost_values["rho"] * val * ghost_values["Th_slc"]
         sol.rhoY[nimage] = ghost_values["rhoY"]
         sol.rhoX[nimage] = ghost_values["rho"] * ghost_values["X"]
 
-        # Handle v-component differently for atmospheric extension
+        # Handle the vertical component differently for atmospheric extension
+        vert = getattr(sol, self.vert_mom)
         if hasattr(self.ud, "ATMOSPHERIC_EXTENSION"):
-            sol.rhov[nimage] = -ghost_values["v"] / (
+            vert[nimage] = -ghost_values["v"] / (
                 ghost_values["rhoY"] / ghost_values["rho"]
             )
+        elif self.metric is not None:
+            # rho*v carries the reflected CONTRAVARIANT momentum; rebuild the
+            # Cartesian vertical momentum with the ghost cell's slope terms
+            # (the horizontal momenta were assigned just above)
+            vert[nimage] = ghost_values["rho"] * ghost_values["v"] + self._slope_terms(
+                sol, nimage
+            )
         else:
-            sol.rhov[nimage] = ghost_values["rho"] * ghost_values["v"]
+            vert[nimage] = ghost_values["rho"] * ghost_values["v"]
 
 
 def set_ghost_cells(mem, ud, step=None, sol=None):
@@ -217,20 +275,25 @@ def _pad_field(sol, field_name, idx, pads, mode):
             field[...] = np.pad(field[idx], pads, mode)
 
 
-def _set_boundary(sol, pads, btype, idx):
+def _set_boundary(sol, pads, btype, idx, normal_mom="rhov"):
     """
-    Functional approach to setting the boundary.
+    Functional approach to setting the boundary. ``normal_mom`` names the
+    wall-normal momentum component (mirrored with a sign flip); historically
+    this was hardcoded to rhov, which broke walls on non-vertical axes.
     """
+    tangential = ["rho", "rhoY", "rhoX"] + [
+        m for m in ("rhou", "rhov", "rhow") if m != normal_mom
+    ]
 
     # Define field groupings for each boundary type
     boundary_specs = {
         "symmetric": [
-            (["rho", "rhou", "rhow", "rhoY", "rhoX"], "symmetric"),
-            (["rhov"], "negative_symmetric"),
+            (tangential, "symmetric"),
+            ([normal_mom], "negative_symmetric"),
         ],
         "constant": [
-            (["rho", "rhou", "rhow", "rhoY", "rhoX"], "symmetric"),
-            (["rhov"], "constant"),
+            (tangential, "symmetric"),
+            ([normal_mom], "constant"),
         ],
         "wrap": [(["rho", "rhou", "rhov", "rhow", "rhoY", "rhoX"], "wrap")],
     }
@@ -273,7 +336,7 @@ def _negative_symmetric(vector, pad_width, iaxis, kwargs=None):
         return vector
 
 
-def _get_gravity_padding(ndim, cur_idx, direction, offset, elem, y_axs=None):
+def _get_gravity_padding(ndim, cur_idx, direction, offset, icv, igv, y_axs=None):
     """
     Parameters
     ----------
@@ -285,14 +348,14 @@ def _get_gravity_padding(ndim, cur_idx, direction, offset, elem, y_axs=None):
         Top of the domain, `direction=+1`, bottom of the domain, `direction=-1`.
     offset : int
         `offset=0`, index starts counting from 0,1.... `offset=1`, index starts counting from -1,-2,..., i.e. end-selection of the array.
-    elem : :class:`discretization.kgrid.ElemSpaceDiscr`
-        Cell grid.
+    icv, igv : int
+        Cell count (incl. ghosts) and ghost count along the gravity axis.
     y_axs : int, optional
         `Default == None`. Specifies the direction of the gravity axis. If `None`, then direction is the the y-axis.
 
     """
     cur_i = np.copy(cur_idx)
-    cur_idx += offset * ((elem.icy - 1) - 2 * cur_idx)
+    cur_idx += offset * ((icv - 1) - 2 * cur_idx)
     gravity_padding = [slice(None)] * ndim
     if y_axs == None:
         y_axs = 1
@@ -301,9 +364,7 @@ def _get_gravity_padding(ndim, cur_idx, direction, offset, elem, y_axs=None):
     nlast[y_axs] = int(cur_idx + direction)
 
     nsource = np.copy(gravity_padding)
-    nsource[y_axs] = int(
-        offset * (elem.icy) + direction * (2 * elem.igy - (1 - offset) - cur_i)
-    )
+    nsource[y_axs] = int(offset * (icv) + direction * (2 * igv - (1 - offset) - cur_i))
 
     nimage = np.copy(gravity_padding)
     nimage[y_axs] = int(cur_idx)
diff --git a/src/pybella/flow_solver/utils/boundary/node_boundary.py b/src/pybella/flow_solver/utils/boundary/node_boundary.py
index f83a7288..e8e70192 100644
--- a/src/pybella/flow_solver/utils/boundary/node_boundary.py
+++ b/src/pybella/flow_solver/utils/boundary/node_boundary.py
@@ -1,4 +1,5 @@
 import numpy as np
+from ....utils import axes
 from ....utils import options as opts
 from .common import get_ghost_padding
 
@@ -14,11 +15,13 @@ def set_ghost_nodes(p, node, ud, igs=None):
         else:  # ud.bdry_type[dim] == opts.BdryType.WALL:
             p[...] = np.pad(p[idx], ghost_padding, "reflect")
 
-    # if periodic_plus_one
-    if node.iicy == 2:  # implying horizontal slices
-        pn = p[:, 2, :]
-        pn = np.expand_dims(pn, axis=1)
-        p[...] = np.repeat(pn, node.icy, axis=1)
+    # quasi-2D: broadcast the single interior layer across any degenerate
+    # axis (historically hardcoded to axis 1 / iicy == 2)
+    for dim in axes.degenerate_axes(node):
+        slc = [slice(None)] * p.ndim
+        slc[dim] = node.igs[dim]
+        pn = np.expand_dims(p[tuple(slc)], axis=dim)
+        p[...] = np.repeat(pn, node.sc[dim], axis=dim)
 
 
 def periodic_plus_one(vector, pad_width, iaxis, kwargs=None):
diff --git a/src/pybella/flow_solver/utils/boundary/rayleigh_boundary.py b/src/pybella/flow_solver/utils/boundary/rayleigh_boundary.py
index 3d18c5de..73a0786d 100644
--- a/src/pybella/flow_solver/utils/boundary/rayleigh_boundary.py
+++ b/src/pybella/flow_solver/utils/boundary/rayleigh_boundary.py
@@ -1,18 +1,29 @@
 import copy
 import numpy as np
+from ....utils import axes
 from ....utils import io
 from ....utils import options as opts
 from . import cell_boundary as bdry_c
 
 
 def get_tau_y(ud, elem, node, alpha):
-    tauc_y = np.zeros_like(elem.y)
-    taun_y = np.zeros_like(node.y)
+    """Top-sponge damping profiles along the configured vertical axis.
 
-    ud.bny = node.y[-ud.inbcy - 3]
+    Profiles are functions of the computational vertical coordinate eta —
+    with terrain this is the correct choice (coordinate surfaces flatten
+    towards the domain top where the sponge lives).
+    """
+    v = axes.vertical_axis(ud)
+    elem_y = axes.coords_along(elem, v)
+    node_y = axes.coords_along(node, v)
 
-    c1n = node.y <= ud.bny
-    ccn = (node.y[:-2] - ud.bny) / (node.y[:-2][-1] - ud.bny)
+    tauc_y = np.zeros_like(elem_y)
+    taun_y = np.zeros_like(node_y)
+
+    ud.bny = node_y[-ud.inbcy - 3]
+
+    c1n = node_y <= ud.bny
+    ccn = (node_y[:-2] - ud.bny) / (node_y[:-2][-1] - ud.bny)
     c2n = np.logical_and(ccn >= 0.0, ccn <= 0.5)
     c3n = np.logical_and(ccn > 0.5, ccn <= 1.0)
 
@@ -22,7 +33,7 @@ def get_tau_y(ud, elem, node, alpha):
         / 2.0
         * (
             1.0
-            - np.cos((node.y[np.where(c2n)] - ud.bny) / (node.y[-1] - ud.bny) * np.pi)
+            - np.cos((node_y[np.where(c2n)] - ud.bny) / (node_y[-1] - ud.bny) * np.pi)
         )
     )
     taun_y[np.where(c3n)] = (
@@ -30,27 +41,32 @@ def get_tau_y(ud, elem, node, alpha):
         / 2.0
         * (
             1.0
-            + ((node.y[np.where(c3n)] - ud.bny) / (node.y[-1] - ud.bny) - 0.5) * np.pi
+            + ((node_y[np.where(c3n)] - ud.bny) / (node_y[-1] - ud.bny) - 0.5) * np.pi
         )
     )
 
     taun_y[-2:] = -np.abs(taun_y).max()
-    tauc_y[...] = np.interp(elem.y, node.y, taun_y)
+    tauc_y[...] = np.interp(elem_y, node_y, taun_y)
 
     return tauc_y, taun_y
 
 
 def get_bottom_tau_y(ud, elem, node, alpha, cutoff=0.5):
-    tauc_y = np.zeros_like(elem.y)
-    taun_y = np.zeros_like(node.y)
+    v = axes.vertical_axis(ud)
+    elem_y = axes.coords_along(elem, v)
+    node_y = axes.coords_along(node, v)
+    vmax = (ud.xmax, ud.ymax, ud.zmax)[v]
+
+    tauc_y = np.zeros_like(elem_y)
+    taun_y = np.zeros_like(node_y)
 
-    assert ud.ymax > cutoff, "rayleigh forcing boundary below minimum domain extent"
-    idx = (np.abs(elem.y - (ud.ymax - cutoff))).argmin()
+    assert vmax > cutoff, "rayleigh forcing boundary below minimum domain extent"
+    idx = (np.abs(elem_y - (vmax - cutoff))).argmin()
 
-    ud.forcing_bny = node.y[idx]
+    ud.forcing_bny = node_y[idx]
 
-    c1n = node.y <= ud.forcing_bny
-    ccn = (node.y[:-3] - ud.forcing_bny) / (node.y[:-3][-1] - ud.forcing_bny)
+    c1n = node_y <= ud.forcing_bny
+    ccn = (node_y[:-3] - ud.forcing_bny) / (node_y[:-3][-1] - ud.forcing_bny)
     c2n = np.logical_and(ccn >= 0.0, ccn <= 0.5)
     c3n = np.logical_and(ccn > 0.5, ccn <= 1.0)
 
@@ -61,8 +77,8 @@ def get_bottom_tau_y(ud, elem, node, alpha, cutoff=0.5):
         * (
             1.0
             - np.cos(
-                (node.y[np.where(c2n)] - ud.forcing_bny)
-                / (node.y[-1] - ud.forcing_bny)
+                (node_y[np.where(c2n)] - ud.forcing_bny)
+                / (node_y[-1] - ud.forcing_bny)
                 * np.pi
             )
         )
@@ -73,7 +89,7 @@ def get_bottom_tau_y(ud, elem, node, alpha, cutoff=0.5):
         * (
             1.0
             + (
-                (node.y[np.where(c3n)] - ud.forcing_bny) / (node.y[-1] - ud.forcing_bny)
+                (node_y[np.where(c3n)] - ud.forcing_bny) / (node_y[-1] - ud.forcing_bny)
                 - 0.5
             )
             * np.pi
@@ -82,7 +98,7 @@ def get_bottom_tau_y(ud, elem, node, alpha, cutoff=0.5):
 
     taun_y[-3:] = -np.abs(taun_y).max()
     taun_y[...] = taun_y[::-1]
-    tauc_y = np.interp(elem.y, node.y, taun_y)
+    tauc_y = np.interp(elem_y, node_y, taun_y)
 
     dd = 1.0
     tauc_y = dd * tauc_y / np.abs(tauc_y).max()
@@ -137,19 +153,37 @@ def apply_rayleigh_forcing(
     bdry_c.set_ghost_cells(mem, ud)
 
 
+def _vertical_profile(profile, ndim, vaxis):
+    """Broadcast a 1D vertical sponge profile against ndim cell fields.
+
+    Reproduces the legacy implicit (1, -1) broadcast for 2D x-y exactly;
+    scalars (the inactive 0.0 sentinels) pass through.
+    """
+    if not isinstance(profile, np.ndarray) or profile.ndim != 1:
+        return profile
+    shape = [1] * ndim
+    shape[vaxis] = -1
+    return profile.reshape(shape)
+
+
 def rayleigh_damping(sol, npf, ud, forcing=None):
     u = sol.rhou / sol.rho  # [elem.i2]
     v = sol.rhov / sol.rho  # [elem.i2]
     Y = sol.rhoY / sol.rho  # [elem.i2]
     rho = sol.rho  # [elem.i2]
 
-    if ud.bdry_type[1] == opts.BdryType.RAYLEIGH:
+    ndim = sol.rho.ndim
+    vaxis = axes.vertical_axis(ud)
+
+    if ud.bdry_type[vaxis] == opts.BdryType.RAYLEIGH:
         tcy, tny = ud.tcy, ud.tny
     else:
         tcy, tny = 0.0, 0.0
+    tcy = _vertical_profile(tcy, ndim, vaxis)
 
     if forcing is not None:
         tcy_f, tny_f = ud.forcing_tcy, ud.forcing_tny
+        tcy_f = _vertical_profile(tcy_f, ndim, vaxis)
         tcy, tny = 0.0, 0.0
 
         u_f, v_f, Y_f, pi_f, t = forcing
@@ -175,7 +209,6 @@ def rayleigh_damping(sol, npf, ud, forcing=None):
         tcy_f, tny_f = 0.0, 0.0
         mfac = 0.0
 
-    # assuming 2D vertical slice - not dimension agnostic
     u += tcy * (u - ud.u_wind_speed) + c_f * (
         tcy_f * (u - ud.u_wind_speed) + np.abs(tcy_f) * mfac * u_f
     )
@@ -183,9 +216,20 @@ def rayleigh_damping(sol, npf, ud, forcing=None):
         tcy_f * (v - ud.v_wind_speed) + np.abs(tcy_f) * mfac * v_f
     )
 
-    Ybar = npf.HydroState.Y0.reshape(1, -1)
+    if npf.HydroState.field_mode:
+        Ybar = npf.HydroState.Y0  # terrain: full per-column field
+    else:
+        Ybar = _vertical_profile(npf.HydroState.Y0, ndim, vaxis)
     Y += tcy * (Y - Ybar) + c_f * (tcy_f * (Y - Ybar) + np.abs(tcy_f) * mfac * Y_f)
 
     sol.rhou[...] = rho * u
     sol.rhov[...] = rho * v
     sol.rhoY[...] = rho * Y
+
+    # damp the third velocity component in 3D too (mountain-wave sponges
+    # must absorb vertical motion whatever the axis layout); the 2D path
+    # is untouched — its out-of-plane rhow was never damped (golden master)
+    if ndim == 3:
+        w = sol.rhow / rho
+        w += tcy * (w - ud.w_wind_speed) + c_f * tcy_f * (w - ud.w_wind_speed)
+        sol.rhow[...] = rho * w
diff --git a/src/pybella/flow_solver/utils/fields.py b/src/pybella/flow_solver/utils/fields.py
index 16a8fc8e..289a1531 100644
--- a/src/pybella/flow_solver/utils/fields.py
+++ b/src/pybella/flow_solver/utils/fields.py
@@ -2,6 +2,8 @@
 import scipy as sp
 import logging
 
+from ...utils import axes
+
 
 class CellSolField(object):
     """
@@ -56,7 +58,8 @@ def squeezer(self):
 
         """
         for key, value in vars(self).items():
-            setattr(self, key, value.squeeze())
+            if type(value) == np.ndarray:
+                setattr(self, key, value.squeeze())
 
     def primitives(self, th):
         """
@@ -123,20 +126,23 @@ class States(CellSolField):
 
     """
 
-    def __init__(self, size):
+    def __init__(self, size, field_mode=False):
         """
         Parameters
         ----------
         size : tuple
             Tuple containing the number of cells in the respective directions including ghost cells.
-        ud : :class:`inputs.user_data.UserDataInit`
-            Data container for the initial conditions
+        field_mode : bool
+            False (default): 1D vertical profiles, broadcast horizontally on
+            demand. True (terrain-following runs): full grid-shaped fields —
+            hydrostates vary per column when the physical height does.
 
         Notes
         -----
         Many variables in this data container are no longer used and can be removed.
 
         """
+        self.field_mode = field_mode
 
         self.p0 = np.zeros((size))
         self.p20 = np.zeros((size))
@@ -151,17 +157,39 @@ def __init__(self, size):
         self.get_dSdy = self.get_dSdy
         self.get_S0c = self.get_S0c
 
+        # vertical axis of the 1D profiles; NodePressureField overrides
+        self.vaxis = axes.VERTICAL_DEFAULT
+
         self.init_dSdy = False
         self.init_S0c = False
 
     def get_dSdy(self, elem, node):
         if not self.init_dSdy:
             logging.info("Computing dSdy")
-            self.dSdy = sp.signal.convolve(self.S0, [1.0, -1.0], mode="valid") / node.dy
-
-            for dim in range(0, node.ndim, 2):
-                self.dSdy = np.expand_dims(self.dSdy, dim)
-                self.dSdy = np.repeat(self.dSdy, elem.sc[dim], axis=dim)
+            if self.field_mode:
+                # node-States field: difference S0 over the physical node
+                # heights along the vertical, then average to cell centres
+                # along the horizontal axes (the field generalisation of
+                # the 1D convolve-and-broadcast below)
+                zn = node.metric.z
+                dS = np.diff(self.S0, axis=self.vaxis) / np.diff(zn, axis=self.vaxis)
+                for dim in range(node.ndim):
+                    if dim == self.vaxis:
+                        continue
+                    lo = [slice(None)] * node.ndim
+                    hi = [slice(None)] * node.ndim
+                    lo[dim] = slice(None, -1)
+                    hi[dim] = slice(1, None)
+                    dS = 0.5 * (dS[tuple(lo)] + dS[tuple(hi)])
+                self.dSdy = dS
+            else:
+                self.dSdy = (
+                    sp.signal.convolve(self.S0, [1.0, -1.0], mode="valid")
+                    / node.dxyz[self.vaxis]
+                )
+                self.dSdy = axes.expand_profile(
+                    self.dSdy, node.ndim, self.vaxis, elem.sc
+                )
 
             self.init_dSdy = True
 
@@ -170,22 +198,20 @@ def get_dSdy(self, elem, node):
     def get_S0c(self, elem):
         if not self.init_S0c:
             logging.info("Computing S0c")
-            S0c_result = self.S0
-
-            for dim in range(0, elem.ndim, 2):
-                S0c_result = np.expand_dims(S0c_result, dim)
-                S0c_result = np.repeat(S0c_result, elem.sc[dim], axis=dim)
-
-            self.S0c = S0c_result
+            if self.field_mode:
+                self.S0c = self.S0
+            else:
+                self.S0c = axes.expand_profile(self.S0, elem.ndim, self.vaxis, elem.sc)
             self.init_S0c = True
 
         return self.S0c
 
 
 class NodePressureField(object):
-    def __init__(self, elem, node):
+    def __init__(self, elem, node, ud=None):
         sc = elem.sc
         sn = node.sc
+        vaxis = axes.vertical_axis(ud) if ud is not None else axes.VERTICAL_DEFAULT
 
         self.p0 = 1.0
         self.p00 = 1.0
@@ -204,8 +230,15 @@ def __init__(self, elem, node):
         self.wcenter = np.zeros((node.isc))
         self.wplus = np.zeros(([elem.ndim] + list(sc)))
 
-        self.HydroState = States([sc[1]])
-        self.HydroState_n = States([sn[1]])
+        if elem.metric is not None:
+            # terrain: hydrostates vary per column -> full fields
+            self.HydroState = States(sc, field_mode=True)
+            self.HydroState_n = States(sn, field_mode=True)
+        else:
+            self.HydroState = States([sc[vaxis]])
+            self.HydroState_n = States([sn[vaxis]])
+        self.HydroState.vaxis = vaxis
+        self.HydroState_n.vaxis = vaxis
 
         self.squeezer()
 
diff --git a/src/pybella/flow_solver/utils/prepare.py b/src/pybella/flow_solver/utils/prepare.py
index 3ab6f920..998935ec 100644
--- a/src/pybella/flow_solver/utils/prepare.py
+++ b/src/pybella/flow_solver/utils/prepare.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from ...utils import user_data, io, data_structures
+from ...utils import axes, user_data, io, data_structures
 from ..physics import hydrostatics
 from ..physics import thermodynamics as gd_thermodynamics
 from ..discretisation import grid as dis_grid
@@ -38,11 +38,12 @@ def initialise():
     ud.coriolis_strength = np.array(ud.coriolis_strength)
 
     elem, node = dis_grid.grid_init(ud)
+    axes.validate(ud, elem.ndim)
 
     sol = fields.CellSolField(elem.sc)
 
     th = gd_thermodynamics.ThermodynamicalQuantities(ud)
-    npf = fields.NodePressureField(elem, node)
+    npf = fields.NodePressureField(elem, node, ud)
 
     io.init_logger(ud)
 
diff --git a/src/pybella/flow_solver/utils/solver_diagnostics.py b/src/pybella/flow_solver/utils/solver_diagnostics.py
index 889545e7..ecf3ef33 100644
--- a/src/pybella/flow_solver/utils/solver_diagnostics.py
+++ b/src/pybella/flow_solver/utils/solver_diagnostics.py
@@ -9,7 +9,7 @@ def get_p_from_pressure_related_fields(mem, ud, psinc=False):
 
     th = mem.th
 
-    kernel = np.ones((2, 2))
+    kernel = np.ones([2] * dp2n.ndim)
     dp2c = sp.signal.fftconvolve(dp2n, kernel, mode="valid") / kernel.sum()
 
     if psinc:
diff --git a/src/pybella/inputs/rising_bubble.py b/src/pybella/inputs/rising_bubble.py
index a23903e1..57efd04c 100644
--- a/src/pybella/inputs/rising_bubble.py
+++ b/src/pybella/inputs/rising_bubble.py
@@ -1,4 +1,5 @@
 import numpy as np
+from ..utils import axes
 from ..flow_solver.physics import hydrostatics
 
 
@@ -46,7 +47,7 @@ def sol_init(Sol, npf, elem, node, th, ud, seed=None):
     y0 = 0.2
     r0 = 0.2
 
-    g = ud.gravity_strength[1]
+    g = ud.gravity_strength[axes.vertical_axis(ud)]
     # print(ud.rho_ref)
 
     hydrostatics.state(npf, elem, node, th, ud)
diff --git a/src/pybella/interfaces/ic_config.py b/src/pybella/interfaces/ic_config.py
index 09d19be5..af4822dc 100644
--- a/src/pybella/interfaces/ic_config.py
+++ b/src/pybella/interfaces/ic_config.py
@@ -1,27 +1,21 @@
+# Initial-condition registry: -ic keys -> module paths providing UserData + sol_init.
+#
+# Legacy pre-restructure ICs (acoustic waves, SWE variants, baroclinic instability, ...)
+# were removed when the old RKLM_Python/inputs tree was dropped; they remain recoverable
+# from the git tag `archive/full_coriolis` and are mostly small tweaks of the cases below.
 IC_MODULES = {
-    "bi": "inputs.baroclinic_instability_periodic",
-    "tv": "inputs.travelling_vortex_2D",
-    "tv_2d": "inputs.travelling_vortex_2D",
-    "tv_neg": "inputs.travelling_vortex_2D_neg",
-    "tv_3d": "inputs.travelling_vortex_3D",
-    "tv_corr": "inputs.travelling_vortex_3D_Coriolis",
-    "aw": "inputs.acoustic_wave_high",
-    "igw": "inputs.internal_long_wave",
-    "igw_3d": "inputs.internal_long_wave_3D",
-    "lbw": "inputs.lamb_waves",
-    "skl": "inputs.sk_lamb_wave",
-    "mark": "inputs.mark",
-    "lw_p": "inputs.lamb_wave_perturb",
-    "igw_bb": "inputs.igw_baldauf_brdar",
-    "rb": "inputs.rising_bubble",
-    "rbc": "inputs.rising_bubble_cold",
-    "swe_bal_vortex": "inputs.swe_bal_vortex",
-    "swe": "inputs.shallow_water_3D",
-    "swe_icshear": "inputs.shallow_water_3D_icshear",
-    "swe_dvortex": "inputs.shallow_water_3D_dvortex",
+    "rb": "pybella.inputs.rising_bubble",
     "test_travelling_vortex": "pybella.tests.test_travelling_vortex",
+    "test_travelling_vortex_3d_coriolis": "pybella.tests.test_travelling_vortex_3d_coriolis",
     "test_internal_long_wave": "pybella.tests.test_internal_long_wave",
+    "test_igw_baldauf_brdar": "pybella.tests.test_igw_baldauf_brdar",
     "test_lamb_wave": "pybella.tests.test_lamb_wave",
     "test_blending_warm_bubble": "pybella.tests.test_blending_warm_bubble",
     "test_unstable_lamb": "pybella.tests.test_unstable_lamb",
+    "test_swe_vortex": "pybella.tests.test_swe_vortex",
+    "test_straka": "pybella.tests.test_straka",
+    "test_agnesi_hydrostatic": "pybella.tests.test_agnesi_hydrostatic",
+    "test_schaer_ridge": "pybella.tests.test_schaer_ridge",
+    "smoke_zvert": "pybella.tests.smoke_zvert",
+    "smoke_agnesi": "pybella.tests.smoke_agnesi",
 }
diff --git a/src/pybella/tests/agnesi_smith_analytic.py b/src/pybella/tests/agnesi_smith_analytic.py
new file mode 100644
index 00000000..e14d4324
--- /dev/null
+++ b/src/pybella/tests/agnesi_smith_analytic.py
@@ -0,0 +1,153 @@
+"""Smith (1980) linear hydrostatic mountain-wave reference.
+
+Steady linear solution for uniform wind U and constant buoyancy frequency
+N over a witch-of-Agnesi hill h(x) = h0 a^2 / (x^2 + a^2) in the
+hydrostatic regime (N a / U >> 1), Boussinesq form:
+
+    delta(x, z) = h0 a (a cos(l z) - x sin(l z)) / (x^2 + a^2)
+    w(x, z)     = U d(delta)/dx
+                = U h0 a [ (x^2 - a^2) sin(l z) - 2 a x cos(l z) ] / (x^2 + a^2)^2
+    u'(x, z)    = -U d(delta)/dz
+                = U h0 a l (a sin(l z) + x cos(l z)) / (x^2 + a^2)
+
+with vertical wavenumber l = N / U, and the analytic wave drag
+
+    D = (pi / 4) rho_0 N U h0^2        (per unit spanwise length).
+
+The compressible simulation's wave amplitudes grow with height like
+1/sqrt(rho0(z)); the comparator removes that anelastic factor by scaling
+the simulated perturbations with sqrt(rho0(z) / rho0(0)) before comparing
+against the Boussinesq reference.
+
+The comparison layer (:func:`compare`) is a standalone oracle in the
+spirit of ``baldauf_brdar_analytic.py``: it consumes the live ModelState
+of an in-process run (driven by ``test_scripts/test_agnesi_analytic.py``)
+and returns relative-L2 metrics plus the momentum-flux/drag diagnostics —
+it catches *wrongness*, not just *change*.
+"""
+
+import numpy as np
+
+
+def smith_fields(x, z, params):
+    """Analytic delta, w, u' on a meshgrid (x[:, None], z[None, :]) in SI."""
+    U, N, h0, a = params["U"], params["N"], params["h0"], params["a"]
+    l = N / U
+    x = np.asarray(x).reshape(-1, 1)
+    z = np.asarray(z).reshape(1, -1)
+    r2 = x**2 + a**2
+    delta = h0 * a * (a * np.cos(l * z) - x * np.sin(l * z)) / r2
+    w = U * h0 * a * ((x**2 - a**2) * np.sin(l * z) - 2 * a * x * np.cos(l * z)) / r2**2
+    up = U * h0 * a * l * (a * np.sin(l * z) + x * np.cos(l * z)) / r2
+    return delta, w, up
+
+
+def analytic_drag(params, rho0_surface):
+    """Smith wave drag per unit spanwise length [N/m]."""
+    return 0.25 * np.pi * rho0_surface * params["N"] * params["U"] * params["h0"] ** 2
+
+
+def _inner_xy(arr, ndim):
+    """Inner-domain x-y slab; collapses the degenerate spanwise axis in 3D."""
+    if ndim == 2:
+        return arr[2:-2, 2:-2]
+    return arr[2:-2, 2:-2, 0]
+
+
+def sim_perturbations_SI(mem, ud):
+    """Extract (x, z, u', w, rho0) in SI (y vertical; quasi-2D 3D or native 2D).
+
+    Returns inner-domain cell fields with the spanwise (z-array) axis
+    collapsed; heights are the physical cell heights from the metric.
+    """
+    ndim = mem.elem.ndim
+
+    rho = _inner_xy(mem.sol.rho, ndim)
+    u = _inner_xy(mem.sol.rhou, ndim) / rho * ud.u_ref
+    w = _inner_xy(mem.sol.rhov, ndim) / rho * ud.u_ref
+
+    x = mem.elem.x[2:-2] * ud.h_ref
+    z = _inner_xy(mem.elem.metric.z, ndim) * ud.h_ref
+
+    rho0 = _inner_xy(mem.npf.HydroState.rho0, ndim)
+
+    up = u - ud.u_wind_speed * ud.u_ref
+    # remove the anelastic 1/sqrt(rho0) amplitude growth for the
+    # Boussinesq comparison
+    fac = np.sqrt(rho0 / rho0[:, 0:1].mean())
+    return x, z, up * fac, w * fac, rho0
+
+
+def momentum_flux_profile(mem, ud, dx_SI):
+    """Vertically resolved momentum flux M(eta) = -integral rho u' w dx [N/m].
+
+    Computed on eta-levels (terrain-following), J-weighted densities; in
+    the linear steady state M is height-constant and equals -D below the
+    sponge.
+    """
+    ndim = mem.elem.ndim
+    rho = _inner_xy(mem.sol.rho, ndim) * (ud.p_ref / (ud.R_gas * ud.T_ref))
+    u = _inner_xy(mem.sol.rhou, ndim) / _inner_xy(mem.sol.rho, ndim) * ud.u_ref
+    w = _inner_xy(mem.sol.rhov, ndim) / _inner_xy(mem.sol.rho, ndim) * ud.u_ref
+    up = u - ud.u_wind_speed * ud.u_ref
+    return -np.sum(rho * up * w, axis=0) * dx_SI
+
+
+def compare(mem, ud, z_lo_SI=1000.0, z_hi_SI=9000.0):
+    """Rel-L2 of w and u' vs Smith in the interior window + drag metrics."""
+    params = {
+        "U": ud.U0,
+        "N": ud.NN,
+        "h0": ud.hill_height,
+        "a": ud.hill_width,
+    }
+
+    x, z, up_sim, w_sim, rho0 = sim_perturbations_SI(mem, ud)
+
+    # analytic fields at the simulation's physical cell heights (column-
+    # dependent under terrain): evaluate per cell
+    U, N, h0, a = params["U"], params["N"], params["h0"], params["a"]
+    l = N / U
+    xx = x.reshape(-1, 1)
+    r2 = xx**2 + a**2
+    w_ref = (
+        U
+        * h0
+        * a
+        * ((xx**2 - a**2) * np.sin(l * z) - 2 * a * xx * np.cos(l * z))
+        / r2**2
+    )
+    up_ref = U * h0 * a * l * (a * np.sin(l * z) + xx * np.cos(l * z)) / r2
+
+    window = (z >= z_lo_SI) & (z <= z_hi_SI)
+
+    # remove x-means from BOTH sides: on a periodic domain the wave drag
+    # decelerates the mean flow during spin-up — a real effect, but not part
+    # of Smith's infinite-domain steady wave solution (same convention as
+    # the Baldauf-Brdar comparator's frozen k=0 mode)
+    def demean(q):
+        return q - q.mean(axis=0, keepdims=True)
+
+    def rel_l2(sim, ref):
+        s, r = demean(sim), demean(ref)
+        denom = np.linalg.norm(np.where(window, r, 0.0))
+        return np.linalg.norm(np.where(window, s - r, 0.0)) / denom
+
+    metrics = {
+        "w": rel_l2(w_sim, w_ref),
+        "u": rel_l2(up_sim, up_ref),
+    }
+
+    # momentum flux vs analytic drag
+    dx_SI = mem.elem.dx * ud.h_ref
+    rho_ref_SI = ud.p_ref / (ud.R_gas * ud.T_ref)
+    flux = momentum_flux_profile(mem, ud, dx_SI)
+    drag = analytic_drag(params, rho0[:, 0].mean() * rho_ref_SI)
+
+    zc_eta = mem.elem.y[2:-2] * ud.h_ref  # eta levels (flat away from hill)
+    in_band = (zc_eta >= z_lo_SI) & (zc_eta <= z_hi_SI)
+    flux_band = flux[in_band]
+    metrics["drag_ratio"] = float(np.mean(flux_band) / drag)
+    metrics["flux_constancy"] = float(np.std(flux_band) / np.abs(np.mean(flux_band)))
+
+    return metrics, {"flux_profile": flux, "drag": drag, "z_eta": zc_eta}
diff --git a/src/pybella/tests/baldauf_brdar_analytic.py b/src/pybella/tests/baldauf_brdar_analytic.py
new file mode 100644
index 00000000..fdca184b
--- /dev/null
+++ b/src/pybella/tests/baldauf_brdar_analytic.py
@@ -0,0 +1,397 @@
+"""Linear reference solution for the Baldauf & Brdar (2013) IGW channel test.
+
+Builds a numerically-exact solution of the *linearised* compressible Euler
+equations about the isothermal hydrostatic background of
+``test_igw_baldauf_brdar`` (f-plane Coriolis included) and evolves the
+simulation's own saved initial condition, providing an independent physics
+oracle for the nonlinear solver: golden masters catch *change*, this catches
+*wrongness* (sign, dispersion, rotation errors).
+
+Method (instead of transcribing B&B's closed form, which hinges on delicate
+rigid-lid basis choices):
+
+1.  Bretherton transform: with rho0(z) = rho_s exp(-z/H), the substitution
+    (u, vo, w, b) = exp(z/2H) (U, Vo, W, B),  p' = rho_s exp(-z/2H) P
+    turns the linear system into one with CONSTANT coefficients:
+
+        dU/dt  = -ik P - f Vo
+        dVo/dt = +f U
+        dW/dt  = -dP/dz + alpha P + B
+        dB/dt  = -N^2 W
+        dP/dt  = -c^2 ik U - c^2 dW/dz - c^2 alpha W
+
+    with alpha = 1/(2H) - g/c^2, N^2 = (gamma-1) g^2 / (gamma R T0), and
+    b = -g (rho' - p'/c^2) / rho0 the buoyancy. (Coriolis signs follow
+    pyBELLA's convention: du/dt = -f vo, dvo/dt = +f u, rotation about the
+    vertical y-axis; vo is pyBELLA's out-of-plane w.)
+
+2.  x is periodic: exact FFT decomposition; each k evolves independently.
+
+3.  z: staggered collocation on a grid ``refine``x finer than the simulation
+    (W, B on interior interfaces -- the rigid lids W=0 are imposed by
+    construction; U, Vo, P at cell centres; 2nd-order staggered derivatives,
+    energy-neutral discretisation).
+
+4.  Time: exact, via the matrix exponential exp(A t) per mode.
+
+The comparison is restricted to the x-mean-free (k != 0) wave content: the
+k = 0 column is a hydrostatically balanced state that is frozen in the
+linear dynamics, and subtracting the instantaneous x-mean on both sides
+makes the comparison immune to the (discrete-vs-continuum) background
+residuals of the initial condition.
+"""
+
+import numpy as np
+import scipy as sp
+
+
+class IGWParams:
+    """SI parameters of the test_igw_baldauf_brdar setup, derived from ud."""
+
+    def __init__(self, ud):
+        self.gamma = ud.gamm
+        self.R = ud.R_gas
+        self.T0 = ud.T_ref
+        self.g = ud.grav
+        self.p_s = ud.p_ref
+        self.f = ud.omega
+        self.h_ref = ud.h_ref
+        self.t_ref = ud.t_ref
+        self.u_ref = ud.u_ref
+        self.rho_ref = ud.p_ref / (ud.R_gas * ud.T_ref)
+
+        self.c2 = self.gamma * self.R * self.T0
+        self.H = self.R * self.T0 / self.g  # density/pressure scale height
+        self.N2 = (self.gamma - 1.0) * self.g**2 / (self.gamma * self.R * self.T0)
+        self.alpha = 1.0 / (2.0 * self.H) - self.g / self.c2
+
+    def rho0(self, z):
+        return self.rho_ref * np.exp(-z / self.H)
+
+    def p0(self, z):
+        return self.p_s * np.exp(-z / self.H)
+
+
+def _build_operator(k, zc, zi, par):
+    """A(k) for the transformed state [U, Vo, P (centres); W, B (interfaces)].
+
+    zc: cell-centre heights (nc,), zi: interior interface heights (ni = nc-1,).
+    Returns the complex (3nc + 2ni) square matrix.
+    """
+    nc, ni = len(zc), len(zi)
+    dz = zc[1] - zc[0]
+    n = 3 * nc + 2 * ni
+    A = np.zeros((n, n), dtype=complex)
+
+    iU = slice(0, nc)
+    iV = slice(nc, 2 * nc)
+    iP = slice(2 * nc, 3 * nc)
+    iW = slice(3 * nc, 3 * nc + ni)
+    iB = slice(3 * nc + ni, n)
+
+    I_c = np.eye(nc)
+    # interface <- centre difference / average (ni x nc)
+    D_ic = np.zeros((ni, nc))
+    M_ic = np.zeros((ni, nc))
+    for i in range(ni):
+        D_ic[i, i] = -1.0 / dz
+        D_ic[i, i + 1] = 1.0 / dz
+        M_ic[i, i] = 0.5
+        M_ic[i, i + 1] = 0.5
+    # centre <- interface difference / average (nc x ni), W = 0 at the lids
+    D_ci = np.zeros((nc, ni))
+    M_ci = np.zeros((nc, ni))
+    for j in range(nc):
+        if j - 1 >= 0:
+            D_ci[j, j - 1] = -1.0 / dz
+            M_ci[j, j - 1] = 0.5
+        if j < ni:
+            D_ci[j, j] = 1.0 / dz
+            M_ci[j, j] = 0.5
+
+    ik = 1j * k
+
+    # dU/dt = -ik P - f Vo
+    A[iU, iP] = -ik * I_c
+    A[iU, iV] = -par.f * I_c
+    # dVo/dt = +f U
+    A[iV, iU] = +par.f * I_c
+    # dP/dt = -c^2 ik U - c^2 (D_ci W) - c^2 alpha (M_ci W)
+    A[iP, iU] = -par.c2 * ik * I_c
+    A[iP, iW] = -par.c2 * (D_ci + par.alpha * M_ci)
+    # dW/dt = -(D_ic P) + alpha (M_ic P) + B
+    A[iW, iP] = -D_ic + par.alpha * M_ic
+    A[iW, iB] = np.eye(ni)
+    # dB/dt = -N^2 W
+    A[iB, iW] = -par.N2 * np.eye(ni)
+
+    return A, (iU, iV, iP, iW, iB)
+
+
+def evolve_linear(fields_ic, x_len, zc_sim, t_end, par, refine=2):
+    """Evolve the linear system from the sim's IC perturbations.
+
+    fields_ic: dict with keys 'u', 'vo', 'w', 'p', 'rho' — SI perturbation
+    fields on the sim's interior cell grid (nx, nz), x-mean NOT yet removed.
+    x_len: domain length [m]; zc_sim: sim cell-centre heights (nz,) [m].
+    Returns dict of the same keys evaluated on the sim grid at t_end
+    (k != 0 content only).
+    """
+    nx, nz = fields_ic["u"].shape
+    dz_sim = zc_sim[1] - zc_sim[0]
+
+    # fine staggered z grids
+    ncf = refine * nz
+    dzf = dz_sim / refine
+    zcf = (np.arange(ncf) + 0.5) * dzf
+    zif = (np.arange(1, ncf)) * dzf
+
+    # buoyancy from rho', p'
+    b_ic = (
+        -par.g
+        * (fields_ic["rho"] - fields_ic["p"] / par.c2)
+        / par.rho0(zc_sim)[None, :]
+    )
+
+    # remove x-mean (k=0 column is frozen in the linear dynamics)
+    def demean(q):
+        return q - q.mean(axis=0, keepdims=True)
+
+    u = demean(fields_ic["u"])
+    vo = demean(fields_ic["vo"])
+    w = demean(fields_ic["w"])
+    p = demean(fields_ic["p"])
+    b = demean(b_ic)
+
+    # Bretherton transform on the sim grid, then x-FFT
+    wgt = np.exp(-zc_sim / (2.0 * par.H))[None, :]
+    U0 = np.fft.rfft(u * wgt, axis=0)
+    V0 = np.fft.rfft(vo * wgt, axis=0)
+    W0 = np.fft.rfft(w * wgt, axis=0)
+    B0 = np.fft.rfft(b * wgt, axis=0)
+    P0 = np.fft.rfft(p / (par.rho_ref * wgt), axis=0)
+
+    ks = 2.0 * np.pi * np.fft.rfftfreq(nx, d=x_len / nx)
+    nk = len(ks)
+
+    # spline-interpolate the (smooth) transformed profiles to the fine grids
+    def to_fine(Q, ztarget):
+        out = np.empty((nk, len(ztarget)), dtype=complex)
+        for m in range(nk):
+            re = sp.interpolate.CubicSpline(zc_sim, Q[m].real, bc_type="natural")
+            im = sp.interpolate.CubicSpline(zc_sim, Q[m].imag, bc_type="natural")
+            out[m] = re(ztarget) + 1j * im(ztarget)
+        return out
+
+    Uf, Vf, Pf = to_fine(U0, zcf), to_fine(V0, zcf), to_fine(P0, zcf)
+    Wf, Bf = to_fine(W0, zif), to_fine(B0, zif)
+
+    ncf_, nif = len(zcf), len(zif)
+    UT = np.zeros_like(Uf)
+    VT = np.zeros_like(Vf)
+    PT = np.zeros_like(Pf)
+    WT = np.zeros_like(Wf)
+    BT = np.zeros_like(Bf)
+
+    # skip spectrally-empty modes (the Gaussian envelope kills high k)
+    mode_amp = np.zeros(nk)
+    for m in range(nk):
+        mode_amp[m] = max(
+            np.abs(Uf[m]).max(),
+            np.abs(Vf[m]).max(),
+            np.abs(Wf[m]).max(),
+            np.abs(Bf[m]).max() / par.N2**0.5,
+            np.abs(Pf[m]).max() / par.c2**0.5,
+        )
+    # 1e-6 of the peak: the Gaussian envelope's spectrum is ~2e-8 of peak by
+    # mode 80; everything above the cut carries no metric-relevant energy
+    amp_cut = 1e-6 * mode_amp.max()
+
+    energy_drift = 0.0
+    for m in range(1, nk):  # skip k = 0
+        if mode_amp[m] < amp_cut:
+            continue
+        A, (iU, iV, iP, iW, iB) = _build_operator(ks[m], zcf, zif, par)
+        q0 = np.concatenate([Uf[m], Vf[m], Pf[m], Wf[m], Bf[m]])
+        # exact-in-time evolution via eigendecomposition (A is neutrally
+        # stable: similar to skew-Hermitian under the energy weights)
+        lam, S = np.linalg.eig(A)
+        qt = S @ (np.exp(lam * t_end) * np.linalg.solve(S, q0))
+
+        # energy conservation check (discretisation is energy-neutral)
+        def energy(q):
+            return (
+                np.sum(np.abs(q[iU]) ** 2)
+                + np.sum(np.abs(q[iV]) ** 2)
+                + np.sum(np.abs(q[iP]) ** 2) / par.c2
+                + np.sum(np.abs(q[iW]) ** 2)
+                + np.sum(np.abs(q[iB]) ** 2) / par.N2
+            )
+
+        e0 = energy(q0)
+        if e0 > 0.0:
+            energy_drift = max(energy_drift, abs(energy(qt) / e0 - 1.0))
+
+        UT[m], VT[m], PT[m] = qt[iU], qt[iV], qt[iP]
+        WT[m], BT[m] = qt[iW], qt[iB]
+
+    # back to physical space on the fine grid
+    def from_modes(Q):
+        return np.fft.irfft(Q, n=nx, axis=0)
+
+    u_f = from_modes(UT) * np.exp(zcf / (2.0 * par.H))[None, :]
+    vo_f = from_modes(VT) * np.exp(zcf / (2.0 * par.H))[None, :]
+    p_f = from_modes(PT) * (par.rho_ref * np.exp(-zcf / (2.0 * par.H)))[None, :]
+    w_f = from_modes(WT) * np.exp(zif / (2.0 * par.H))[None, :]
+    b_f = from_modes(BT) * np.exp(zif / (2.0 * par.H))[None, :]
+
+    # evaluate on the sim cell centres
+    def to_sim(qf, zsrc):
+        out = np.empty((nx, len(zc_sim)))
+        for i in range(nx):
+            out[i] = sp.interpolate.CubicSpline(zsrc, qf[i], bc_type="natural")(zc_sim)
+        return out
+
+    u_s = to_sim(u_f, zcf)
+    vo_s = to_sim(vo_f, zcf)
+    p_s = to_sim(p_f, zcf)
+    w_s = to_sim(w_f, zif)
+    b_s = to_sim(b_f, zif)
+
+    # rho' back from (b, p'):  b = -g (rho' - p'/c^2)/rho0
+    rho_s = p_s / par.c2 - par.rho0(zc_sim)[None, :] * b_s / par.g
+
+    return (
+        {"u": u_s, "vo": vo_s, "w": w_s, "p": p_s, "rho": rho_s},
+        {"energy_drift": energy_drift},
+    )
+
+
+def sim_fields_SI(h5file, tag, par, interior=(slice(2, -2), slice(2, -2))):
+    """Extract SI perturbation fields from a pyBELLA igw output file.
+
+    tag: e.g. '000_ic' or '030_after_full_step'. Background = isothermal
+    analytic profiles; the comparison layer removes x-means anyway.
+    Returns dict u, vo, w, p, rho on the interior cell grid plus zc [m].
+    """
+    import h5py
+
+    with h5py.File(h5file, "r") as h:
+        rho = h["rho"][f"rho_{tag}"][...][interior]
+        rhou = h["rhou"][f"rhou_{tag}"][...][interior]
+        rhov = h["rhov"][f"rhov_{tag}"][...][interior]
+        rhow = h["rhow"][f"rhow_{tag}"][...][interior]
+        rhoY = h["rhoY"][f"rhoY_{tag}"][...][interior]
+
+    nz = rho.shape[1]
+    zc = (np.arange(nz) + 0.5) * (10000.0 / nz)
+
+    rho_SI = rho * par.rho_ref
+    p_SI = rhoY**par.gamma * par.p_s
+
+    return {
+        "u": (rhou / rho) * par.u_ref,
+        "vo": (rhow / rho) * par.u_ref,  # pyBELLA w = out-of-plane velocity
+        "w": (rhov / rho) * par.u_ref,  # pyBELLA v = vertical velocity
+        "p": p_SI - par.p0(zc)[None, :],
+        "rho": rho_SI - (par.p0(zc) / (par.R * par.T0))[None, :],
+    }, zc
+
+
+class _StubWriter:
+    def write(self, *a, **k):
+        pass
+
+    def populate(self, *a, **k):
+        pass
+
+
+def run_sim(dt_factor=1, omega=None, nx_factor=1):
+    """Run test_igw_baldauf_brdar in-process at dt = 500 s / dt_factor.
+
+    Returns (ud, ic_fields, end_fields, zc, t_end_SI) with fields in SI
+    perturbation form on the interior cell grid (see sim_fields_SI).
+    The run length is held fixed at 31 * 500 s = 15500 s.
+    omega overrides the case's Coriolis parameter (e.g. 0.0 to switch it off
+    consistently in sim and reference).
+    """
+    from .test_igw_baldauf_brdar import UserData, sol_init
+    from ..utils import user_data, data_structures
+    from ..flow_solver.discretisation import grid as dis_grid
+    from ..flow_solver.discretisation import time_update as dis_time_update
+    from ..flow_solver.utils import fields as fs_fields
+    from ..flow_solver.utils import cache as fs_cache
+    from ..flow_solver.utils.boundary import cell_boundary as bdry_c
+    from ..flow_solver.physics import thermodynamics as gd_thermodynamics
+
+    ud = user_data.UserDataInit(**vars(UserData()))
+    if omega is not None:
+        ud.omega = omega
+    ud.coriolis_strength = np.array([0.0, ud.omega * ud.t_ref, 0.0])
+    if nx_factor != 1:
+        # keep inx even (initial_pressure parity assert): 602 -> 601 cells
+        ud.inx = 301 * nx_factor
+    ud.dtfixed /= dt_factor
+    ud.dtfixed0 /= dt_factor
+    ud.stepmax = 31 * dt_factor
+    ud.diag = False
+
+    elem, node = dis_grid.grid_init(ud)
+    sol = fs_fields.CellSolField(elem.sc)
+    th = gd_thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fs_fields.NodePressureField(elem, node, ud)
+    sol = sol_init(sol, npf, elem, node, th, ud)
+
+    mem = data_structures.ModelState(
+        elem, node, sol, npf, th, fs_cache.FlowSolverCache()
+    )
+    bdry_c.set_ghost_cells(mem, ud)
+
+    par = IGWParams(ud)
+
+    def extract(s):
+        i2 = (slice(2, -2), slice(2, -2))
+        rho = s.rho[i2]
+        rhoY = s.rhoY[i2]
+        nz = rho.shape[1]
+        zc = (np.arange(nz) + 0.5) * (10000.0 / nz)
+        return {
+            "u": (s.rhou[i2] / rho) * par.u_ref,
+            "vo": (s.rhow[i2] / rho) * par.u_ref,
+            "w": (s.rhov[i2] / rho) * par.u_ref,
+            "p": rhoY**par.gamma * par.p_s - par.p0(zc)[None, :],
+            "rho": rho * par.rho_ref - (par.p0(zc) / (par.R * par.T0))[None, :],
+        }, zc
+
+    ic, zc = extract(mem.sol)
+    mem = dis_time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+    end, _ = extract(mem.sol)
+
+    return ud, ic, end, zc, mem.time.t * ud.t_ref
+
+
+def compare(
+    h5file, ud, t_end, tag_ic="000_ic", tag_end="030_after_full_step", refine=2
+):
+    """Run the full comparison; returns (metrics, sim_end, ref_end, zc)."""
+    par = IGWParams(ud)
+    L = (ud.xmax - ud.xmin) * ud.h_ref
+
+    ic, zc = sim_fields_SI(h5file, tag_ic, par)
+    end, _ = sim_fields_SI(h5file, tag_end, par)
+
+    ref, diag = evolve_linear(ic, L, zc, t_end, par, refine=refine)
+
+    def demean(q):
+        return q - q.mean(axis=0, keepdims=True)
+
+    metrics = {"energy_drift": diag["energy_drift"]}
+    sim_w = {}
+    for key in ("u", "vo", "w", "p", "rho"):
+        s = demean(end[key])
+        r = ref[key]
+        denom = np.linalg.norm(r)
+        metrics[key] = np.linalg.norm(s - r) / denom if denom > 0 else np.inf
+        sim_w[key] = s
+
+    return metrics, sim_w, ref, zc
diff --git a/src/pybella/tests/diagnostics.py b/src/pybella/tests/diagnostics.py
index 16d91e67..f14006f7 100644
--- a/src/pybella/tests/diagnostics.py
+++ b/src/pybella/tests/diagnostics.py
@@ -42,10 +42,13 @@ def update_targets(self):
                 self.arr_dump[dump_name][attribute] = float(arr.sum())
 
                 if self.plot:
+                    pl_arr = arr.T
+                    if pl_arr.ndim == 3:  # 3D target: plot transverse mid-slice
+                        pl_arr = pl_arr[:, pl_arr.shape[1] // 2, :]
                     # vis_pt.plotter accepts a list of tuples with plot and panel title.
                     pl = vis_pt.plotter(
                         [
-                            (arr.T, "ref"),
+                            (pl_arr, "ref"),
                         ],
                         ncols=1,
                         figsize=(4, 3),
@@ -137,13 +140,11 @@ def test_do(self, mem, ud):
                 logging.info(str(e))
                 raise
 
-        logging.info(
-            f"""
+        logging.info(f"""
             {'#' * 10}
             Test passed for {self.current_run}
             {'#' * 10}
-            """.strip()
-        )
+            """.strip())
 
     def __init(self, ds: DiagnosticState):
         tp = test_params(ds)
@@ -176,6 +177,13 @@ def __plot_comparison(self, mem, ref_mem, ud):
             test_sol = self.__get_sol_for_comparison(mem, ud, attribute)
             ref_sol = self.__get_sol_for_comparison(ref_mem, ud, attribute)
 
+            # 3D fields: contour-plot the interior mid-slice along the
+            # transverse (y) axis; the comparison itself stays full-3D
+            if test_sol.ndim == 3:
+                jmid = test_sol.shape[1] // 2
+                test_sol = test_sol[:, jmid, :]
+                ref_sol = ref_sol[:, jmid, :]
+
             arr_plots.append([ref_sol, "ref"])
             arr_plots.append([test_sol, "test"])
             arr_plots.append([ref_sol - test_sol, "diff"])
diff --git a/src/pybella/tests/schaer_linear_analytic.py b/src/pybella/tests/schaer_linear_analytic.py
new file mode 100644
index 00000000..3f63aee7
--- /dev/null
+++ b/src/pybella/tests/schaer_linear_analytic.py
@@ -0,0 +1,147 @@
+"""Linear mountain-wave reference for arbitrary periodic terrain (FFT).
+
+Steady linear Boussinesq solution for uniform wind U and constant N over
+ANY terrain h(x) periodic on the domain: per Fourier mode k of h,
+
+    w_hat(k, z) = i k U h_hat(k) * E(k, z)
+
+with the vertical structure (l = N / U the Scorer parameter)
+
+    E = exp(i m z),  m = sign(k) sqrt(l^2 - k^2)   for |k| < l (radiating,
+                                                    upward group velocity)
+    E = exp(-q z),   q = sqrt(k^2 - l^2)           for |k| > l (evanescent)
+
+and from 2D continuity u'_hat = -(1 / i k) dw_hat/dz. The Schär (2002)
+ridge spectrum has its envelope band radiating hydrostatically and its
+small-scale peaks (|k| = 2 pi / lambda > l) evanescent — above ~2 km the
+true field carries no small-scale signal, which is the coordinate-quality
+discriminator.
+
+The wave drag is computed from the analytic surface fields,
+D = -rho0 * integral u'(x,0) w(x,0) dx — convention-free; fed the Agnesi
+profile it reproduces Smith's closed form D = (pi/4) rho0 N U h0^2
+(self-test in test_scripts/test_schaer_analytic.py).
+
+Mirrors ``agnesi_smith_analytic.compare`` (and reuses its simulation
+extractors), adding the discriminator metric ``E_ss``: the small-scale
+spectral fraction of simulated w at eta-levels where the true small-scale
+signal is evanescent-dead.
+"""
+
+import numpy as np
+
+from . import agnesi_smith_analytic as smith
+
+
+def linear_fields(x_SI, z_SI, h_SI, U, N):
+    """Analytic (w, u') at cells (x_i, z_ij) for periodic terrain h(x_i).
+
+    x_SI: (Nx,) equispaced periodic cell centers; z_SI: (Nx, Nz) physical
+    cell heights (column-dependent under terrain); h_SI: (Nx,) terrain.
+    """
+    nx = x_SI.size
+    L = (x_SI[1] - x_SI[0]) * nx
+    k_all = 2.0 * np.pi * np.fft.fftfreq(nx, d=L / nx)
+    h_hat = np.fft.fft(h_SI) / nx  # h(x) = sum_k h_hat e^{i k x}
+
+    l = N / U
+    w = np.zeros_like(z_SI)
+    up = np.zeros_like(z_SI)
+    # mode loop (k = 0 carries no wave); negligible amplitudes skipped
+    tiny = 1e-14 * np.abs(h_hat).max()
+    for idx in range(1, nx):
+        k = k_all[idx]
+        a_w = 1j * k * U * h_hat[idx]
+        if np.abs(a_w) < tiny:
+            continue
+        # DFT phases are indexed from the first sample: e^{i k (x - x0)}
+        phase_x = np.exp(1j * k * (x_SI - x_SI[0])).reshape(-1, 1)
+        if abs(k) < l:
+            m = np.sign(k) * np.sqrt(l**2 - k**2)
+            ez = np.exp(1j * m * z_SI)
+            w_hat = a_w * ez
+            up_hat = -(m / k) * w_hat
+        else:
+            q = np.sqrt(k**2 - l**2)
+            ez = np.exp(-q * z_SI)
+            w_hat = a_w * ez
+            up_hat = -1j * (q / k) * w_hat
+        w += np.real(w_hat * phase_x)
+        up += np.real(up_hat * phase_x)
+    return w, up
+
+
+def analytic_drag(x_SI, h_SI, U, N, rho0_surface):
+    """D = -rho0 integral u'(x,0) w(x,0) dx over the periodic domain [N/m]."""
+    z0 = np.zeros((x_SI.size, 1))
+    w0, up0 = linear_fields(x_SI, z0, h_SI, U, N)
+    dx = x_SI[1] - x_SI[0]
+    return -rho0_surface * np.sum(up0[:, 0] * w0[:, 0]) * dx
+
+
+def small_scale_fraction(w_levels, dx_SI, lambda_SI, frac=0.75):
+    """E_ss: spectral energy fraction of w at |k| >= frac * (2 pi / lambda).
+
+    ``w_levels``: (Nx, Nlev) — w on eta-levels (k = 0 mode excluded; the
+    mean carries no wave information).
+    """
+    nx = w_levels.shape[0]
+    k = 2.0 * np.pi * np.fft.fftfreq(nx, d=dx_SI)
+    spec = np.abs(np.fft.fft(w_levels, axis=0)) ** 2
+    spec[0] = 0.0
+    k_cut = frac * 2.0 * np.pi / lambda_SI
+    total = spec.sum()
+    if total == 0.0:
+        return 0.0
+    return float(spec[np.abs(k) >= k_cut].sum() / total)
+
+
+def compare(mem, ud, z_lo_SI=1000.0, z_hi_SI=8000.0, ss_lo_SI=4000.0, ss_hi_SI=9000.0):
+    """Metrics dict in the shape of ``agnesi_smith_analytic.compare`` plus
+    the coordinate-quality discriminator ``E_ss``."""
+    U = ud.U0
+    N = ud.NN
+
+    x, z, up_sim, w_sim, rho0 = smith.sim_perturbations_SI(mem, ud)
+    h_SI = ud.orography(x / ud.h_ref, 0.0) * ud.h_ref
+
+    w_ref, up_ref = linear_fields(x, z, h_SI, U, N)
+
+    window = (z >= z_lo_SI) & (z <= z_hi_SI)
+
+    # remove x-means from both sides: periodic-domain wave drag decelerates
+    # the mean flow during spin-up — real, but not part of the steady linear
+    # wave (the reference k = 0 mode is zero by construction)
+    def demean(q):
+        return q - q.mean(axis=0, keepdims=True)
+
+    def rel_l2(sim, ref):
+        s, r = demean(sim), demean(ref)
+        denom = np.linalg.norm(np.where(window, r, 0.0))
+        return np.linalg.norm(np.where(window, s - r, 0.0)) / denom
+
+    metrics = {
+        "w": rel_l2(w_sim, w_ref),
+        "u": rel_l2(up_sim, up_ref),
+    }
+
+    # momentum flux vs the spectral drag
+    dx_SI = mem.elem.dx * ud.h_ref
+    rho_ref_SI = ud.p_ref / (ud.R_gas * ud.T_ref)
+    flux = smith.momentum_flux_profile(mem, ud, dx_SI)
+    drag = analytic_drag(x, h_SI, U, N, rho0[:, 0].mean() * rho_ref_SI)
+
+    zc_eta = mem.elem.y[2:-2] * ud.h_ref  # eta levels (flat away from hill)
+    in_band = (zc_eta >= z_lo_SI) & (zc_eta <= z_hi_SI)
+    flux_band = flux[in_band]
+    metrics["drag_ratio"] = float(np.mean(flux_band) / drag)
+    metrics["flux_constancy"] = float(np.std(flux_band) / np.abs(np.mean(flux_band)))
+
+    # discriminator: small-scale w energy at eta-levels where the true
+    # small-scale response is evanescent-dead
+    ss_band = (zc_eta >= ss_lo_SI) & (zc_eta <= ss_hi_SI)
+    metrics["E_ss"] = small_scale_fraction(
+        demean(w_sim)[:, ss_band], dx_SI, ud.ridge_wavelength
+    )
+
+    return metrics, {"flux_profile": flux, "drag": drag, "z_eta": zc_eta}
diff --git a/src/pybella/tests/smoke_agnesi.py b/src/pybella/tests/smoke_agnesi.py
new file mode 100644
index 00000000..a892670b
--- /dev/null
+++ b/src/pybella/tests/smoke_agnesi.py
@@ -0,0 +1,135 @@
+"""Quasi-2D mountain-wave plumbing smoke case (NOT a golden-master case).
+
+The Agnesi configuration *shape*: a vertical x-y slice run as quasi-2D 3D
+(``inz = 2``, z degenerate periodic) so the implicit solve goes through the
+full-tensor 27-point operator (``lap3D``) — the path terrain metric terms
+attach to. Isothermal hydrostatic background, uniform horizontal wind over
+a 400 m witch-of-Agnesi hill (Gal-Chen terrain-following coordinates),
+periodic x, walls in y, a few steps. No target, no CompareSol: the pytest
+asserts a clean run; the in-process terrain gates (conservation, bounded
+mountain-wave response) live in ``test_terrain_resting_atmosphere.py``.
+
+The golden-master Agnesi regression case (``test_agnesi_hydrostatic``)
+with a proper top sponge and the Smith-1980 oracle is separate.
+"""
+
+import numpy as np
+
+from ..utils import options as opts
+from ..utils import axes
+from ..flow_solver.physics import hydrostatics
+
+
+class UserData(object):
+    grav = 9.80665  # [m/s^2]
+    omega = 0.0
+
+    def __init__(self):
+        self.grav = self.grav
+        self.omega = self.omega
+
+        self.R_gas = 287.05
+        self.gamm = 1.4
+        self.T_ref = 250.0
+        self.p_ref = 1e5
+        self.h_ref = self.R_gas * self.T_ref / self.grav  # [m]
+        self.u_ref = 10.0
+        self.t_ref = self.h_ref / self.u_ref
+
+        self.Msq = self.u_ref * self.u_ref / (self.R_gas * self.T_ref)
+
+        # vertical on axis 1 (y); x horizontal, z degenerate
+        self.gravity_direction = 1
+        self.gravity_strength = np.zeros(3)
+        self.gravity_strength[1] = self.grav * self.h_ref / (self.R_gas * self.T_ref)
+        self.i_gravity = np.zeros(3)
+        self.i_gravity[1] = 1
+
+        self.xmin = -30000.0 / self.h_ref
+        self.xmax = 30000.0 / self.h_ref
+        self.ymin = 0.0
+        self.ymax = 10000.0 / self.h_ref
+        self.zmin = 0.0
+        self.zmax = 1.0
+
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.PERIODIC
+        self.bdry_type[1] = opts.BdryType.WALL
+        self.bdry_type[2] = opts.BdryType.PERIODIC
+
+        self.is_compressible = 1
+        self.is_nonhydrostatic = 1
+
+        # uniform background wind along x
+        self.u_wind_speed = 10.0 / self.u_ref
+        self.v_wind_speed = 0.0
+        self.w_wind_speed = 0.0
+
+        self.CFL = 0.9
+        self.dtfixed = 10.0 / self.t_ref  # 10 s
+        self.dtfixed0 = 10.0 / self.t_ref
+
+        self.inx = 48 + 1
+        self.iny = 16 + 1
+        self.inz = 1 + 1
+
+        self.tout = [1.0]
+        self.stepmax = 3
+
+        self.stratification = self.stratification_function
+        self.orography = self.orography_function
+        self.output_timesteps = False
+
+        self.diag = False
+        self.output_base_name = "_smoke_agnesi"
+        self.output_type = "test"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.iny - 1)
+        self.autogen_fn = False
+
+    def orography_function(self, xi1, xi2):
+        # witch of Agnesi: 400 m peak, 5 km half-width
+        h0 = 400.0 / self.h_ref
+        a = 5000.0 / self.h_ref
+        return h0 * a**2 / (xi1**2 + a**2) + 0.0 * xi2
+
+    def stratification_function(self, y):
+        # isothermal: theta ~ exp(N^2 z / g) with N^2 = (gamma-1) g^2 / (gamma R T)
+        Nsq = (
+            ((self.gamm - 1.0) / self.gamm)
+            * self.grav
+            * self.grav
+            / (self.R_gas * self.T_ref)
+        ) * self.t_ref**2
+        g = self.gravity_strength[1] / self.Msq
+        return np.exp(Nsq * y / g)
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    # hydrostatic background along the configured vertical (y)
+    hydrostatics.analytical_state(npf, elem, node, th, ud)
+
+    v = axes.vertical_axis(ud)
+    S0c = npf.HydroState.get_S0c(elem)
+    if npf.HydroState.field_mode:
+        # terrain: hydrostates are already full per-column fields
+        rhoY0 = npf.HydroState.rhoY0
+    else:
+        rhoY0 = axes.expand_profile(npf.HydroState.rhoY0, elem.ndim, v, elem.sc)
+
+    Sol.rhoY[...] = rhoY0
+    Sol.rho[...] = rhoY0 * S0c
+    Sol.rhou[...] = Sol.rho * ud.u_wind_speed
+    Sol.rhov[...] = 0.0
+    Sol.rhow[...] = 0.0
+    Sol.rhoX[...] = Sol.rho * (Sol.rho / Sol.rhoY - S0c)
+
+    # hydrostatically balanced background: zero perturbation pressure
+    # (p2 is the perturbation Exner pressure; the background gradient is
+    # balanced through the S0c/dbuoy formulation)
+    npf.p2_nodes[...] = 0.0
+
+    ud.nonhydrostasy = 1.0
+    ud.compressibility = 1.0
+
+    return Sol
diff --git a/src/pybella/tests/smoke_zvert.py b/src/pybella/tests/smoke_zvert.py
new file mode 100644
index 00000000..e0c3e17c
--- /dev/null
+++ b/src/pybella/tests/smoke_zvert.py
@@ -0,0 +1,127 @@
+"""z-vertical plumbing smoke case (NOT a golden-master regression case).
+
+A minimal 3D quasi-2D setup with the vertical on axis 2
+(``gravity_direction = 2``, met convention): isothermal hydrostatic
+background along z, a small thermal bump, a few steps. Its only job is to
+push a non-default vertical axis through the PRODUCTION entry path —
+``-ic`` registry, ``prepare.initialise`` (incl. ``axes.validate``),
+hydrostatics along z, gravity ghost cells / wall zeroing on axis 2, and
+the full time loop — which the in-process permutation oracle deliberately
+bypasses. No target, no CompareSol: the pytest only asserts a clean run.
+
+Physics agnosticity itself is proven by ``test_permutation_oracle.py``;
+a permanent z-vertical golden-master case is deferred to the
+terrain-following work (decision 2026-06-10).
+"""
+
+import numpy as np
+
+from ..utils import options as opts
+from ..utils import axes
+from ..flow_solver.physics import hydrostatics
+
+
+class UserData(object):
+    grav = 9.80665  # [m/s^2]
+    omega = 0.0
+
+    def __init__(self):
+        self.grav = self.grav
+        self.omega = self.omega
+
+        self.R_gas = 287.05
+        self.gamm = 1.4
+        self.T_ref = 250.0
+        self.p_ref = 1e5
+        self.h_ref = self.R_gas * self.T_ref / self.grav  # [m]
+        self.u_ref = 10.0
+        self.t_ref = self.h_ref / self.u_ref
+
+        self.Msq = self.u_ref * self.u_ref / (self.R_gas * self.T_ref)
+
+        # vertical on axis 2 (z); x horizontal, y degenerate
+        self.gravity_direction = 2
+        self.gravity_strength = np.zeros(3)
+        self.gravity_strength[2] = self.grav * self.h_ref / (self.R_gas * self.T_ref)
+        self.i_gravity = np.zeros(3)
+        self.i_gravity[2] = 1
+
+        self.xmin = 0.0
+        self.xmax = 60000.0 / self.h_ref
+        self.ymin = 0.0
+        self.ymax = 1.0
+        self.zmin = 0.0
+        self.zmax = 10000.0 / self.h_ref
+
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.PERIODIC
+        self.bdry_type[1] = opts.BdryType.PERIODIC
+        self.bdry_type[2] = opts.BdryType.WALL
+
+        self.is_compressible = 1
+        self.is_nonhydrostatic = 1
+
+        self.CFL = 0.9
+        self.dtfixed = 10.0 / self.t_ref  # 10 s
+        self.dtfixed0 = 10.0 / self.t_ref
+
+        self.inx = 48 + 1
+        self.iny = 1 + 1
+        self.inz = 10 + 1
+
+        self.tout = [1.0]
+        self.stepmax = 3
+
+        self.stratification = self.stratification_function
+        self.output_timesteps = False
+
+        self.diag = False
+        self.output_base_name = "_smoke_zvert"
+        self.output_type = "test"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.inz - 1)
+        self.autogen_fn = False
+
+    def stratification_function(self, y):
+        # isothermal: theta ~ exp(N^2 z / g) with N^2 = (gamma-1) g^2 / (gamma R T)
+        Nsq = (
+            ((self.gamm - 1.0) / self.gamm)
+            * self.grav
+            * self.grav
+            / (self.R_gas * self.T_ref)
+        ) * self.t_ref**2
+        g = self.gravity_strength[2] / self.Msq
+        return np.exp(Nsq * y / g)
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    # hydrostatic background along the configured vertical (z)
+    hydrostatics.analytical_state(npf, elem, node, th, ud)
+
+    v = axes.vertical_axis(ud)
+    S0c = npf.HydroState.get_S0c(elem)
+    rhoY0 = axes.expand_profile(npf.HydroState.rhoY0, elem.ndim, v, elem.sc)
+
+    # small warm bump in the x-z plane
+    x = elem.x.reshape(-1, 1, 1)
+    z = elem.z.reshape(1, 1, -1)
+    xc = 0.5 * (ud.xmax + ud.xmin)
+    zc = 0.4 * ud.zmax
+    delth = (0.1 / ud.T_ref) * np.exp(
+        -((x - xc) ** 2 + (z - zc) ** 2) / (2000.0 / ud.h_ref) ** 2
+    )
+
+    Y = 1.0 / S0c + delth
+    Sol.rhoY[...] = rhoY0
+    Sol.rho[...] = rhoY0 / Y
+    Sol.rhou[...] = 0.0
+    Sol.rhov[...] = 0.0
+    Sol.rhow[...] = 0.0
+    Sol.rhoX[...] = Sol.rho * (Sol.rho / Sol.rhoY - S0c)
+
+    npf.p2_nodes[...] = axes.expand_profile(npf.HydroState_n.p20, node.ndim, v, node.sc)
+
+    ud.nonhydrostasy = 1.0
+    ud.compressibility = 1.0
+
+    return Sol
diff --git a/src/pybella/tests/test_agnesi_hydrostatic.py b/src/pybella/tests/test_agnesi_hydrostatic.py
new file mode 100644
index 00000000..ee99eeee
--- /dev/null
+++ b/src/pybella/tests/test_agnesi_hydrostatic.py
@@ -0,0 +1,171 @@
+"""Agnesi hydrostatic mountain wave — terrain-following golden-master case.
+
+Linear hydrostatic regime (Smith 1980): constant buoyancy frequency
+N = 0.01 1/s, uniform wind U = 10 m/s over a witch-of-Agnesi hill with
+half-width a = 10 km (N a / U = 10, hydrostatic) and height h0 = 100 m
+(N h0 / U = 0.1, linear). Gal-Chen terrain-following coordinates, quasi-2D
+3D vertical slice (z degenerate periodic) through the full-tensor elliptic
+operator. Rayleigh sponge above 12 km (two vertical wavelengths
+lambda_z = 2 pi U / N ~ 6.3 km below it) absorbs upward-radiating waves.
+
+The regression run (stepmax steps) is a determinism gate; the physics is
+proven by ``test_scripts/test_agnesi_analytic.py``, which integrates to a
+quasi-steady state and compares against the Smith (1980) analytic wave
+field and wave drag (``tests/agnesi_smith_analytic.py``).
+"""
+
+import numpy as np
+
+from ..utils import options as opts
+from ..utils import axes
+from ..flow_solver.physics import hydrostatics
+from ..flow_solver.utils.boundary import rayleigh_boundary as bdry_r
+
+from ..utils.data_structures import DiagnosticState
+
+_COMPARED_FIELDS = ("rho", "rhou", "rhov", "rhow", "rhoY", "rhoX", "p2_nodes")
+
+
+class UserData(object):
+    grav = 9.81  # [m/s^2]
+    omega = 0.0
+
+    NN = 0.01  # [1/s] buoyancy frequency
+    U0 = 10.0  # [m/s] background wind
+    hill_height = 100.0  # [m]
+    hill_width = 10000.0  # [m]
+
+    def __init__(self):
+        self.grav = self.grav
+        self.omega = self.omega
+        # instance-ify the case parameters: vars(UserData()) only carries
+        # instance attributes onto UserDataInit, and the Smith comparator
+        # reads them from ud
+        self.NN = self.NN
+        self.U0 = self.U0
+        self.hill_height = self.hill_height
+        self.hill_width = self.hill_width
+
+        self.R_gas = 287.4
+        self.gamm = 1.4
+        self.T_ref = 300.0
+        self.p_ref = 1e5
+        self.h_ref = 10000.0  # [m]
+        self.t_ref = 1000.0  # [s]
+        self.u_ref = self.h_ref / self.t_ref  # 10 m/s
+
+        self.Msq = self.u_ref * self.u_ref / (self.R_gas * self.T_ref)
+
+        self.gravity_direction = 1
+        self.gravity_strength = np.zeros(3)
+        self.gravity_strength[1] = self.grav * self.h_ref / (self.R_gas * self.T_ref)
+        self.i_gravity = np.zeros(3)
+        self.i_gravity[1] = 1
+
+        # x: +-100 km (= +-10 a) periodic — hydrostatic waves radiate
+        # vertically, so the wave field stays near the hill and the top
+        # sponge absorbs it; y: 0..24 km with the sponge above 12 km
+        self.xmin = -100000.0 / self.h_ref
+        self.xmax = 100000.0 / self.h_ref
+        self.ymin = 0.0
+        self.ymax = 24000.0 / self.h_ref
+        self.zmin = 0.0
+        self.zmax = 1.0
+
+        self.u_wind_speed = self.U0 / self.u_ref
+        self.v_wind_speed = 0.0
+        self.w_wind_speed = 0.0
+
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.PERIODIC
+        self.bdry_type[1] = opts.BdryType.WALL  # switched to RAYLEIGH in sol_init
+        self.bdry_type[2] = opts.BdryType.PERIODIC
+
+        self.rayleigh_bdry_switch = True
+        self.rayleigh_forcing = False
+
+        self.is_compressible = 1
+        self.is_nonhydrostatic = 1
+
+        ##########################################
+        # NUMERICS
+        ##########################################
+        self.CFL = 0.9
+        self.dtfixed = 50.0 / self.t_ref  # N dt = 0.5
+        self.dtfixed0 = 50.0 / self.t_ref
+
+        self.inx = 128 + 1  # dx = 1.5625 km (a / dx = 6.4)
+        self.iny = 64 + 1  # dy = 375 m (lambda_z / dy ~ 17)
+        self.inz = 1 + 1
+
+        # sponge: cells above 12 km
+        self.inbcy = 32
+
+        # regression run: 750 s (transient spin-up phase, deterministic —
+        # exercises terrain BC, metric advection, elliptic tensor, sponge);
+        # the analytic oracle extends grid/stepmax to reach steady state
+        self.tout = [1e6]
+        self.stepmax = 15
+
+        self.stratification = self.stratification_function
+        self.orography = self.orography_function
+        self.output_timesteps = True
+
+        self.diag = True
+        self.diag_updt_targets = False
+
+        self.output_base_name = "_agnesi_hydrostatic"
+        self.output_type = "test" if not self.diag_updt_targets else "target"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.iny - 1)
+        self.autogen_fn = False
+
+        self.diag_state = DiagnosticState(
+            test_name="test_agnesi_hydrostatic",
+            file_name="target_agnesi_hydrostatic",
+            Nx=self.inx - 1,
+            Ny=self.iny - 1,
+            steps=[self.stepmax - 1],
+            plot_compare=True,
+            # terrain elliptic solves (~130 bicgstab iters/step) amplify
+            # cross-platform rounding: first CI run (2026-06-11) deviated
+            # 7.0e-5 on rhou / 5.1e-6 on rho from the locally generated
+            # target (local same-machine scatter is ~1e-7). Gate well above
+            # platform noise; physics is guarded by the Smith (1980) oracle.
+            tolerances={k: 5e-4 for k in _COMPARED_FIELDS},
+        )
+
+    def stratification_function(self, y):
+        # constant buoyancy frequency: theta = exp(N^2 z / g)
+        Nsq = (self.NN * self.t_ref) ** 2
+        g = self.gravity_strength[1] / self.Msq
+        return np.exp(Nsq * y / g)
+
+    def orography_function(self, xi1, xi2):
+        h0 = self.hill_height / self.h_ref
+        a = self.hill_width / self.h_ref
+        return h0 * a**2 / (xi1**2 + a**2) + 0.0 * xi2
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    if getattr(ud, "rayleigh_bdry_switch", False):
+        ud.bdry_type[axes.vertical_axis(ud)] = opts.BdryType.RAYLEIGH
+        ud.tcy, ud.tny = bdry_r.get_tau_y(ud, elem, node, 0.5)
+
+    # constant-N background at physical height via fine-grid quadrature
+    hydrostatics.integrated_state(npf, elem, node, th, ud)
+
+    S0c = npf.HydroState.get_S0c(elem)
+    rhoY0 = npf.HydroState.rhoY0  # field mode: full per-column fields
+
+    Sol.rhoY[...] = rhoY0
+    Sol.rho[...] = rhoY0 * S0c
+    Sol.rhou[...] = Sol.rho * ud.u_wind_speed
+    Sol.rhov[...] = 0.0
+    Sol.rhow[...] = 0.0
+    Sol.rhoX[...] = Sol.rho * (Sol.rho / Sol.rhoY - S0c)
+
+    # hydrostatically balanced background: zero perturbation pressure
+    npf.p2_nodes[...] = 0.0
+
+    return Sol
diff --git a/src/pybella/tests/test_igw_baldauf_brdar.py b/src/pybella/tests/test_igw_baldauf_brdar.py
new file mode 100644
index 00000000..a950870d
--- /dev/null
+++ b/src/pybella/tests/test_igw_baldauf_brdar.py
@@ -0,0 +1,247 @@
+"""Baldauf & Brdar (2013) linear internal gravity wave test.
+
+Ported from the legacy ``igw_baldauf_brdar`` initial condition
+(recoverable from the git tag ``archive/full_coriolis``). The case is a
+small thermal perturbation on an isothermal (constant Brunt-Väisälä
+frequency) hydrostatic background in a periodic channel, for which
+Baldauf & Brdar (QJRMS, 2013) derive an analytic linear solution.
+
+Physics deltas relative to ``test_internal_long_wave``:
+
+- isothermal background: ``Nsq_ref = ((gamma-1)/gamma) * g^2 / (R_gas * T_ref)``
+  with ``T_ref = 250 K`` (instead of the prescribed ``Nsq_ref = 1e-4``),
+- reference scales ``u_ref = 10 m/s``, ``h_ref = R_gas * T_ref / grav``,
+  ``t_ref = h_ref / u_ref``,
+- domain: ``scale_factor * 300 km`` wide, ``10 km`` deep (long-wave
+  variant for ``scale_factor = 20``),
+- Gaussian envelope perturbation
+  ``delT * molly(x) * sin(pi*y/H) * exp(-(x-xc)^2/a^2)`` instead of the
+  Lorentzian ``1/(1+(x-xc)^2/a^2)``,
+- compressible (``is_compressible = 1``),
+- traditional-Coriolis component ``coriolis_strength[1] = omega * t_ref``
+  with ``omega = 1.03126e-4 s^-1``.
+"""
+
+import numpy as np
+
+from ..utils import options as opts
+
+from ..flow_solver.utils import fields
+from ..flow_solver.physics import hydrostatics
+
+from ..utils.data_structures import DiagnosticState
+
+
+class UserData(object):
+    # planetary -> 160.0;  long-wave -> 20.0;  standard -> 1.0;
+    scale_factor = 20.0
+
+    def __init__(self):
+        self.scale_factor = self.scale_factor
+
+        self.grav = 9.80665  # [m/s^2]
+        self.omega = 1.0 * 0.000103126  # [s^{-1}]
+        self.R_gas = 287.05  # [J kg^{-1} K^{-1}]
+        self.gamm = 1.4
+
+        self.T_ref = 250.00  # [K]
+        self.u_ref = 10.0  # [m/s]
+        self.p_ref = 1e5  # [Pa]
+        self.h_ref = self.R_gas * self.T_ref / self.grav  # [m]
+        self.t_ref = self.h_ref / self.u_ref  # [s]
+
+        self.Nsq_ref = (
+            ((self.gamm - 1.0) / self.gamm)
+            * self.grav
+            * self.grav
+            / (self.R_gas * self.T_ref)
+        )  # [s^{-2}]
+
+        self.Msq = (
+            self.u_ref * self.u_ref / (self.R_gas * self.T_ref)
+        )  # Mach number squared
+
+        self.is_nonhydrostatic = 1
+        self.is_compressible = 1
+
+        self.gravity_strength = np.zeros((3))
+        self.gravity_strength[1] = self.grav * self.h_ref / (self.R_gas * self.T_ref)
+
+        gravity_mask = (self.gravity_strength > np.finfo(np.float64).eps) | (
+            np.arange(3) == 1
+        )
+        self.i_gravity = gravity_mask.astype(int)
+        if np.any(gravity_mask):
+            self.gravity_direction = np.where(gravity_mask)[0][
+                -1
+            ]  # Use last matching index
+
+        # NB: defined after `omega` and `t_ref` so that the dependency
+        # manager in `UserDataInit` does not overwrite this explicit choice.
+        self.coriolis_strength = np.zeros((3))
+        self.coriolis_strength[1] = self.omega * self.t_ref
+
+        self.xmin = -0.5 * self.scale_factor * 300000.0 / self.h_ref
+        self.xmax = 0.5 * self.scale_factor * 300000.0 / self.h_ref
+        self.ymin = 0.0
+        self.ymax = 10000.0 / self.h_ref
+        self.zmin = -1.0
+        self.zmax = 1.0
+
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.PERIODIC
+        self.bdry_type[1] = opts.BdryType.WALL
+        self.bdry_type[2] = opts.BdryType.WALL
+
+        ##########################################
+        # NUMERICS
+        ##########################################
+        self.CFL = 0.9
+
+        self.dtfixed0 = 0.5 / self.t_ref * 50.0 * self.scale_factor
+        self.dtfixed = 0.5 / self.t_ref * 50.0 * self.scale_factor
+
+        self.inx = 301 + 1
+        self.iny = 20 + 1
+        self.inz = 1
+
+        self.tout = [8 * 180.0 * self.scale_factor / self.t_ref]  # 8 hrs
+
+        self.tol = 1.0e-8
+        self.stepmax = 31
+        self.max_iterations = 6000
+
+        self.autogen_fn = False
+
+        self.output_timesteps = True
+
+        self.stratification = self.stratification_function
+        self.molly = self.molly_function
+        self.rhoe = self.rhoe_method
+
+        self.diag = True
+        self.diag_updt_targets = False
+
+        self.output_base_name = "_igw_baldauf_brdar"
+        self.output_type = "test" if not self.diag_updt_targets else "target"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.iny - 1)
+
+        self.diag_state = DiagnosticState(
+            test_name="test_igw_baldauf_brdar",
+            file_name="target_igw_baldauf_brdar",
+            Nx=self.inx - 1,
+            Ny=self.iny - 1,
+            steps=[self.stepmax - 1],
+            # tolerance audit 2026-06-09 measured ~1e-10 run-to-run scatter
+            # SAME-MACHINE; the first CI run (2026-06-11, GitHub runner)
+            # showed cross-PLATFORM scatter of 2.3e-6 on rhou — different
+            # CPU/BLAS/numba reorder the bicgstab reductions. Gate at the
+            # 1e-5 default; physics is guarded by the B&B analytic oracle.
+            tolerances={
+                "rho": 1e-5,
+                "rhou": 1e-5,
+                "rhov": 1e-5,
+                "rhow": 1e-5,
+                "rhoY": 1e-5,
+                "rhoX": 1e-5,
+                "p2_nodes": 1e-5,
+            },
+        )
+
+    def stratification_function(self, y):
+        Nsq = self.Nsq_ref * self.t_ref * self.t_ref
+        g = self.gravity_strength[1] / self.Msq
+
+        return np.exp(Nsq * y / g)
+
+    def molly_function(self, x):
+        del0 = 0.25
+        L = self.xmax - self.xmin
+        xi_l = np.minimum(1.0, (x - self.xmin) / (del0 * L))
+        xi_r = np.minimum(1.0, (self.xmax - x) / (del0 * L))
+
+        return 0.5 * np.minimum(1.0 - np.cos(np.pi * xi_l), 1.0 - np.cos(np.pi * xi_r))
+
+    @staticmethod
+    def rhoe_method(rho, u, v, w, p, ud, th):
+        Msq = ud.compressibility * ud.Msq
+
+        gm1inv = th.gm1inv
+        return p * gm1inv + 0.5 * Msq * rho * (u * u + v * v + w * w)
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    u0 = ud.u_wind_speed
+    v0 = ud.v_wind_speed
+    w0 = ud.w_wind_speed
+    delT = 0.01 / ud.T_ref
+
+    xc = 0.0
+    a = ud.scale_factor * 5.0e3 / ud.h_ref
+    H = ud.ymax - ud.ymin
+
+    hydrostatics.analytical_state(npf, elem, node, th, ud)
+
+    x = elem.x.reshape(-1, 1)
+    y = elem.y.reshape(1, -1)
+
+    Tb = delT * ud.molly(x) * np.sin(np.pi * y / H) * np.exp(-((x - xc) ** 2) / (a**2))
+
+    xn = node.x[:-1].reshape(-1, 1)
+    yn = node.y[:-1].reshape(1, -1)
+
+    Tbn = (
+        delT
+        * ud.molly(xn)
+        * np.sin(np.pi * yn / H)
+        * np.exp(-((xn - xc) ** 2) / (a**2))
+    )
+
+    HySt = fields.States(node.sc)
+    HyStn = fields.States(node.sc)
+
+    Y = ud.stratification(y) + Tb
+    Yn = ud.stratification(yn) + Tbn
+
+    hydrostatics.column(HySt, HyStn, Y, Yn, elem, node, th, ud)
+
+    xc_idx = slice(0, -1)
+    yc_idx = slice(0, -1)
+    c_idx = (xc_idx, yc_idx)
+
+    if ud.is_compressible:
+        p = HySt.p0[c_idx]
+        rhoY = HySt.rhoY0[c_idx]
+    else:
+        p = npf.HydroState.p0
+        rhoY = npf.HydroState.rhoY0
+
+    u, v, w = u0, v0, w0
+
+    rho = rhoY / Y
+    Sol.rho[...] = rho
+    Sol.rhou[...] = rho * u
+    Sol.rhov[...] = rho * v
+    Sol.rhow[...] = rho * w
+    Sol.rhoY[...] = rhoY
+
+    npf.p2_cells[...] = HySt.p20[c_idx]
+
+    Sol.rhoX[...] = Sol.rho * (Sol.rho / Sol.rhoY - npf.HydroState.S0.reshape(1, -1))
+
+    npf.p2_nodes[:, :] = HyStn.p20
+
+    hydrostatics.initial_pressure(Sol, npf, elem, node, ud, th)
+
+    ud.nonhydrostasy = 1.0 if ud.is_nonhydrostatic == 1 else 0.0
+    ud.compressibility = 1.0 if ud.is_compressible == 1 else 0.0
+
+    if "imbal" in ud.aux:
+        npf.p2_nodes[...] = 0.0
+
+    return Sol
+
+
+def T_from_p_rho(p, rho):
+    return np.divide(p, rho)
diff --git a/src/pybella/tests/test_schaer_ridge.py b/src/pybella/tests/test_schaer_ridge.py
new file mode 100644
index 00000000..21e7ed9b
--- /dev/null
+++ b/src/pybella/tests/test_schaer_ridge.py
@@ -0,0 +1,226 @@
+"""Schär ridge (Schär et al. 2002) — SLEVE golden-master case, native 2D.
+
+The classic two-scale mountain-wave benchmark: a Gaussian envelope
+modulated by a small-scale cosine ridge,
+
+    h(x) = h0 exp(-(x/a)^2) cos^2(pi x / lambda),
+
+h0 = 250 m, a = 5 km, lambda = 4 km, in uniform wind U = 10 m/s with
+constant buoyancy frequency N = 0.01 1/s. Spectrally the terrain has an
+envelope peak at k ~ 0 (hydrostatic, radiates) and small-scale peaks at
+k = +-2 pi / lambda — EVANESCENT here (k2 = 1.57e-3 > l = N/U = 1e-3
+1/m), so above ~2 km the true solution carries no small-scale signal.
+That is what discriminates vertical coordinates: under Gal-Chen the
+small-scale terrain distorts the grid at every level and leaves spurious
+small-scale w aloft; under SLEVE (this case's transform) the distortion
+decays on s2 = 2.5 km and the upper levels stay clean.
+
+The exact cos^2 identity supplies the SLEVE split in closed form:
+
+    smooth   h1(x) = (h0/2) exp(-(x/a)^2)
+    residual h2(x) = (h0/2) exp(-(x/a)^2) cos(2 pi x / lambda)
+
+with analytic gradients for both (max slope ~0.2 — 25x the Agnesi case;
+FD slopes are not trusted at that steepness).
+
+Native 2D (lap2D terrain path); the regression run is a determinism gate.
+Physics + the Gal-Chen-vs-SLEVE discriminator live in
+``test_scripts/test_schaer_analytic.py`` against the linear FFT oracle
+(``tests/schaer_linear_analytic.py``).
+"""
+
+import numpy as np
+
+from ..utils import options as opts
+from ..utils import axes
+from ..flow_solver.physics import hydrostatics
+from ..flow_solver.discretisation import terrain
+from ..flow_solver.utils.boundary import rayleigh_boundary as bdry_r
+
+from ..utils.data_structures import DiagnosticState
+
+_COMPARED_FIELDS = ("rho", "rhou", "rhov", "rhow", "rhoY", "rhoX", "p2_nodes")
+
+
+class UserData(object):
+    grav = 9.81  # [m/s^2]
+    omega = 0.0
+
+    NN = 0.01  # [1/s] buoyancy frequency
+    U0 = 10.0  # [m/s] background wind
+    hill_height = 250.0  # [m] h0
+    hill_width = 5000.0  # [m] envelope half-width a
+    ridge_wavelength = 4000.0  # [m] lambda
+
+    def __init__(self):
+        self.grav = self.grav
+        self.omega = self.omega
+        self.NN = self.NN
+        self.U0 = self.U0
+        self.hill_height = self.hill_height
+        self.hill_width = self.hill_width
+        self.ridge_wavelength = self.ridge_wavelength
+
+        self.R_gas = 287.4
+        self.gamm = 1.4
+        self.T_ref = 300.0
+        self.p_ref = 1e5
+        self.h_ref = 10000.0  # [m]
+        self.t_ref = 1000.0  # [s]
+        self.u_ref = self.h_ref / self.t_ref  # 10 m/s
+
+        self.Msq = self.u_ref * self.u_ref / (self.R_gas * self.T_ref)
+
+        self.gravity_direction = 1
+        self.gravity_strength = np.zeros(3)
+        self.gravity_strength[1] = self.grav * self.h_ref / (self.R_gas * self.T_ref)
+        self.i_gravity = np.zeros(3)
+        self.i_gravity[1] = 1
+
+        # x: +-50 km periodic (h(+-50 km) ~ e^-100 h0, seam-safe);
+        # y: 0..19.6 km, sponge above ~11.6 km (two vertical wavelengths
+        # 2 pi U / N = 6.3 km below it)
+        self.xmin = -50000.0 / self.h_ref
+        self.xmax = 50000.0 / self.h_ref
+        self.ymin = 0.0
+        self.ymax = 19600.0 / self.h_ref
+        self.zmin = 0.0
+        self.zmax = 1.0
+
+        self.u_wind_speed = self.U0 / self.u_ref
+        self.v_wind_speed = 0.0
+        self.w_wind_speed = 0.0
+
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.PERIODIC
+        self.bdry_type[1] = opts.BdryType.WALL  # switched to RAYLEIGH in sol_init
+        self.bdry_type[2] = opts.BdryType.PERIODIC
+
+        self.rayleigh_bdry_switch = True
+        self.rayleigh_forcing = False
+
+        self.is_compressible = 1
+        self.is_nonhydrostatic = 1
+
+        ##########################################
+        # NUMERICS
+        ##########################################
+        self.CFL = 0.9
+        self.dtfixed = 12.5 / self.t_ref  # U dt / dx ~ 0.32
+        self.dtfixed0 = 12.5 / self.t_ref
+
+        self.inx = 256 + 1  # dx ~ 390 m (lambda / dx ~ 10)
+        self.iny = 64 + 1  # dy ~ 306 m (lambda_z / dy ~ 21)
+        self.inz = 1  # NATIVE 2D (lap2D terrain path)
+
+        # sponge: cells above ~11.6 km
+        self.inbcy = 26
+
+        # SLEVE decay scales (Schär et al. 2002): s1 = 15 km, s2 = 2.5 km
+        self.vertical_transform = terrain.SLEVETransform(
+            s1=15000.0 / self.h_ref, s2=2500.0 / self.h_ref
+        )
+
+        # regression run: 300 s spin-up (deterministic gate — exercises the
+        # native-2D terrain BC, metric advection, SLEVE elliptic tensor,
+        # sponge); the analytic oracle integrates further on a reduced grid
+        self.tout = [1e6]
+        self.stepmax = 24
+
+        self.stratification = self.stratification_function
+        self.orography = self.orography_function
+        self.orography_grad = (self.orography_dx, self.orography_zero_grad)
+        self.orography_smooth = self.orography_smooth_function
+        self.orography_smooth_grad = (
+            self.orography_smooth_dx,
+            self.orography_zero_grad,
+        )
+        self.output_timesteps = True
+
+        self.diag = True
+        self.diag_updt_targets = False
+
+        self.output_base_name = "_schaer_ridge"
+        self.output_type = "test" if not self.diag_updt_targets else "target"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.iny - 1)
+        self.autogen_fn = False
+
+        self.diag_state = DiagnosticState(
+            test_name="test_schaer_ridge",
+            file_name="target_schaer_ridge",
+            Nx=self.inx - 1,
+            Ny=self.iny - 1,
+            steps=[self.stepmax - 1],
+            plot_compare=True,
+            # terrain elliptic solves amplify cross-platform rounding: first
+            # CI run (2026-06-11) deviated 2.4e-5 on rhou from the locally
+            # generated target (local same-machine scatter is ~1e-7). Gate
+            # well above platform noise; physics is guarded by the linear
+            # FFT oracle + SLEVE discriminator.
+            tolerances={k: 5e-4 for k in _COMPARED_FIELDS},
+        )
+
+    def stratification_function(self, y):
+        # constant buoyancy frequency: theta = exp(N^2 z / g)
+        Nsq = (self.NN * self.t_ref) ** 2
+        g = self.gravity_strength[1] / self.Msq
+        return np.exp(Nsq * y / g)
+
+    # --- orography: total, smooth part, analytic gradients (nondim) ---------
+
+    def _scales(self):
+        h0 = self.hill_height / self.h_ref
+        a = self.hill_width / self.h_ref
+        lam = self.ridge_wavelength / self.h_ref
+        return h0, a, lam
+
+    def orography_function(self, xi1, xi2):
+        h0, a, lam = self._scales()
+        return h0 * np.exp(-((xi1 / a) ** 2)) * np.cos(np.pi * xi1 / lam) ** 2 + (
+            0.0 * xi2
+        )
+
+    def orography_dx(self, xi1, xi2):
+        h0, a, lam = self._scales()
+        env = np.exp(-((xi1 / a) ** 2))
+        return h0 * env * (
+            -(2.0 * xi1 / a**2) * np.cos(np.pi * xi1 / lam) ** 2
+            - (np.pi / lam) * np.sin(2.0 * np.pi * xi1 / lam)
+        ) + (0.0 * xi2)
+
+    def orography_smooth_function(self, xi1, xi2):
+        h0, a, _ = self._scales()
+        return 0.5 * h0 * np.exp(-((xi1 / a) ** 2)) + 0.0 * xi2
+
+    def orography_smooth_dx(self, xi1, xi2):
+        h0, a, _ = self._scales()
+        return -h0 * xi1 / a**2 * np.exp(-((xi1 / a) ** 2)) + 0.0 * xi2
+
+    @staticmethod
+    def orography_zero_grad(xi1, xi2):
+        return 0.0 * xi1 + 0.0 * xi2
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    if getattr(ud, "rayleigh_bdry_switch", False):
+        ud.bdry_type[axes.vertical_axis(ud)] = opts.BdryType.RAYLEIGH
+        ud.tcy, ud.tny = bdry_r.get_tau_y(ud, elem, node, 0.5)
+
+    # constant-N background at physical height via fine-grid quadrature
+    hydrostatics.integrated_state(npf, elem, node, th, ud)
+
+    S0c = npf.HydroState.get_S0c(elem)
+    rhoY0 = npf.HydroState.rhoY0  # field mode: full per-column fields
+
+    Sol.rhoY[...] = rhoY0
+    Sol.rho[...] = rhoY0 * S0c
+    Sol.rhou[...] = Sol.rho * ud.u_wind_speed
+    Sol.rhov[...] = 0.0
+    Sol.rhow[...] = 0.0
+    Sol.rhoX[...] = Sol.rho * (Sol.rho / Sol.rhoY - S0c)
+
+    # hydrostatically balanced background: zero perturbation pressure
+    npf.p2_nodes[...] = 0.0
+
+    return Sol
diff --git a/src/pybella/tests/test_straka.py b/src/pybella/tests/test_straka.py
new file mode 100644
index 00000000..33b6dd40
--- /dev/null
+++ b/src/pybella/tests/test_straka.py
@@ -0,0 +1,134 @@
+"""Straka density current (Straka et al. 1993).
+
+Cold-bubble benchmark: a -15 K temperature anomaly dropped into a neutrally
+stratified (theta = 300 K), hydrostatically balanced atmosphere collapses,
+spreads along the ground and rolls up into Kelvin-Helmholtz rotors. The
+benchmark prescribes a fixed physical viscosity/diffusivity K = 75 m^2/s on
+velocity and potential temperature so the solution converges with
+resolution; this case is the reason ``flow_solver/numerics/diffusion.py``
+exists (``ud.diffusion`` flag).
+
+Setup (dimensional): domain 51.2 km x 6.4 km, free-slip walls all around,
+dx = dz = 200 m, dt = 4 s fixed, run to t = 900 s. Anomaly centred at
+x = 0 km, z = 3 km with radii (4 km, 2 km):
+
+    T' = -15/2 * (1 + cos(pi * r))  [K]   for  r <= 1,
+    theta' = T' / pi_bar(z)               (perturbation at fixed pressure).
+
+Non-dimensionalisation: h_ref = 10 km, t_ref = 1000 s (u_ref = 10 m/s),
+T_ref = 300 K, p_ref = 1e5 Pa; K* = K t_ref / h_ref^2 = 7.5e-4.
+
+This is the suite's only nonlinear, advection-dominated gravity+wall case
+(regression_harness.md gap #4). It supersedes the legacy
+``rising_bubble_cold`` IC.
+"""
+
+import numpy as np
+
+from ..flow_solver.physics import hydrostatics
+from ..utils import options as opts
+from ..utils.data_structures import DiagnosticState
+
+
+class UserData(object):
+    def __init__(self):
+        self.grav = 9.81  # [m/s^2]
+        self.t_ref = 1000.0  # [s]
+        self.T_ref = 300.0  # [K]
+        self.h_ref = 10000.0  # [m]
+        self.p_ref = 1e5  # [Pa]
+
+        self.xmin = -2.56  # [-25.6 km]
+        self.xmax = 2.56
+        self.ymin = 0.0
+        self.ymax = 0.64  # [6.4 km]
+
+        # free-slip walls all around, faithful to Straka et al. (1993).
+        # x-WALLs were broken until the axial-agnosticity boundary fixes
+        # (the nodal-divergence wall zeroing was vertical-axis-only and the
+        # wall-normal momentum mirror was hardcoded to rhov); this case now
+        # exercises that path. With the domain at +-25.6 km the fronts
+        # (~15.5 km at t=900s) stay well clear of the boundary either way.
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.WALL
+        self.bdry_type[1] = opts.BdryType.WALL
+        self.bdry_type[2] = opts.BdryType.WALL
+
+        ##########################################
+        # NUMERICS
+        ##########################################
+        self.CFL = 0.9  # cap only; dtfixed binds for |u| < 45 m/s
+        self.dtfixed = 0.004  # 4 s
+        self.dtfixed0 = 0.004
+
+        self.inx = 256 + 1  # dx = 200 m
+        self.iny = 32 + 1  # dy = 200 m
+        self.inz = 1
+
+        self.tout = [0.9]  # 900 s
+        # exactly 225 steps (000..224); strip_target_file keeps step stepmax-1
+        self.stepmax = 225
+
+        self.is_compressible = 1
+
+        # Straka's fixed physical viscosity/diffusivity K = 75 m^2/s
+        self.diffusion = True
+        self.diffusion_coeff = 75.0 * self.t_ref / self.h_ref**2  # 7.5e-4
+
+        self.diag = True
+        self.diag_updt_targets = False
+
+        self.output_base_name = "_straka"
+        self.output_type = "test" if not self.diag_updt_targets else "target"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.iny - 1)
+
+        self.output_timesteps = True
+
+        self.diag_state = DiagnosticState(
+            test_name="test_straka",
+            file_name="target_straka",
+            Nx=self.inx - 1,
+            Ny=self.iny - 1,
+            steps=[self.stepmax - 1],
+        )
+
+        self.autogen_fn = False
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    u0 = ud.u_wind_speed
+    v0 = ud.v_wind_speed
+    w0 = ud.w_wind_speed
+
+    delT = -15.0  # [K] temperature anomaly amplitude
+    xc, yc = 0.0, 0.3  # centre: (0 km, 3 km)
+    xr, yr = 0.4, 0.2  # radii: (4 km, 2 km)
+
+    hydrostatics.integrated_state(npf, elem, node, th, ud)
+
+    x = elem.x
+    y = elem.y
+    x, y = np.meshgrid(x, y)
+
+    r = np.sqrt(((x - xc) / xr) ** 2 + ((y - yc) / yr) ** 2)
+
+    # temperature anomaly at fixed pressure -> theta' = T'/pi_bar
+    perturbation = (delT / ud.T_ref) * 0.5 * (np.cos(np.pi * r) + 1.0)
+    perturbation[np.where(r > 1.0)] = 0.0
+
+    rhoY = npf.HydroState.rhoY0[np.newaxis, :]
+    pi_bar = rhoY**th.gm1
+
+    rho = rhoY / (ud.stratification(y) + perturbation.T / pi_bar)
+
+    Sol.rho[...] = rho
+    Sol.rhou[...] = rho * u0
+    Sol.rhov[...] = rho * v0
+    Sol.rhow[...] = rho * w0
+    Sol.rhoY[...] = rhoY
+
+    # hydrostatically balanced background: zero perturbation pressure
+    npf.p2_nodes[...] = 0.0
+
+    return Sol
diff --git a/src/pybella/tests/test_swe_vortex.py b/src/pybella/tests/test_swe_vortex.py
new file mode 100644
index 00000000..7880e41e
--- /dev/null
+++ b/src/pybella/tests/test_swe_vortex.py
@@ -0,0 +1,276 @@
+import numpy as np
+
+from ..utils import options as opts
+from ..flow_solver.utils.boundary import node_boundary as bdry_n
+from ..flow_solver.physics import hydrostatics
+from ..flow_solver.numerics import implicit_euler
+from ..flow_solver.utils import cache
+
+from ..utils.data_structures import DiagnosticState
+
+
+class UserData(object):
+    """
+    Balanced shallow-water vortex — demonstration-grade SWE regression case.
+
+    Port of the legacy ``balanced_shallow_water_2D.py`` (git tag
+    ``archive/full_coriolis``). The shallow-water equations are run through
+    the gas-dynamics solver via the gamma = 2 equivalence::
+
+        rho  <->  fluid depth h
+        p    <->  g h^2 / 2
+        rhoY <->  p^(1/gamma) = sqrt(g/2) h
+
+    with Msq = 1 (h_ref = t_ref = T_ref = R_gas = 1) and zero vertical
+    gravity; the SWE gravity g enters only through the pressure law.
+
+    Deviations from the legacy case (kept to demo scope):
+
+    - grid reduced from 150x150 to 64x64, run shortened to t = 31000 s
+      (31 steps of dt = 1000 s),
+    - x and y boundaries set periodic (legacy used walls; the mirror-image
+      vortex construction assumes periodicity),
+    - the nodal pressure is evaluated analytically on the node grid instead
+      of cubic scattered-data interpolation of the cell field, so the golden
+      master is independent of the scipy ``griddata`` implementation.
+    """
+
+    grav = 0.0
+    omega = 0.0
+
+    R_gas = 1.0
+    gamm = 2.0
+
+    h_ref = 1.0
+    t_ref = 1.0
+    T_ref = 1.0
+    p_ref = 1.0
+
+    def __init__(self):
+        self.grav = self.grav
+        self.omega = self.omega
+        self.R_gas = self.R_gas
+        self.gamm = self.gamm
+        self.h_ref = self.h_ref
+        self.t_ref = self.t_ref
+        self.T_ref = self.T_ref
+        self.p_ref = self.p_ref
+
+        self.xmin = -0.5e6
+        self.xmax = 0.5e6
+        self.ymin = -0.5e6
+        self.ymax = 0.5e6
+        self.zmin = -0.5
+        self.zmax = 0.5
+
+        self.u_wind_speed = 0.0
+        self.v_wind_speed = 0.0
+        self.w_wind_speed = 0.0
+
+        # SWE gravitational acceleration; enters via the pressure law
+        # p = g h^2 / 2 only (the vertical gravity ``grav`` is zero).
+        self.g_swe = 9.81
+
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.PERIODIC
+        self.bdry_type[1] = opts.BdryType.PERIODIC
+        self.bdry_type[2] = opts.BdryType.WALL
+
+        ##########################################
+        # NUMERICS
+        ##########################################
+        self.CFL = 0.9 / 2.0
+        self.dtfixed = 1000.0
+        self.dtfixed0 = 1000.0
+
+        self.inx = 64 + 1
+        self.iny = 64 + 1
+        self.inz = 1
+
+        self.initial_projection = True
+
+        # the run is stopped by the stepmax cap (31 steps of dt = 1000 s,
+        # i.e. t = 31000 s); tout is set beyond it as in the other tests.
+        self.tout = [1.0e6]
+        self.stepmax = 31
+
+        self.stratification = self.stratification_function
+        self.rhoe = self.rhoe_function
+        self.output_timesteps = True
+
+        self.diag = True
+        self.diag_updt_targets = False
+
+        self.output_base_name = "_swe_vortex"
+        self.output_type = "test" if not self.diag_updt_targets else "target"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.iny - 1)
+
+        self.diag_state = DiagnosticState(
+            test_name="test_swe_vortex",
+            file_name="target_swe_vortex",
+            Nx=self.inx - 1,
+            Ny=self.iny - 1,
+            steps=[self.stepmax - 1],
+        )
+
+        self.autogen_fn = False
+
+    def stratification_function(self, y):
+        if type(y) == float:
+            return 1.0
+        else:
+            return np.ones((y.shape))
+
+    def rhoe_function(self, rho, u, v, w, p, ud, th):
+        Msq = ud.compressibility * ud.Msq
+        gm1inv = th.gm1inv
+
+        return p * gm1inv + 0.5 * Msq * rho * (u**2 + v**2 + w**2)
+
+
+def _depth_field(xs, ys, xc, yc, xcm, ycm, R0, fac, Frsq):
+    """
+    Cyclostrophically balanced depth field h(r) for the vortex with the
+    tangential velocity profile uth = fac * (1 - r/R0)^6 * (r/R0)^6: the
+    polynomial below is the exact radial integral of uth^2 / (g r).
+    """
+    coe = np.zeros((13))
+    coe[0] = +1.0 / 12
+    coe[1] = -12.0 / 13
+    coe[2] = +33.0 / 7
+    coe[3] = -44.0 / 3
+    coe[4] = +495.0 / 16
+    coe[5] = -792.0 / 17
+    coe[6] = +154.0 / 3
+    coe[7] = -792.0 / 19
+    coe[8] = +99.0 / 4
+    coe[9] = -220.0 / 21
+    coe[10] = +3.0
+    coe[11] = -12.0 / 23
+    coe[12] = +1.0 / 24
+
+    xccs = np.zeros_like(xs)
+    yccs = np.zeros_like(ys)
+
+    xccs[...] = xc * (np.abs(xs - xc) < np.abs(xs - xcm))
+    xccs[...] += xcm * (np.abs(xs - xc) > np.abs(xs - xcm))
+
+    yccs[...] = yc * (np.abs(ys - yc) < np.abs(ys - ycm))
+    yccs[...] += ycm * (np.abs(ys - yc) > np.abs(ys - ycm))
+
+    r = np.sqrt((xs - xccs) ** 2 + (ys - yccs) ** 2)
+
+    rho = np.zeros_like(r)
+    for i in range(12, 24 + 1):
+        rho[...] += fac**2 * coe[i - 12] * (r / R0) ** i * (r < R0)
+
+    rho *= Frsq
+    rho = (rho - rho.max()) * (r < R0)
+    rho += 1.0
+
+    return rho, r, xccs, yccs
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    u0 = ud.u_wind_speed
+    v0 = ud.v_wind_speed
+    w0 = 0.0
+
+    rotdir = 1.0
+
+    R0 = 400000.0
+    fac = 1.0 * 1024.0
+    xc = 0.0
+    yc = 0.0
+
+    g = ud.g_swe
+    Frsq = 1.0 / g
+
+    xcm = xc - (ud.xmax - ud.xmin)
+    ycm = yc - (ud.ymax - ud.ymin)
+
+    igs = elem.igs
+    igy = igs[1]
+
+    igxn = node.igx
+    igyn = node.igy
+
+    hydrostatics.integrated_state(npf, elem, node, th, ud)
+
+    # cell-centred depth (rho), radius and vortex centres
+    xs = elem.x.reshape(-1, 1)
+    ys = elem.y[igy:-igy].reshape(1, -1)
+    rho, r, xccs, yccs = _depth_field(xs, ys, xc, yc, xcm, ycm, R0, fac, Frsq)
+
+    uth = (rotdir * fac * (1.0 - r / R0) ** 6 * (r / R0) ** 6) * (r < R0)
+
+    u = u0 + uth * (-(ys - yccs) / r)
+    v = v0 + uth * (+(xs - xccs) / r)
+    w = w0
+
+    Sol.rho[:, igy:-igy] = rho
+    Sol.rhou[:, igy:-igy] = rho * u
+    Sol.rhov[:, igy:-igy] = rho * v
+    Sol.rhow[:, igy:-igy] = rho * w
+
+    # shallow-water pressure law p = g h^2 / 2; rhoY = p^(1/gamma)
+    p = g / 2.0 * rho**2
+    Sol.rhoY[:, igy:-igy] = p**th.gamminv
+
+    # nodal pressure: same analytic depth field evaluated on the node grid
+    xs_n = node.x[igxn:-igxn].reshape(-1, 1)
+    ys_n = node.y[igyn:-igyn].reshape(1, -1)
+    rho_n, _, _, _ = _depth_field(xs_n, ys_n, xc, yc, xcm, ycm, R0, fac, Frsq)
+
+    p_n = g / 2.0 * rho_n**2
+    npf.p2_nodes[igxn:-igxn, igyn:-igyn] = p_n**th.gamminv
+    bdry_n.set_ghost_nodes(npf.p2_nodes, node, ud)
+
+    ud.nonhydrostasy = float(ud.is_nonhydrostatic)
+    ud.compressibility = float(ud.is_compressible)
+
+    if ud.initial_projection == True:
+        is_compressible = np.copy(ud.is_compressible)
+        compressibility = np.copy(ud.compressibility)
+        ud.is_compressible = 0
+        ud.compressibility = 0.0
+
+        p2aux = np.copy(npf.p2_nodes)
+
+        Sol.rhou -= u0 * Sol.rho
+        Sol.rhov -= v0 * Sol.rho
+
+        mem = obj()
+        mem.sol = Sol
+        mem.npf = npf
+        mem.elem = elem
+        mem.node = node
+        mem.th = th
+        mem.time = obj()
+        mem.time.t = ud.dtfixed
+        mem.time.step = 0
+        mem.cache = cache.FlowSolverCache()
+
+        implicit_euler.do_implicit_part(
+            mem, ud, ud.dtfixed, writer=None, label="initial_projection"
+        )
+
+        npf.p2_nodes[...] = p2aux
+        npf.dp2_nodes[...] = 0.0
+
+        Sol.rhou += u0 * Sol.rho
+        Sol.rhov += v0 * Sol.rho
+
+        ud.is_compressible = is_compressible
+        ud.compressibility = compressibility
+
+    return Sol
+
+
+def T_from_p_rho(p, rho):
+    return np.divide(p, rho)
+
+
+class obj(object):
+    pass
diff --git a/src/pybella/tests/test_travelling_vortex_3d_coriolis.py b/src/pybella/tests/test_travelling_vortex_3d_coriolis.py
new file mode 100644
index 00000000..d36c1d66
--- /dev/null
+++ b/src/pybella/tests/test_travelling_vortex_3d_coriolis.py
@@ -0,0 +1,310 @@
+"""Travelling vortex in the horizontal (x-z) plane with full Coriolis force.
+
+Port of the legacy ``travelling_vortex_3D_Coriolis`` initial condition
+(recoverable from the git tag ``archive/full_coriolis``,
+``RKLM_Python/inputs/travelling_vortex_3D_Coriolis.py``) onto the current API.
+
+Relative to the 2D travelling vortex (``test_travelling_vortex``), the legacy
+case changes:
+
+- the vortex lives in the x-z plane (u and w carry the swirl, v = 0) on a
+  quasi-2D 64 x 1 x 64 grid over the unit cube,
+- a nonzero rotation rate ``omega`` gives ``coriolis_strength = [100, 0, 100]``
+  (omega * t_ref on the first and third components),
+- the cyclostrophic pressure balance gains a Coriolis correction: a second
+  polynomial (``ccoe``, exponents 7..25) scaled by f = coriolis_strength[0] is
+  added to the centrifugal polynomial (``coe``, exponents 12..36),
+- rho0 = del_rho = 0.5, no background wind, pseudo-incompressible regime,
+  periodic boundaries in all three directions.
+
+This case exercises the full-3D (``inz > 1``) implicit/elliptic solver path
+(27-point Laplacian, ``utils/operators/laplacian/lap3D.py``) on a quasi-2D
+grid; being y-uniform, its elliptic solve is validated against the 2D solver
+to ~1e-10.
+"""
+
+import numpy as np
+
+from ..utils import options as opts
+from ..flow_solver.physics import hydrostatics
+from ..flow_solver.numerics import implicit_euler
+from ..flow_solver.utils import cache
+
+from ..utils.data_structures import DiagnosticState
+
+
+class UserData(object):
+    grav = 0.0
+    omega = 0.01 * 100.0  # => coriolis_strength[0] = [2] = omega * t_ref = 100.0
+
+    h_ref = 10000.0
+    t_ref = 100.0
+    T_ref = 300.00
+    p_ref = 1e5
+
+    def __init__(self):
+        self.h_ref = self.h_ref
+        self.t_ref = self.t_ref
+        self.T_ref = self.T_ref
+        self.p_ref = self.p_ref
+        self.grav = self.grav
+        self.omega = self.omega
+
+        self.xmin = 0.0
+        self.xmax = 1.0
+        self.ymin = 0.0
+        self.ymax = 1.0
+        self.zmin = 0.0
+        self.zmax = 1.0
+
+        self.u_wind_speed = 0.0
+        self.v_wind_speed = 0.0
+        self.w_wind_speed = 0.0
+
+        self.bdry_type = np.empty((3), dtype=object)
+        self.bdry_type[0] = opts.BdryType.PERIODIC
+        self.bdry_type[1] = opts.BdryType.PERIODIC
+        self.bdry_type[2] = opts.BdryType.PERIODIC
+
+        # legacy case runs in the pseudo-incompressible regime
+        self.is_compressible = 0
+
+        ##########################################
+        # NUMERICS
+        ##########################################
+        self.CFL = 0.95
+        # legacy file used dtfixed = 2.1 * 1.200930e-2; a round 0.01 keeps the
+        # regression run at exactly 100 steps (000..099) to tout = 1.0
+        self.dtfixed = 0.01
+        self.dtfixed0 = 0.01
+
+        self.inx = 64 + 1
+        self.iny = 1 + 1
+        self.inz = 64 + 1
+
+        self.initial_projection = True
+
+        self.tout = [1.0]
+        self.stepmax = 100
+
+        self.stratification = self.stratification_function
+        self.rhoe = self.rhoe_function
+        self.output_timesteps = True
+
+        self.diag = True
+        self.diag_updt_targets = False
+
+        self.output_base_name = "_travelling_vortex_3d_coriolis"
+        self.output_type = "test" if not self.diag_updt_targets else "target"
+        self.aux = ""
+        self.output_suffix = "_%i_%i" % (self.inx - 1, self.iny - 1)
+
+        self.diag_state = DiagnosticState(
+            test_name="test_travelling_vortex_3d_coriolis",
+            file_name="target_travelling_vortex_3d_coriolis",
+            Nx=self.inx - 1,
+            Ny=self.iny - 1,
+            steps=[self.stepmax - 1],
+            # 3D fields are contour-plotted as the transverse (y) mid-slice
+            plot_compare=True,
+        )
+
+        self.autogen_fn = False
+
+    def stratification_function(self, y):
+        if type(y) == float:
+            return 1.0
+        else:
+            return np.ones((y.shape))
+
+    def rhoe_function(self, rho, u, v, w, p, ud, th):
+        Msq = ud.compressibility * ud.Msq
+        gm1inv = th.gm1inv
+
+        return p * gm1inv + 0.5 * Msq * rho * (u**2 + v**2 + w**2)
+
+
+def sol_init(Sol, npf, elem, node, th, ud, seed=None):
+    u0 = ud.u_wind_speed
+    v0 = ud.v_wind_speed
+    w0 = ud.w_wind_speed
+
+    rotdir = 1.0
+
+    a_rho = 1.0
+    rho0 = a_rho * 0.5
+    del_rho = a_rho * 0.5
+    R0 = 0.4
+    fac = 1.0 * 1024.0
+    xc = 0.5
+    zc = 0.5
+
+    # Coriolis parameter entering the gradient-wind pressure balance
+    f = ud.coriolis_strength[0]
+
+    igs = elem.igs
+
+    hydrostatics.integrated_state(npf, elem, node, th, ud)
+
+    # centrifugal part of the balanced pressure (exponents 12..36)
+    coe = np.zeros((25))
+    coe[0] = 1.0 / 12.0
+    coe[1] = -12.0 / 13.0
+    coe[2] = 9.0 / 2.0
+    coe[3] = -184.0 / 15.0
+    coe[4] = 609.0 / 32.0
+    coe[5] = -222.0 / 17.0
+    coe[6] = -38.0 / 9.0
+    coe[7] = 54.0 / 19.0
+    coe[8] = 783.0 / 20.0
+    coe[9] = -558.0 / 7.0
+    coe[10] = 1053.0 / 22.0
+    coe[11] = 1014.0 / 23.0
+    coe[12] = -1473.0 / 16.0
+    coe[13] = 204.0 / 5.0
+    coe[14] = 510.0 / 13.0
+    coe[15] = -1564.0 / 27.0
+    coe[16] = 153.0 / 8.0
+    coe[17] = 450.0 / 29.0
+    coe[18] = -269.0 / 15.0
+    coe[19] = 174.0 / 31.0
+    coe[20] = 57.0 / 32.0
+    coe[21] = -74.0 / 33.0
+    coe[22] = 15.0 / 17.0
+    coe[23] = -6.0 / 35.0
+    coe[24] = 1.0 / 72.0
+
+    # Coriolis part of the balanced pressure (exponents 7..25)
+    ccoe = np.zeros((19))
+    ccoe[0] = 1.0 / 7.0
+    ccoe[1] = -3.0 / 4.0
+    ccoe[2] = 4.0 / 3.0
+    ccoe[3] = -1.0 / 5.0
+    ccoe[4] = -45.0 / 22.0
+    ccoe[5] = 3.0 / 4.0
+    ccoe[6] = 9.0 / 2.0
+    ccoe[7] = -36.0 / 7.0
+    ccoe[8] = -11.0 / 5.0
+    ccoe[9] = 55.0 / 8.0
+    ccoe[10] = -33.0 / 17.0
+    ccoe[11] = -4.0
+    ccoe[12] = 58.0 / 19.0
+    ccoe[13] = 3.0 / 5.0
+    ccoe[14] = -10.0 / 7.0
+    ccoe[15] = 4.0 / 11.0
+    ccoe[16] = 9.0 / 46.0
+    ccoe[17] = -1.0 / 8.0
+    ccoe[18] = 1.0 / 50.0
+
+    xcm = xc - (ud.xmax - ud.xmin)
+    zcm = zc - (ud.zmax - ud.zmin)
+
+    # cell-centred radius in the x-z plane, broadcast over y
+    xs = elem.x.reshape(-1, 1, 1)
+    zs = elem.z.reshape(1, 1, -1)
+    xccs = np.zeros_like(xs)
+    zccs = np.zeros_like(zs)
+
+    xccs[...] = xc * (np.abs(xs - xc) < np.abs(xs - xcm))
+    xccs[...] += xcm * (np.abs(xs - xc) > np.abs(xs - xcm))
+
+    zccs[...] = zc * (np.abs(zs - zc) < np.abs(zs - zcm))
+    zccs[...] += zcm * (np.abs(zs - zc) > np.abs(zs - zcm))
+
+    r = np.sqrt((xs - xccs) ** 2 + (zs - zccs) ** 2)
+
+    uth = (rotdir * fac * (1.0 - r / R0) ** 6 * (r / R0) ** 6) * (r < R0)
+
+    u = u0 + uth * (-(zs - zccs) / r)
+    v = v0 + np.zeros_like(r)
+    w = w0 + uth * (+(xs - xccs) / r)
+
+    rho = np.zeros_like(r)
+    rho[...] += (rho0 + del_rho * (1.0 - (r / R0) ** 2) ** 6) * (r < R0)
+    rho[...] += rho0 * (r >= R0)
+
+    # broadcast the (icx, 1, icz) slabs over the full y extent (incl. ghosts),
+    # as the legacy file did via np.repeat
+    Sol.rho[...] = rho
+    Sol.rhou[...] = rho * u
+    Sol.rhov[...] = rho * v
+    Sol.rhow[...] = rho * w
+
+    # neutral stratification, pseudo-incompressible: rhoY = 1 everywhere
+    # (the legacy file left npf.p2_cells unset)
+    Sol.rhoY[...] = 1.0
+
+    # nodal balanced pressure
+    xs = node.x[igs[0] : -igs[0]].reshape(-1, 1, 1)
+    zs = node.z[igs[2] : -igs[2]].reshape(1, 1, -1)
+    xccs = np.zeros_like(xs)
+    zccs = np.zeros_like(zs)
+
+    xccs[np.where(np.abs(xs - xc) < np.abs(xs - xcm))] = xc
+    xccs[np.where(np.abs(xs - xc) >= np.abs(xs - xcm))] = xcm
+
+    zccs[np.where(np.abs(zs - zc) < np.abs(zs - zcm))] = zc
+    zccs[np.where(np.abs(zs - zc) >= np.abs(zs - zcm))] = zcm
+
+    r = np.sqrt((xs - xccs) ** 2 + (zs - zccs) ** 2)
+
+    i2 = tuple(slice(igs[dim], -igs[dim]) for dim in range(elem.ndim))
+
+    p2n = np.zeros_like(r)
+    for ip in range(25):
+        p2n += fac * (a_rho * coe[ip] * ((r / R0) ** (12 + ip) - 1.0) * rotdir**2)
+    for ip in range(19):
+        p2n += f * ccoe[ip] * ((r / R0) ** (7 + ip) - 1.0)
+    p2n *= r / R0 < 1.0
+
+    npf.p2_nodes[i2] = th.Gamma * fac * p2n
+
+    ud.nonhydrostasy = float(ud.is_nonhydrostatic)
+    ud.compressibility = float(ud.is_compressible)
+
+    if ud.initial_projection == True:
+        is_compressible = np.copy(ud.is_compressible)
+        compressibility = np.copy(ud.compressibility)
+        ud.is_compressible = 0
+        ud.compressibility = 0.0
+
+        p2aux = np.copy(npf.p2_nodes)
+
+        Sol.rhou -= u0 * Sol.rho
+        Sol.rhov -= v0 * Sol.rho
+        Sol.rhow -= w0 * Sol.rho
+
+        mem = obj()
+        mem.sol = Sol
+        mem.npf = npf
+        mem.elem = elem
+        mem.node = node
+        mem.th = th
+        mem.time = obj()
+        mem.time.t = ud.dtfixed
+        mem.time.step = 0
+        mem.cache = cache.FlowSolverCache()
+
+        implicit_euler.do_implicit_part(
+            mem, ud, ud.dtfixed, writer=None, label="initial_projection"
+        )
+
+        npf.p2_nodes[...] = p2aux
+        npf.dp2_nodes[...] = 0.0
+
+        Sol.rhou += u0 * Sol.rho
+        Sol.rhov += v0 * Sol.rho
+        Sol.rhow += w0 * Sol.rho
+
+        ud.is_compressible = is_compressible
+        ud.compressibility = compressibility
+
+    return Sol
+
+
+def T_from_p_rho(p, rho):
+    return np.divide(p, rho)
+
+
+class obj(object):
+    pass
diff --git a/src/pybella/utils/axes.py b/src/pybella/utils/axes.py
new file mode 100644
index 00000000..8945dd3e
--- /dev/null
+++ b/src/pybella/utils/axes.py
@@ -0,0 +1,129 @@
+"""Axis geometry for the axial-agnosticity refactor — single source of truth.
+
+The solver's dynamics is written in **role space** ``(h1, v, h2)`` — first
+horizontal, vertical, second horizontal. The cyclic permutation
+
+    role_perm(v) = ((v - 1) % 3, v, (v + 1) % 3)
+
+maps roles onto array axes; ``v = 1`` (today's y-vertical convention) gives
+the identity, so all existing configurations bind unchanged.
+
+Only *cyclic* (even) permutations are allowed: the rotation vector is a
+pseudovector, so odd axis swaps would flip every Coriolis cross-term sign.
+This is why the vertical choice selects a cyclic role layout instead of an
+arbitrary one.
+
+Everything that needs to know "which axis is vertical" must consume this
+module rather than hardcoding axis 1.
+"""
+
+import numpy as np
+
+VERTICAL_DEFAULT = 1
+
+# axis-indexed (NOT role-indexed) component names
+MOMENTA = ("rhou", "rhov", "rhow")
+VELOCITIES = ("u", "v", "w")
+
+
+def vertical_axis(ud):
+    """The vertical/gravity array axis, validated in {0, 1, 2}."""
+    v = int(getattr(ud, "gravity_direction", VERTICAL_DEFAULT))
+    if v not in (0, 1, 2):
+        raise ValueError(f"gravity_direction must be 0, 1 or 2, got {v}")
+    return v
+
+
+def role_perm(v):
+    """Roles -> axes: (axis of h1, axis of v, axis of h2). v=1 -> (0, 1, 2)."""
+    return ((v - 1) % 3, v, (v + 1) % 3)
+
+
+def role_of_axis(v):
+    """Axes -> roles: inverse of role_perm. role_of_axis(v)[axis] = role index."""
+    perm = role_perm(v)
+    inv = [0, 0, 0]
+    for role, axis in enumerate(perm):
+        inv[axis] = role
+    return tuple(inv)
+
+
+def horizontal_axes(v):
+    """The (h1, h2) axis pair for vertical v."""
+    perm = role_perm(v)
+    return (perm[0], perm[2])
+
+
+def role_attrs(attrs, v):
+    """Reorder an axis-indexed 3-tuple (e.g. MOMENTA) into role order."""
+    perm = role_perm(v)
+    return tuple(attrs[axis] for axis in perm)
+
+
+def vertical_momentum(ud):
+    return MOMENTA[vertical_axis(ud)]
+
+
+def vertical_velocity(ud):
+    return VELOCITIES[vertical_axis(ud)]
+
+
+def coords_along(grid_obj, axis):
+    """Coordinate array of a SpaceDiscr-like object along an axis."""
+    return (grid_obj.x, grid_obj.y, grid_obj.z)[axis]
+
+
+def extent_along(grid_obj, axis):
+    """(cell count incl. ghosts, ghost count, spacing) along an axis."""
+    return (grid_obj.sc[axis], grid_obj.igs[axis], grid_obj.dxyz[axis])
+
+
+def wall_slabs(ndim, axis, depth=2):
+    """Index tuples selecting the low/high boundary slabs along an axis.
+
+    wall_slabs(3, 1) -> ((:, :2, :), (:, -2:, :)) as slice tuples — the
+    generalisation of the hardcoded ``[:, :2, ...]`` / ``[:, -2:, ...]``.
+    """
+    lo = [slice(None)] * ndim
+    hi = [slice(None)] * ndim
+    lo[axis] = slice(None, depth)
+    hi[axis] = slice(-depth, None)
+    return tuple(lo), tuple(hi)
+
+
+def expand_profile(profile_1d, ndim, vaxis, counts):
+    """Broadcast a 1D vertical profile to the full grid by repetition.
+
+    Reproduces the legacy ``for dim in range(0, ndim, 2): expand_dims +
+    repeat`` construction exactly for vaxis == 1 (ascending non-vertical
+    dims), generalised to any vertical axis. ``counts[dim]`` is the target
+    size along each non-vertical dim (e.g. ``elem.sc``).
+    """
+    out = profile_1d
+    for dim in range(ndim):
+        if dim == vaxis:
+            continue
+        out = np.expand_dims(out, dim)
+        out = np.repeat(out, counts[dim], axis=dim)
+    return out
+
+
+def degenerate_axes(node):
+    """Axes with a single interior cell layer (quasi-2D broadcast targets)."""
+    return [dim for dim in range(node.ndim) if node.iisc[dim] == 2]
+
+
+def permute_axes(arr, sigma):
+    """Move reference axis i to twin axis sigma[i] (for permutation oracles)."""
+    n = arr.ndim
+    return np.moveaxis(arr, list(range(n)), list(sigma[:n]))
+
+
+def validate(ud, ndim):
+    """2D runs are x-y by convention: the vertical must be axis 1."""
+    v = vertical_axis(ud)
+    if ndim == 2 and v != 1:
+        raise ValueError(
+            f"2D runs require gravity_direction == 1 (x-y plane, y vertical); got {v}"
+        )
+    return v
diff --git a/src/pybella/utils/operators/divergence.py b/src/pybella/utils/operators/divergence.py
index 3e8af161..708c7b44 100644
--- a/src/pybella/utils/operators/divergence.py
+++ b/src/pybella/utils/operators/divergence.py
@@ -1,4 +1,5 @@
 import numba as nb
+from .. import axes
 from .. import options as opts
 from . import finite_difference
 
@@ -66,7 +67,9 @@ def compute_3d_components(u_field, v_field, w_field, dx, dy, dz):
     div_x = finite_difference.do_1d(u_field, dx, axis=0)
     # Average to y-cell centers, then to z-faces
     div_x = 0.5 * (div_x[:, :-1, :] + div_x[:, 1:, :])
-    div_x = -0.5 * (div_x[:, :, :-1] + div_x[:, :, 1:])  # Note: negative from original
+    # the legacy "-0.5" here was a sign error (introduced Oct 2021, archive
+    # commit 3661b9d); the divergence must be sign-symmetric in all dims
+    div_x = 0.5 * (div_x[:, :, :-1] + div_x[:, :, 1:])
 
     # Y-direction: ∂v/∂y
     div_y = finite_difference.do_1d(v_field, dy, axis=1)
@@ -116,23 +119,59 @@ def compute_at_nodes(rhs, elem, sol, ud):
     """Main divergence function - handles boundary conditions and calls JIT-compiled core."""
     ndim = elem.ndim
 
-    # Handle boundary conditions
+    # Handle boundary conditions: zero the momenta in the two boundary
+    # slabs of every WALL/RAYLEIGH axis (historically vertical-only, which
+    # left the x-WALL elliptic path broken). The slabs are the ghost
+    # layers, so with terrain this also zeroes the ghost contravariant
+    # fluxes (they are formed from the momenta) — the same wall treatment
+    # as the uniform-Cartesian path.
     if not hasattr(ud, "ATMOSPHERIC_EXTENSION"):
-        if (
-            ud.bdry_type[1] == opts.BdryType.WALL
-            or ud.bdry_type[1] == opts.BdryType.RAYLEIGH
-        ):
-            sol.rhou[:, :2, ...] = 0.0
-            sol.rhov[:, :2, ...] = 0.0
-            sol.rhow[:, :2, ...] = 0.0
-            sol.rhou[:, -2:, ...] = 0.0
-            sol.rhov[:, -2:, ...] = 0.0
-            sol.rhow[:, -2:, ...] = 0.0
+        for dim in range(ndim):
+            if (
+                ud.bdry_type[dim] == opts.BdryType.WALL
+                or ud.bdry_type[dim] == opts.BdryType.RAYLEIGH
+            ):
+                lo, hi = axes.wall_slabs(ndim, dim)
+                for field in (sol.rhou, sol.rhov, sol.rhow):
+                    field[lo] = 0.0
+                    field[hi] = 0.0
 
     # Call appropriate JIT-compiled function
     if ndim == 2:
-        rhs[:] = _momentum_pot_temp_divergence_2d_jit(
-            sol.rho, sol.rhou, sol.rhov, sol.rhoY, elem.dx, elem.dy
+        if elem.metric is not None:
+            # terrain: same contravariant/J-weighted construction as the 3D
+            # branch below, minus the second-horizontal leg (haxes = (0, None),
+            # vaxis = 1 — enforced by axes.validate in 2D)
+            m = elem.metric
+            f_h1, f_v = _metric_contravariant_fluxes_2d_jit(
+                sol.rho, sol.rhoY, sol.rhou, sol.rhov, m.J, m.G1
+            )
+            rhs[:] = compute_2d(f_h1, f_v, elem.dx, elem.dy)
+        else:
+            rhs[:] = _momentum_pot_temp_divergence_2d_jit(
+                sol.rho, sol.rhou, sol.rhov, sol.rhoY, elem.dx, elem.dy
+            )
+    elif elem.metric is not None:
+        # terrain: rhs = J grad.F with J-weighted horizontal fluxes and the
+        # contravariant vertical flux F_v - G1 F_h1 - G2 F_h2 (role space);
+        # the differencing stencils are unchanged
+        m = elem.metric
+        moms = (sol.rhou, sol.rhov, sol.rhow)
+        a_h1, a_h2 = m.haxes
+        f_h1, f_v, f_h2 = _metric_contravariant_fluxes_jit(
+            sol.rho,
+            sol.rhoY,
+            moms[a_h1],
+            moms[m.vaxis],
+            moms[a_h2],
+            m.J,
+            m.G1,
+            m.G2,
+        )
+        flux = [None, None, None]
+        flux[a_h1], flux[m.vaxis], flux[a_h2] = f_h1, f_v, f_h2
+        rhs[:, :, :] = compute_3d_sum(
+            flux[0], flux[1], flux[2], elem.dx, elem.dy, elem.dz
         )
     else:
         _momentum_pot_temp_divergence_3d_jit(
@@ -150,6 +189,39 @@ def compute_at_nodes(rhs, elem, sol, ud):
     return rhs
 
 
+@nb.njit(cache=True)
+def _metric_contravariant_fluxes_jit(rho, rhoY, mom_h1, mom_v, mom_h2, J, G1, G2):
+    """Terrain-following flux components of the theta-weighted momentum.
+
+    Role-ordered inputs/outputs (h1, v, h2). Returns the J-weighted
+    horizontal fluxes and the contravariant vertical flux
+
+        f_v = theta * (mom_v - G1 mom_h1 - G2 mom_h2)
+
+    such that the plain divergence of (J f_h1, f_v, J f_h2) equals
+    J grad.F in physical space.
+    """
+    theta = rhoY / rho
+    f_h1 = mom_h1 * theta
+    f_h2 = mom_h2 * theta
+    f_v = mom_v * theta - G1 * f_h1 - G2 * f_h2
+    return J * f_h1, f_v, J * f_h2
+
+
+@nb.njit(cache=True)
+def _metric_contravariant_fluxes_2d_jit(rho, rhoY, mom_h1, mom_v, J, G1):
+    """2D restriction of :func:`_metric_contravariant_fluxes_jit`.
+
+    Returns the J-weighted horizontal flux and the contravariant vertical
+    flux f_v = theta * (mom_v - G1 mom_h1) such that the plain 2D
+    divergence of (J f_h1, f_v) equals J grad.F in physical space.
+    """
+    theta = rhoY / rho
+    f_h1 = mom_h1 * theta
+    f_v = mom_v * theta - G1 * f_h1
+    return J * f_h1, f_v
+
+
 @nb.njit(cache=True)
 def _momentum_pot_temp_divergence_2d_jit(rho, rhou, rhov, rhoY, dx, dy):
     """
@@ -181,8 +253,6 @@ def _momentum_pot_temp_divergence_3d_jit(rhs, rho, rhou, rhov, rhow, rhoY, dx, d
     rhov_theta = rhov * theta  # y-momentum flux weighted by potential temperature
     rhow_theta = rhow * theta  # z-momentum flux weighted by potential temperature
 
-    # Use generic total divergence operator
-    total_div = compute_3d_sum(rhou_theta, rhov_theta, rhow_theta, dx, dy, dz)
-
-    # Assign to inner region
-    rhs[1:-1, 1:-1, 1:-1] = total_div
+    # Use generic total divergence operator; rhs is interior-sized (node.isc),
+    # which is exactly the shape the cell-array differences produce
+    rhs[:, :, :] = compute_3d_sum(rhou_theta, rhov_theta, rhow_theta, dx, dy, dz)
diff --git a/src/pybella/utils/operators/laplacian/lap2D_manual.py b/src/pybella/utils/operators/laplacian/lap2D_manual.py
index a17d4d4b..058b1812 100644
--- a/src/pybella/utils/operators/laplacian/lap2D_manual.py
+++ b/src/pybella/utils/operators/laplacian/lap2D_manual.py
@@ -4,6 +4,9 @@
 
 
 def get_linop(npf, node, coriolis, diag_inv, ud):
+    """2D (x-y plane) stencil operator: 2D runs are x-y with vertical = axis 1
+    by convention (enforced by axes.validate), so the y_wall/y_atmosphere
+    handling below IS the vertical-role handling."""
     dx = node.dx
     dy = node.dy
 
@@ -13,8 +16,12 @@ def get_linop(npf, node, coriolis, diag_inv, ud):
         y_atmosphere = False
 
     ###################
-    x_wall = ud.bdry_type[0] == opts.BdryType.WALL
-    y_wall = ud.bdry_type[1] == opts.BdryType.WALL
+    # RAYLEIGH is a sponged wall: lap3D, the divergence slab-zeroing and the
+    # node-value scaling all treat it as WALL — the 2D stencil must agree or
+    # a sponged top wraps periodically.
+    _wall = (opts.BdryType.WALL, opts.BdryType.RAYLEIGH)
+    x_wall = ud.bdry_type[0] in _wall
+    y_wall = ud.bdry_type[1] in _wall
 
     cor_slc = (slice(1, -1), slice(1, -1))
     coeff_slc = (slice(1, -1), slice(1, -1))
diff --git a/src/pybella/utils/operators/laplacian/lap3D.py b/src/pybella/utils/operators/laplacian/lap3D.py
index 5cd492f0..f1ae46fa 100644
--- a/src/pybella/utils/operators/laplacian/lap3D.py
+++ b/src/pybella/utils/operators/laplacian/lap3D.py
@@ -1,65 +1,106 @@
+import numpy as np
 import numba as nb
+from ... import options as opts
 
 
-def get_linop(elem, node, npf, ud, diag_inv, dt):
+def get_linop(elem, node, npf, ud, diag_inv, dt, cij):
+    """Build the full-tensor 27-point operator matvec on the node.isc box.
+
+    Discretises  diag_inv * [ sum_ij (1/(d_i d_j)) (1/16) D_i(C_ij F_j p)
+    + hcenter p ]  where F_j is the cell-averaged j-derivative, D_i the
+    cell-to-node i-difference, and C_ij the (axis-indexed) coefficient
+    fields (Gamma^-1 P Theta) * H^-1 — the same H^-1 the momentum
+    correction applies, making the elliptic operator consistent with it.
+    The legacy operator used bare diagonal coefficients plus hand-coded
+    x-z `corrf` cross terms; with H^-1 = identity (no rotation, no
+    buoyancy) this operator reproduces it exactly.
+
+    The solve vector is the C-order ravel of an array shaped node.isc
+    (interior nodes plus one ghost layer per side, [x, y, z]). The outer
+    ghost ring carries zero operator rows; ghost values are reconstructed
+    from periodicity inside the kernel.
+
+    cij: 3x3 nested sequence of full cell-shaped coefficient fields.
+    """
     oodxyz = node.dxyz
     oodxyz = 1.0 / (oodxyz**2)
     oodx2, oody2, oodz2 = oodxyz[0], oodxyz[1], oodxyz[2]
-    odx, odz = 1.0 / node.dx, 1.0 / node.dz
+    odx, ody, odz = 1.0 / node.dx, 1.0 / node.dy, 1.0 / node.dz
 
-    i0 = (slice(0, -1), slice(0, -1), slice(0, -1))
     i1 = (slice(1, -1), slice(1, -1), slice(1, -1))
-    i2 = (slice(2, -2), slice(2, -2), slice(2, -2))
 
     ndim = elem.ndim
-    periodicity = np.empty(ndim, dtype="int")
+    periodicity = np.empty(ndim, dtype="int64")
     for dim in range(ndim):
         periodicity[dim] = ud.bdry_type[dim] == opts.BdryType.PERIODIC
 
-    hplusx = npf.wplus[0][i0][i1]
-    hplusy = npf.wplus[1][i0][i1]
-    hplusz = npf.wplus[2][i0][i1]
+    # cell-valued coefficient boxes on the (isc - 1) cell box surrounding
+    # the node box; copied because the kernel zeroes wall slabs in place
+    C = [[np.ascontiguousarray(cij[i][j][i1]) for j in range(3)] for i in range(3)]
 
-    hcenter = npf.wcenter[i2]
-    diag_inv = diag_inv[i1]
+    # cross blocks only enter when H^-1 has off-diagonal content
+    use_cross = bool(
+        max(np.max(np.abs(C[i][j])) for i in range(3) for j in range(3) if i != j) > 0.0
+    )
 
-    corrf = dt * ud.coriolis_strength[0]
+    # unknowns: interior nodes of the box
+    hcenter = np.ascontiguousarray(npf.wcenter[i1])
+    diag_inv = np.ascontiguousarray(diag_inv)
 
+    # scipy's LinearOperator dtype probe passes an int8 vector; the cast is
+    # a no-copy view for the float64 vectors BiCGSTAB actually sends
     return lambda p: lap3D(
-        p,
-        hplusx,
-        hplusy,
-        hplusz,
+        np.asarray(p, dtype=np.float64),
+        C[0][0],
+        C[0][1],
+        C[0][2],
+        C[1][0],
+        C[1][1],
+        C[1][2],
+        C[2][0],
+        C[2][1],
+        C[2][2],
         hcenter,
         oodx2,
         oody2,
         oodz2,
-        periodicity,
-        diag_inv,
-        corrf,
         odx,
+        ody,
         odz,
+        periodicity,
+        diag_inv,
+        use_cross,
     )
 
 
 @nb.jit(nopython=True, cache=False, nogil=False)
 def lap3D(
     p0,
-    hplusx,
-    hplusy,
-    hplusz,
+    c00,
+    c01,
+    c02,
+    c10,
+    c11,
+    c12,
+    c20,
+    c21,
+    c22,
     hcenter,
     oodx2,
     oody2,
     oodz2,
-    periodicity,
-    diag_inv,
-    corrf,
     odx,
+    ody,
     odz,
+    periodicity,
+    diag_inv,
+    use_cross,
 ):
     shx, shy, shz = hcenter.shape
-    p = p0.reshape(shz + 2, shy + 2, shx + 2)
+    # p0 is the C-order ravel of an [x, y, z] box: reshape must keep that
+    # axis order. Copy so the periodic padding below never mutates the
+    # caller's (scipy's) vector.
+    p = p0.reshape((shx + 2, shy + 2, shz + 2)).copy()
 
     coeff = 1.0 / 16
     lap = np.zeros_like(p)
@@ -96,12 +137,9 @@ def lap3D(
             p[1, :, :] = p[-2, :, :]
             p[-2, :, :] = tmp
         elif bc == False and cnt == 0:
-            hplusx[0, :, :] = 0.0
-            hplusx[-1, :, :] = 0.0
-            hplusy[0, :, :] = 0.0
-            hplusy[-1, :, :] = 0.0
-            hplusz[0, :, :] = 0.0
-            hplusz[-1, :, :] = 0.0
+            for c in (c00, c01, c02, c10, c11, c12, c20, c21, c22):
+                c[0, :, :] = 0.0
+                c[-1, :, :] = 0.0
         if bc == True and cnt == 1:
             tmp = p[:, 1, :]
             p[:, 0, :] = p[:, -3, :]
@@ -109,12 +147,9 @@ def lap3D(
             p[:, 1, :] = p[:, -2, :]
             p[:, -2, :] = tmp
         elif bc == False and cnt == 1:
-            hplusx[:, 0, :] = 0.0
-            hplusx[:, -1, :] = 0.0
-            hplusy[:, 0, :] = 0.0
-            hplusy[:, -1, :] = 0.0
-            hplusz[:, 0, :] = 0.0
-            hplusz[:, -1, :] = 0.0
+            for c in (c00, c01, c02, c10, c11, c12, c20, c21, c22):
+                c[:, 0, :] = 0.0
+                c[:, -1, :] = 0.0
         if bc == True and cnt == 2:
             tmp = p[:, :, 1]
             p[:, :, 0] = p[:, :, -3]
@@ -122,28 +157,15 @@ def lap3D(
             p[:, :, 1] = p[:, :, -2]
             p[:, :, -2] = tmp
         elif bc == False and cnt == 2:
-            hplusx[:, :, 0] = 0.0
-            hplusx[:, :, -1] = 0.0
-            hplusy[:, :, 0] = 0.0
-            hplusy[:, :, -1] = 0.0
-            hplusz[:, :, 0] = 0.0
-            hplusz[:, :, -1] = 0.0
+            for c in (c00, c01, c02, c10, c11, c12, c20, c21, c22):
+                c[:, :, 0] = 0.0
+                c[:, :, -1] = 0.0
         cnt += 1
 
-    leftz = p[:, :, :-1]
-    rightz = p[:, :, 1:]
-
-    z_fluxes = rightz - leftz
-
-    lefty = p[:, :-1, :]
-    righty = p[:, 1:, :]
-
-    y_fluxes = righty - lefty
-
-    leftx = p[:-1, :, :]
-    rightx = p[1:, :, :]
-
-    x_fluxes = rightx - leftx
+    # cell-averaged directional differences F_j(p) on the cell box
+    x_fluxes = p[1:, :, :] - p[:-1, :, :]
+    y_fluxes = p[:, 1:, :] - p[:, :-1, :]
+    z_fluxes = p[:, :, 1:] - p[:, :, :-1]
 
     x_flx = (
         x_fluxes[toplefts[0]]
@@ -164,95 +186,85 @@ def lap3D(
         + z_fluxes[botrights[2]]
     )
 
-    hxzp = hplusx * z_flx
-    hxzpm = hxzp[:-1, :, :]
-    hxzpm = (
-        hxzpm[toplefts[0]]
-        + hxzpm[toprights[0]]
-        + hxzpm[botlefts[0]]
-        + hxzpm[botrights[0]]
-    )
-    hxzpp = hxzp[1:, :, :]
-    hxzpp = (
-        hxzpp[toplefts[0]]
-        + hxzpp[toprights[0]]
-        + hxzpp[botlefts[0]]
-        + hxzpp[botrights[0]]
-    )
+    # diagonal blocks: D_i(C_ii F_i), exactly the legacy structure
+    q = c00 * x_flx
+    qm = q[:-1, :, :]
+    x_flxm = qm[toplefts[0]] + qm[toprights[0]] + qm[botlefts[0]] + qm[botrights[0]]
+    qp = q[1:, :, :]
+    x_flxp = qp[toplefts[0]] + qp[toprights[0]] + qp[botlefts[0]] + qp[botrights[0]]
 
-    hzxp = hplusz * x_flx
-    hzxpm = hzxp[:, :, :-1]
-    hzxpm = (
-        hzxpm[toplefts[2]]
-        + hzxpm[toprights[2]]
-        + hzxpm[botlefts[2]]
-        + hzxpm[botrights[2]]
-    )
-    hzxpp = hzxp[:, :, 1:]
-    hzxpp = (
-        hzxpp[toplefts[2]]
-        + hzxpp[toprights[2]]
-        + hzxpp[botlefts[2]]
-        + hzxpp[botrights[2]]
-    )
-
-    x_flx = hplusx * x_flx
-    x_flxm = x_flx[:-1, :, :]
-    x_flxm = (
-        x_flxm[toplefts[0]]
-        + x_flxm[toprights[0]]
-        + x_flxm[botlefts[0]]
-        + x_flxm[botrights[0]]
-    )
-    x_flxp = x_flx[1:, :, :]
-    x_flxp = (
-        x_flxp[toplefts[0]]
-        + x_flxp[toprights[0]]
-        + x_flxp[botlefts[0]]
-        + x_flxp[botrights[0]]
-    )
+    q = c11 * y_flx
+    qm = q[:, :-1, :]
+    y_flxm = qm[toplefts[1]] + qm[toprights[1]] + qm[botlefts[1]] + qm[botrights[1]]
+    qp = q[:, 1:, :]
+    y_flxp = qp[toplefts[1]] + qp[toprights[1]] + qp[botlefts[1]] + qp[botrights[1]]
 
-    y_flx = hplusy * y_flx
-    y_flxm = y_flx[:, :-1, :]
-    y_flxm = (
-        y_flxm[toplefts[1]]
-        + y_flxm[toprights[1]]
-        + y_flxm[botlefts[1]]
-        + y_flxm[botrights[1]]
-    )
-    y_flxp = y_flx[:, 1:, :]
-    y_flxp = (
-        y_flxp[toplefts[1]]
-        + y_flxp[toprights[1]]
-        + y_flxp[botlefts[1]]
-        + y_flxp[botrights[1]]
-    )
-
-    z_flx = hplusz * z_flx
-    z_flxm = z_flx[:, :, :-1]
-    z_flxm = (
-        z_flxm[toplefts[2]]
-        + z_flxm[toprights[2]]
-        + z_flxm[botlefts[2]]
-        + z_flxm[botrights[2]]
-    )
-    z_flxp = z_flx[:, :, 1:]
-    z_flxp = (
-        z_flxp[toplefts[2]]
-        + z_flxp[toprights[2]]
-        + z_flxp[botlefts[2]]
-        + z_flxp[botrights[2]]
-    )
+    q = c22 * z_flx
+    qm = q[:, :, :-1]
+    z_flxm = qm[toplefts[2]] + qm[toprights[2]] + qm[botlefts[2]] + qm[botrights[2]]
+    qp = q[:, :, 1:]
+    z_flxp = qp[toplefts[2]] + qp[toprights[2]] + qp[botlefts[2]] + qp[botrights[2]]
 
     lap[1:-1, 1:-1, 1:-1] = (
         oodx2 * coeff * (-x_flxm + x_flxp)
         + oody2 * coeff * (-y_flxm + y_flxp)
         + oodz2 * coeff * (-z_flxm + z_flxp)
-        + +1.0 * odx * odz * coeff * corrf * (hxzpp - hxzpm)
-        + -1.0 * odx * odz * coeff * corrf * (hzxpp - hzxpm)
         + hcenter * p[1:-1, 1:-1, 1:-1]
     )
 
+    if use_cross:
+        cross = np.zeros_like(x_flxm)
+
+        # (i=0, j=1): D_x(C_01 F_y)
+        q = c01 * y_flx
+        qm = q[:-1, :, :]
+        qms = qm[toplefts[0]] + qm[toprights[0]] + qm[botlefts[0]] + qm[botrights[0]]
+        qp = q[1:, :, :]
+        qps = qp[toplefts[0]] + qp[toprights[0]] + qp[botlefts[0]] + qp[botrights[0]]
+        cross += odx * ody * coeff * (qps - qms)
+
+        # (i=0, j=2): D_x(C_02 F_z)
+        q = c02 * z_flx
+        qm = q[:-1, :, :]
+        qms = qm[toplefts[0]] + qm[toprights[0]] + qm[botlefts[0]] + qm[botrights[0]]
+        qp = q[1:, :, :]
+        qps = qp[toplefts[0]] + qp[toprights[0]] + qp[botlefts[0]] + qp[botrights[0]]
+        cross += odx * odz * coeff * (qps - qms)
+
+        # (i=1, j=0): D_y(C_10 F_x)
+        q = c10 * x_flx
+        qm = q[:, :-1, :]
+        qms = qm[toplefts[1]] + qm[toprights[1]] + qm[botlefts[1]] + qm[botrights[1]]
+        qp = q[:, 1:, :]
+        qps = qp[toplefts[1]] + qp[toprights[1]] + qp[botlefts[1]] + qp[botrights[1]]
+        cross += ody * odx * coeff * (qps - qms)
+
+        # (i=1, j=2): D_y(C_12 F_z)
+        q = c12 * z_flx
+        qm = q[:, :-1, :]
+        qms = qm[toplefts[1]] + qm[toprights[1]] + qm[botlefts[1]] + qm[botrights[1]]
+        qp = q[:, 1:, :]
+        qps = qp[toplefts[1]] + qp[toprights[1]] + qp[botlefts[1]] + qp[botrights[1]]
+        cross += ody * odz * coeff * (qps - qms)
+
+        # (i=2, j=0): D_z(C_20 F_x)
+        q = c20 * x_flx
+        qm = q[:, :, :-1]
+        qms = qm[toplefts[2]] + qm[toprights[2]] + qm[botlefts[2]] + qm[botrights[2]]
+        qp = q[:, :, 1:]
+        qps = qp[toplefts[2]] + qp[toprights[2]] + qp[botlefts[2]] + qp[botrights[2]]
+        cross += odz * odx * coeff * (qps - qms)
+
+        # (i=2, j=1): D_z(C_21 F_y)
+        q = c21 * y_flx
+        qm = q[:, :, :-1]
+        qms = qm[toplefts[2]] + qm[toprights[2]] + qm[botlefts[2]] + qm[botrights[2]]
+        qp = q[:, :, 1:]
+        qps = qp[toplefts[2]] + qp[toprights[2]] + qp[botlefts[2]] + qp[botrights[2]]
+        cross += odz * ody * coeff * (qps - qms)
+
+        lap[1:-1, 1:-1, 1:-1] += cross
+
     lap = lap * diag_inv
 
     return lap
diff --git a/src/pybella/utils/operators/laplacian/preconditioner.py b/src/pybella/utils/operators/laplacian/preconditioner.py
index 3a6045b6..f03dcf86 100644
--- a/src/pybella/utils/operators/laplacian/preconditioner.py
+++ b/src/pybella/utils/operators/laplacian/preconditioner.py
@@ -1,9 +1,19 @@
 from .. import convolution
 
 
-def prepare_diag(npf, node):
-    """Highly optimized version with minimal function calls."""
+def prepare_diag(npf, node, cii=None):
+    """Highly optimized version with minimal function calls.
+
+    cii: optional per-axis diagonal coefficient fields (the C_ii of the
+    full-tensor 3D operator); defaults to npf.wplus. With identity H^-1
+    the two are bit-identical.
+    """
     ndim = node.ndim
+    w0, w1, w2 = (
+        (npf.wplus[0], npf.wplus[1], npf.wplus[2] if ndim == 3 else None)
+        if cii is None
+        else cii
+    )
 
     coeff = 0.75 if ndim == 2 else 0.0625 if ndim == 3 else None
     if coeff is None:
@@ -17,36 +27,16 @@ def prepare_diag(npf, node):
     diag = npf.wcenter.copy()
 
     # Main diagonal terms
-    diag -= (
-        coeff
-        * inv_dx2
-        * convolution.apply_convolution_kernel(npf.wplus[0], diag_kernel)
-    )
-    diag -= (
-        coeff
-        * inv_dy2
-        * convolution.apply_convolution_kernel(npf.wplus[1], diag_kernel)
-    )
+    diag -= coeff * inv_dx2 * convolution.apply_convolution_kernel(w0, diag_kernel)
+    diag -= coeff * inv_dy2 * convolution.apply_convolution_kernel(w1, diag_kernel)
 
     if ndim == 2:
         # Cross terms
         inv_dxdy = 1.0 / (dx * dy)
-        diag -= (
-            coeff
-            * inv_dxdy
-            * convolution.apply_convolution_kernel(npf.wplus[0], diag_kernel)
-        )
-        diag -= (
-            coeff
-            * inv_dxdy
-            * convolution.apply_convolution_kernel(npf.wplus[1], diag_kernel)
-        )
+        diag -= coeff * inv_dxdy * convolution.apply_convolution_kernel(w0, diag_kernel)
+        diag -= coeff * inv_dxdy * convolution.apply_convolution_kernel(w1, diag_kernel)
     elif ndim == 3:
         inv_dz2 = 1.0 / (dz**2)
-        diag -= (
-            coeff
-            * inv_dz2
-            * convolution.apply_convolution_kernel(npf.wplus[2], diag_kernel)
-        )
+        diag -= coeff * inv_dz2 * convolution.apply_convolution_kernel(w2, diag_kernel)
 
     return 1.0 / diag
diff --git a/src/pybella/utils/user_data.py b/src/pybella/utils/user_data.py
index 66af313f..7bd50b18 100644
--- a/src/pybella/utils/user_data.py
+++ b/src/pybella/utils/user_data.py
@@ -13,9 +13,15 @@ def __init__(self):
         self.dependency_graph = {
             "u_ref": ["h_ref", "t_ref"],
             "Msq": ["u_ref", "R_gas", "T_ref"],
-            "gravity_strength": ["grav", "h_ref", "R_gas", "T_ref"],
-            "i_gravity": ["grav", "h_ref", "R_gas", "T_ref"],
-            "coriolis_strength": ["omega", "t_ref"],
+            "gravity_strength": [
+                "grav",
+                "h_ref",
+                "R_gas",
+                "T_ref",
+                "gravity_direction",
+            ],
+            "i_gravity": ["grav", "h_ref", "R_gas", "T_ref", "gravity_direction"],
+            "coriolis_strength": ["omega", "t_ref", "gravity_direction"],
             "cp_gas": ["gamm", "R_gas"],
             "N_ref": ["grav", "cp_gas", "T_ref"],
             "Nsq_ref": ["grav", "cp_gas", "T_ref"],
@@ -123,6 +129,10 @@ def _init_defaults(self):
         self.blending_type = "half"  # half, full
         self.blending_weight = 0.0 / 16
 
+        # Vertical/gravity axis (array axis index; see utils/axes.py).
+        # 2D runs are x-y by convention and require 1.
+        self.gravity_direction = 1
+
         # Boundary conditions
         self.bdry_type = np.empty((3), dtype=object)
         self.bdry_type[0] = opts.BdryType.PERIODIC
@@ -149,6 +159,10 @@ def _init_defaults(self):
         self.w_wind_speed = 0.0
         self.stratification = self.stratification_function
 
+        # Explicit diffusion (off by default; see flow_solver/numerics/diffusion.py)
+        self.diffusion = False
+        self.diffusion_coeff = 0.0
+
         # Numerics
         self.do_advection = True
         self.limiter_type_scalars = opts.LimiterType.NONE
@@ -207,27 +221,29 @@ def compute_Msq(self):
             self.Msq = self.u_ref * self.u_ref / (self.R_gas * self.T_ref)
 
     def compute_gravity_strength(self):
-        """Compute gravity-related parameters."""
+        """Compute gravity-related parameters along the configured vertical axis."""
+        from . import axes
+
+        v = axes.vertical_axis(self)
         self.i_gravity = np.zeros(3)
         self.gravity_strength = np.zeros(3)
 
-        self.gravity_strength[1] = self.grav * self.h_ref / (self.R_gas * self.T_ref)
-
-        for i in range(3):
-            if (self.gravity_strength[i] > 0.0) or (i == 1):
-                self.i_gravity[i] = 1
-                self.gravity_direction = i
+        self.gravity_strength[v] = self.grav * self.h_ref / (self.R_gas * self.T_ref)
+        self.i_gravity[v] = 1
 
     # Alias for backward compatibility
     compute_i_gravity = compute_gravity_strength
 
     def compute_coriolis_strength(self):
-        """Compute Coriolis parameters."""
+        """Compute Coriolis parameters on the two horizontal axes."""
+        from . import axes
+
+        h1, h2 = axes.horizontal_axes(axes.vertical_axis(self))
         self.i_coriolis = np.zeros(3)
         self.coriolis_strength = np.zeros(3)
 
-        self.coriolis_strength[0] = self.omega * self.t_ref
-        self.coriolis_strength[2] = self.omega * self.t_ref
+        self.coriolis_strength[h1] = self.omega * self.t_ref
+        self.coriolis_strength[h2] = self.omega * self.t_ref
 
     def compute_cp_gas(self):
         """Compute specific heat at constant pressure."""
diff --git a/test_scripts/compare_h5_runs.py b/test_scripts/compare_h5_runs.py
new file mode 100644
index 00000000..ccada4c3
--- /dev/null
+++ b/test_scripts/compare_h5_runs.py
@@ -0,0 +1,86 @@
+"""Bit-for-bit comparison of two pyBELLA output H5 files (Phase gate tool).
+
+CompareSol's per-field tolerances (1e-5) are necessary but not sufficient for
+the axial-agnosticity refactor's pure phases, which must be *bit-identical*
+for the default vertical axis. This walks every dataset common to two run
+files and reports max |a - b|; exit code 1 if any dataset differs (or is
+missing from one side).
+
+Usage:
+    python test_scripts/compare_h5_runs.py baseline.h5 candidate.h5 [--tol 0]
+
+Typical workflow: copy ./outputs/test_<case>/<case>_<N>_<M>.h5 for all cases
+into a scratch baseline dir before starting a phase, rerun the suite after,
+then compare pairwise.
+"""
+
+import argparse
+import sys
+
+import h5py
+import numpy as np
+
+
+def collect(h5):
+    out = {}
+
+    def visit(name, obj):
+        if isinstance(obj, h5py.Dataset):
+            out[name] = obj[...]
+
+    h5.visititems(visit)
+    return out
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("baseline")
+    ap.add_argument("candidate")
+    ap.add_argument("--tol", type=float, default=0.0)
+    ap.add_argument("--quiet", action="store_true")
+    args = ap.parse_args()
+
+    with h5py.File(args.baseline, "r") as fa, h5py.File(args.candidate, "r") as fb:
+        a, b = collect(fa), collect(fb)
+
+    only_a = sorted(set(a) - set(b))
+    only_b = sorted(set(b) - set(a))
+    failures = []
+    worst = []
+
+    for name in sorted(set(a) & set(b)):
+        if a[name].shape != b[name].shape:
+            failures.append(f"{name}: shape {a[name].shape} vs {b[name].shape}")
+            continue
+        if a[name].dtype.kind not in "fiu":
+            continue
+        diff = np.max(np.abs(np.asarray(a[name], float) - np.asarray(b[name], float)))
+        worst.append((diff, name))
+        if diff > args.tol:
+            failures.append(f"{name}: max|diff| = {diff:.3e}")
+
+    if only_a:
+        failures.append(
+            f"only in baseline: {only_a[:5]}{'...' if len(only_a) > 5 else ''}"
+        )
+    if only_b:
+        failures.append(
+            f"only in candidate: {only_b[:5]}{'...' if len(only_b) > 5 else ''}"
+        )
+
+    if not args.quiet:
+        worst.sort(reverse=True)
+        print(f"{len(a)} / {len(b)} datasets; 5 largest diffs:")
+        for d, n in worst[:5]:
+            print(f"  {d:.3e}  {n}")
+
+    if failures:
+        print(f"FAIL ({len(failures)} issues, tol={args.tol:g}):")
+        for f in failures[:20]:
+            print(f"  {f}")
+        sys.exit(1)
+    print(f"OK: bit-identical at tol={args.tol:g}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test_scripts/test_3d_coriolis_oracle.py b/test_scripts/test_3d_coriolis_oracle.py
new file mode 100644
index 00000000..bd21859d
--- /dev/null
+++ b/test_scripts/test_3d_coriolis_oracle.py
@@ -0,0 +1,58 @@
+"""Oracle B: the full-H^-1 3D elliptic path vs the 2D path, WITH Coriolis.
+
+A y-uniform 3D x-z problem with rotation Omega_3 = (Ox, Oy, Oz) is
+physically identical to the 2D x-y problem obtained by relabeling z -> y.
+That relabeling is an odd axis permutation, so the rotation PSEUDOvector
+maps with a sign flip:
+
+    Omega_2D = -(Ox, Oz, Oy),    (u, v, w)_2D = (u, w, v)_3D.
+
+Since the 2D elliptic path embeds the full H^-1 coefficients in its stencil
+(trusted, regression-locked), this validates the 3D full-tensor operator's
+cross terms — signs, axis pairing, and layout — against it. Requires g = 0
+and nonhydrostasy = 1 (the hydrostatic mask and buoyancy term sit on
+different slots under the odd swap).
+
+Uses the travelling-vortex setup of test_3d_elliptic_oracle with
+Omega_3 = (100, 0, 100) (the 3d-coriolis regression case's rotation,
+omega * dt = 1: strong coupling).
+"""
+
+import numpy as np
+
+from pybella.flow_solver.numerics import implicit_euler
+
+from test_3d_elliptic_oracle import build_ud, build_state
+
+TOL = 1e-6
+OMEGA = 100.0
+
+
+def test_3d_full_coriolis_matches_2d():
+    ud3 = build_ud(iny=2, inz=65)
+    ud3.coriolis_strength = np.array([OMEGA, 0.0, OMEGA])
+    mem3 = build_state(ud3, "xz")
+    implicit_euler.do_implicit_part(mem3, ud3, dt=0.01)
+
+    ud2 = build_ud(iny=65, inz=1)
+    # odd permutation (z -> y): pseudovector sign flip
+    ud2.coriolis_strength = np.array([-OMEGA, -OMEGA, 0.0])
+    mem2 = build_state(ud2, "xy")
+    implicit_euler.do_implicit_part(mem2, ud2, dt=0.01)
+
+    jc, jn = 2, 2  # interior y cell / node slice of the 3D state
+    checks = {
+        "p2_nodes": (mem3.npf.p2_nodes[:, jn, :], mem2.npf.p2_nodes),
+        "rhou": (mem3.sol.rhou[:, jc, :], mem2.sol.rhou),
+        "rhow vs rhov (vertical)": (mem3.sol.rhow[:, jc, :], mem2.sol.rhov),
+        "rhov vs rhow (out-of-plane)": (mem3.sol.rhov[:, jc, :], mem2.sol.rhow),
+        "p2 y-uniformity": (mem3.npf.p2_nodes[:, 2, :], mem3.npf.p2_nodes[:, 3, :]),
+        "rhou y-uniformity": (mem3.sol.rhou[:, 2, :], mem3.sol.rhou[:, 3, :]),
+    }
+
+    failures = []
+    for name, (lhs, rhs) in checks.items():
+        diff = np.max(np.abs(lhs - rhs))
+        if diff >= TOL:
+            failures.append(f"{name}: max|diff| = {diff:.3e}")
+    assert not failures, "; ".join(failures)
diff --git a/test_scripts/test_3d_elliptic_oracle.py b/test_scripts/test_3d_elliptic_oracle.py
new file mode 100644
index 00000000..eddb3915
--- /dev/null
+++ b/test_scripts/test_3d_elliptic_oracle.py
@@ -0,0 +1,177 @@
+"""Oracle: the 3D elliptic path on a y-uniform quasi-2D problem must
+reproduce the 2D solver's solution.
+
+Builds twin states carrying the same vortex — 3D in the x-z plane on a
+64x1x64 grid, 2D in the x-y plane on a 64x64 grid — with zero gravity and
+zero Coriolis, runs implicit_euler.do_implicit_part (a pure projection,
+compressibility = 0) on both, and compares the elliptic solve output
+field-by-field. Pass criterion ~ solver tolerance (ud.tol = 1e-8), NOT 1e-2.
+
+On an nx == nz grid a transposed solve is silent in shape terms, so this
+test also asserts exact y-uniformity of the 3D result and that rhov stays
+zero.
+
+Mapping: 2D (x, y, u, v) <-> 3D (x, z, u, w).
+"""
+
+import numpy as np
+
+from pybella.utils import user_data
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.utils import fields, cache
+from pybella.flow_solver.physics import thermodynamics as gd_thermodynamics
+from pybella.flow_solver.physics import hydrostatics
+from pybella.flow_solver.numerics import implicit_euler
+from pybella.tests import test_travelling_vortex_3d_coriolis as tv3d
+
+# ~ solver tolerance (1e-8) with headroom for preconditioner differences
+# between the 2D and 3D paths
+TOL = 1e-6
+
+
+class obj:
+    pass
+
+
+def vortex_plane(a, b):
+    """rho, swirl_a, swirl_b, on a 2D (a, b) meshgrid (cell centres)."""
+    rho0, del_rho, R0, fac = 0.5, 0.5, 0.4, 1024.0
+    ac, bc = 0.5, 0.5
+    r = np.sqrt((a - ac) ** 2 + (b - bc) ** 2)
+    uth = (fac * (1.0 - r / R0) ** 6 * (r / R0) ** 6) * (r < R0)
+    with np.errstate(invalid="ignore", divide="ignore"):
+        ua = np.where(r > 0, uth * (-(b - bc) / r), 0.0)
+        ub = np.where(r > 0, uth * (+(a - ac) / r), 0.0)
+    rho = rho0 + del_rho * (1.0 - (r / R0) ** 2) ** 6 * (r < R0)
+    return rho, ua, ub
+
+
+def p2_plane(a, b, th):
+    """Balanced nodal pressure (centrifugal part only; f = 0)."""
+    R0, fac = 0.4, 1024.0
+    ac, bc = 0.5, 0.5
+    coe = np.array(
+        [
+            1.0 / 12.0,
+            -12.0 / 13.0,
+            9.0 / 2.0,
+            -184.0 / 15.0,
+            609.0 / 32.0,
+            -222.0 / 17.0,
+            -38.0 / 9.0,
+            54.0 / 19.0,
+            783.0 / 20.0,
+            -558.0 / 7.0,
+            1053.0 / 22.0,
+            1014.0 / 23.0,
+            -1473.0 / 16.0,
+            204.0 / 5.0,
+            510.0 / 13.0,
+            -1564.0 / 27.0,
+            153.0 / 8.0,
+            450.0 / 29.0,
+            -269.0 / 15.0,
+            174.0 / 31.0,
+            57.0 / 32.0,
+            -74.0 / 33.0,
+            15.0 / 17.0,
+            -6.0 / 35.0,
+            1.0 / 72.0,
+        ]
+    )
+    r = np.sqrt((a - ac) ** 2 + (b - bc) ** 2)
+    p2n = np.zeros_like(r)
+    for ip in range(25):
+        p2n += fac * (coe[ip] * ((r / R0) ** (12 + ip) - 1.0))
+    p2n *= r / R0 < 1.0
+    return th.Gamma * fac * p2n
+
+
+def build_ud(iny, inz):
+    d = vars(tv3d.UserData())
+    d["iny"] = iny
+    d["inz"] = inz
+    ud = user_data.UserDataInit(**d)
+    ud.coriolis_strength = np.array([0.0, 0.0, 0.0])
+    ud.gravity_strength = np.zeros(3)
+    ud.nonhydrostasy = 1.0
+    ud.is_compressible = 0
+    ud.compressibility = 0.0
+    return ud
+
+
+def build_state(ud, plane):
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = gd_thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    hydrostatics.integrated_state(npf, elem, node, th, ud)
+
+    if plane == "xz":
+        a = elem.x.reshape(-1, 1, 1)
+        b = elem.z.reshape(1, 1, -1)
+        rho, ua, ub = vortex_plane(a + 0 * b, b + 0 * a)
+        sol.rho[...] = rho
+        sol.rhou[...] = rho * ua
+        sol.rhov[...] = 0.0
+        sol.rhow[...] = rho * ub
+        an = node.x[2:-2].reshape(-1, 1, 1)
+        bn = node.z[2:-2].reshape(1, 1, -1)
+        p2 = p2_plane(an + 0 * bn, bn + 0 * an, th)
+        npf.p2_nodes[2:-2, 2:-2, 2:-2] = p2
+    else:  # xy
+        a = elem.x.reshape(-1, 1)
+        b = elem.y.reshape(1, -1)
+        rho, ua, ub = vortex_plane(a + 0 * b, b + 0 * a)
+        sol.rho[...] = rho
+        sol.rhou[...] = rho * ua
+        sol.rhov[...] = rho * ub
+        sol.rhow[...] = 0.0
+        an = node.x[2:-2].reshape(-1, 1)
+        bn = node.y[2:-2].reshape(1, -1)
+        p2 = p2_plane(an + 0 * bn, bn + 0 * an, th)
+        npf.p2_nodes[2:-2, 2:-2] = p2
+
+    sol.rhoY[...] = 1.0
+    sol.rhoX[...] = 0.0
+
+    mem = obj()
+    mem.sol = sol
+    mem.npf = npf
+    mem.elem = elem
+    mem.node = node
+    mem.th = th
+    mem.cache = cache.FlowSolverCache()
+    return mem
+
+
+def test_3d_elliptic_path_matches_2d():
+    dt = 0.01
+
+    ud3 = build_ud(iny=2, inz=65)
+    mem3 = build_state(ud3, "xz")
+    implicit_euler.do_implicit_part(mem3, ud3, dt)
+
+    ud2 = build_ud(iny=65, inz=1)
+    mem2 = build_state(ud2, "xy")
+    implicit_euler.do_implicit_part(mem2, ud2, dt)
+
+    jc, jn = 2, 2  # interior y cell / node slice of the 3D state
+    checks = {
+        "p2_nodes (3D xz-slice vs 2D)": (
+            mem3.npf.p2_nodes[:, jn, :],
+            mem2.npf.p2_nodes,
+        ),
+        "rhou (3D xz-slice vs 2D)": (mem3.sol.rhou[:, jc, :], mem2.sol.rhou),
+        "rhow vs rhov (3D vs 2D)": (mem3.sol.rhow[:, jc, :], mem2.sol.rhov),
+        "p2_nodes y-uniformity (3D)": (
+            mem3.npf.p2_nodes[:, 2, :],
+            mem3.npf.p2_nodes[:, 3, :],
+        ),
+        "rhou y-uniformity (3D)": (mem3.sol.rhou[:, 2, :], mem3.sol.rhou[:, 3, :]),
+        "rhov stays zero (3D)": (mem3.sol.rhov, np.zeros_like(mem3.sol.rhov)),
+    }
+
+    for name, (lhs, rhs) in checks.items():
+        diff = np.max(np.abs(lhs - rhs))
+        assert diff < TOL, f"{name}: max|diff| = {diff:.3e} >= {TOL:.0e}"
diff --git a/test_scripts/test_agnesi_2d_equivalence.py b/test_scripts/test_agnesi_2d_equivalence.py
new file mode 100644
index 00000000..76177426
--- /dev/null
+++ b/test_scripts/test_agnesi_2d_equivalence.py
@@ -0,0 +1,114 @@
+"""Native-2D vs quasi-2D-3D Agnesi equivalence — the lap2D-terrain payoff proof.
+
+Two gates:
+
+1. **Path comparison** (15 golden-master steps, 128x64): native 2D (lap2D
+   with terrain-folded cross terms) vs the proven quasi-2D 3D path (lap3D
+   full tensor). The two laplacian families carry *historically different
+   wall discretizations* — measured on a FLAT wall-bounded impulse, the
+   pre-existing 2D-vs-3D gap is already ~13% rel-L2 in w / ~19% in p2
+   after 5 steps (2026-06-10, solver-tolerance independent). The gates
+   here sit just above that honest cross-discretization floor: they catch
+   sign/axis/J-factor errors (which blow up by orders of magnitude), not
+   stencil-convention differences the flat solver already had.
+
+2. **Absolute physics** (the strong gate): the native-2D run passes the
+   same Smith (1980) analytic-oracle thresholds as the 3D path —
+   wrongness, not just change. Same reduced config as
+   ``test_agnesi_analytic`` (96x48, t U / a = 12), roughly halved runtime
+   in 2D. Calibration 2026-06-10 (native 2D): w 0.40, u' 0.43,
+   drag_ratio 0.98, flux_constancy 0.04 — within a percent of the 3D
+   path's values, which is the real equivalence statement.
+"""
+
+import numpy as np
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import time_update
+from pybella.flow_solver.physics import thermodynamics
+from pybella.flow_solver.utils import cache, fields
+from pybella.flow_solver.utils.boundary import cell_boundary as bdry_c
+from pybella.tests import agnesi_smith_analytic as smith
+from pybella.tests import test_agnesi_hydrostatic as case
+from pybella.utils import user_data
+from pybella.utils.data_structures import ModelState
+
+
+class _StubWriter:
+    def write(self, *args, **kwargs):
+        pass
+
+    def populate(self, *args, **kwargs):
+        pass
+
+    def write_all(self, *args, **kwargs):
+        pass
+
+
+def _run(inz, steps=None, grid=None):
+    ud = user_data.UserDataInit(**vars(case.UserData()))
+    ud.coriolis_strength = np.array(ud.coriolis_strength)
+    ud.inz = inz
+    ud.tout = [1e6]
+    ud.diag = False
+    if steps is not None:
+        ud.stepmax = steps
+    if grid is not None:
+        ud.inx, ud.iny, ud.inbcy = grid
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = case.sol_init(sol, npf, elem, node, th, ud)
+    mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    return time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter()), ud
+
+
+def _slab(arr, ndim):
+    if ndim == 2:
+        return arr[2:-2, 2:-2]
+    return arr[2:-2, 2:-2, 0]
+
+
+def test_native_2d_matches_quasi_2d_3d():
+    mem3, _ = _run(inz=2)
+    mem2, _ = _run(inz=1)
+
+    # measured 2026-06-10: rho 4.9e-6, rhou 9.4e-5, rhov 8.8e-2 (wave
+    # perturbation on a near-zero field), rhoY 4.9e-6, rhoX 1.1e-3
+    for attr, tol in (
+        ("rho", 2e-5),
+        ("rhou", 4e-4),
+        ("rhov", 0.2),
+        ("rhoY", 2e-5),
+        ("rhoX", 5e-3),
+    ):
+        a = _slab(getattr(mem3.sol, attr), mem3.elem.ndim)
+        b = _slab(getattr(mem2.sol, attr), mem2.elem.ndim)
+        scale = max(np.linalg.norm(a), 1e-30)
+        err = np.linalg.norm(a - b) / scale
+        assert err <= tol, f"{attr}: 2D vs 3D rel-L2 {err:.3e} > {tol}"
+
+    # wave field: measured 0.11 (the wall-convention floor); an axis/sign/J
+    # defect lands at O(1)
+    w3 = _slab(mem3.sol.rhov / mem3.sol.rho, mem3.elem.ndim)
+    w2 = _slab(mem2.sol.rhov / mem2.sol.rho, mem2.elem.ndim)
+    err_w = np.linalg.norm(w3 - w2) / np.linalg.norm(w3)
+    assert err_w <= 0.25, f"w wave field: 2D vs 3D rel-L2 {err_w:.3e}"
+
+
+def test_native_2d_passes_smith_oracle():
+    """Native 2D vs the Smith (1980) analytic solution — same gates as the
+    3D oracle in test_agnesi_analytic.py."""
+    mem, ud = _run(inz=1, steps=240, grid=(96 + 1, 48 + 1, 24))
+    metrics, _ = smith.compare(mem, ud, z_lo_SI=1000.0, z_hi_SI=4500.0)
+
+    assert metrics["w"] <= 0.55, f"w rel-L2 vs Smith: {metrics['w']:.3f}"
+    assert metrics["u"] <= 0.60, f"u' rel-L2 vs Smith: {metrics['u']:.3f}"
+    assert (
+        0.85 <= metrics["drag_ratio"] <= 1.15
+    ), f"momentum flux / analytic drag: {metrics['drag_ratio']:.3f}"
+    assert (
+        metrics["flux_constancy"] <= 0.10
+    ), f"momentum-flux height variation: {metrics['flux_constancy']:.3f}"
diff --git a/test_scripts/test_agnesi_analytic.py b/test_scripts/test_agnesi_analytic.py
new file mode 100644
index 00000000..379fbda2
--- /dev/null
+++ b/test_scripts/test_agnesi_analytic.py
@@ -0,0 +1,82 @@
+"""Agnesi mountain-wave analytic oracle: simulation vs Smith (1980).
+
+Runs the ``test_agnesi_hydrostatic`` configuration in-process to a
+quasi-steady state (t U / a ~ 10) and compares the wave field against the
+steady linear hydrostatic solution and the analytic wave drag — catching
+*wrongness* of the terrain-following dynamics, not just *change*.
+
+Gates (rel-L2 in the window 1-9 km, below the sponge):
+- vertical velocity w vs Smith,
+- horizontal perturbation u' vs Smith,
+- vertically integrated momentum flux vs the analytic drag,
+- flux constancy with height (linear steady state transports momentum
+  uniformly to the breaking/sponge level).
+"""
+
+import numpy as np
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import time_update
+from pybella.flow_solver.physics import thermodynamics
+from pybella.flow_solver.utils import cache, fields
+from pybella.flow_solver.utils.boundary import cell_boundary as bdry_c
+from pybella.tests import agnesi_smith_analytic as smith
+from pybella.tests import test_agnesi_hydrostatic as case
+from pybella.utils import user_data
+from pybella.utils.data_structures import ModelState
+
+
+class _StubWriter:
+    def write(self, *args, **kwargs):
+        pass
+
+    def populate(self, *args, **kwargs):
+        pass
+
+    def write_all(self, *args, **kwargs):
+        pass
+
+
+def run_to_steady_state(steps=240):
+    """Reduced oracle config: 96x48, t = steps * 50 s (240 -> t U / a = 12).
+
+    The wave field below ~5 km is quasi-steady by then (vertical group
+    speed ~ U^2 / (N a) = 1 m/s); the comparison window stays below that.
+    Calibration (2026-06-10, this config): w 0.40, u' 0.43, drag_ratio
+    0.98, flux_constancy 0.04. The full-resolution 128x64 run at
+    t U / a = 20 gives w 0.39 / u' 0.31 / drag 1.04 — resolution is not
+    the limiter, residual spin-up transients are.
+    """
+    ud = user_data.UserDataInit(**vars(case.UserData()))
+    ud.coriolis_strength = np.array(ud.coriolis_strength)
+    ud.inx = 96 + 1  # dx ~ 2.08 km (a / dx = 4.8)
+    ud.iny = 48 + 1  # dy = 500 m
+    ud.inbcy = 24  # sponge above 12 km
+    ud.stepmax = steps
+    ud.tout = [1e6]
+    ud.diag = False
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = case.sol_init(sol, npf, elem, node, th, ud)
+    mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    mem = time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+    return mem, ud
+
+
+def test_agnesi_vs_smith():
+    mem, ud = run_to_steady_state()
+    metrics, _ = smith.compare(mem, ud, z_lo_SI=1000.0, z_hi_SI=4500.0)
+
+    # gates ~35-40% above the calibrated values (see run_to_steady_state);
+    # any metric-term sign/factor/orientation bug blows these completely
+    assert metrics["w"] <= 0.55, f"w rel-L2 vs Smith: {metrics['w']:.3f}"
+    assert metrics["u"] <= 0.60, f"u' rel-L2 vs Smith: {metrics['u']:.3f}"
+    assert (
+        0.85 <= metrics["drag_ratio"] <= 1.15
+    ), f"momentum flux / analytic drag: {metrics['drag_ratio']:.3f}"
+    assert (
+        metrics["flux_constancy"] <= 0.10
+    ), f"momentum-flux variation with height: {metrics['flux_constancy']:.3f}"
diff --git a/test_scripts/test_agnesi_smoke.py b/test_scripts/test_agnesi_smoke.py
new file mode 100644
index 00000000..e890eec5
--- /dev/null
+++ b/test_scripts/test_agnesi_smoke.py
@@ -0,0 +1,22 @@
+"""Plumbing smoke test: quasi-2D mountain-wave configuration shape.
+
+Runs the ``smoke_agnesi`` case (x horizontal, y vertical, z degenerate
+periodic) via the real ``pybella -ic`` entry point for a few steps —
+pushing a vertical-slice setup through the full-tensor 3D elliptic path
+(``lap3D``) that terrain metric terms attach to. Flat in Phase 0 of the
+terrain work; the Agnesi hill switches on once the metric-aware operators
+land. No golden master: the Agnesi regression case is separate.
+"""
+
+import subprocess
+
+
+def test_agnesi_smoke_production_path():
+    result = subprocess.run(
+        ["pybella", "-ic", "smoke_agnesi", "-N", "1"], capture_output=True, text=True
+    )
+    assert result.returncode == 0, (
+        f"Command failed with return code {result.returncode}\n"
+        f"STDERR:\n{result.stderr.strip()}\n"
+        f"STDOUT:\n{result.stdout.strip()}"
+    )
diff --git a/test_scripts/test_axes.py b/test_scripts/test_axes.py
new file mode 100644
index 00000000..ca214f7d
--- /dev/null
+++ b/test_scripts/test_axes.py
@@ -0,0 +1,106 @@
+"""Unit tests for the axis-geometry module (axial-agnosticity Phase 0)."""
+
+import numpy as np
+import pytest
+
+from pybella.utils import axes
+
+
+class _UD:
+    def __init__(self, v=None):
+        if v is not None:
+            self.gravity_direction = v
+
+
+def _perm_sign(p):
+    sign = 1
+    p = list(p)
+    for i in range(len(p)):
+        for j in range(i + 1, len(p)):
+            if p[i] > p[j]:
+                sign = -sign
+    return sign
+
+
+@pytest.mark.parametrize("v", [0, 1, 2])
+def test_role_perm_is_cyclic_and_invertible(v):
+    perm = axes.role_perm(v)
+    assert sorted(perm) == [0, 1, 2]
+    assert perm[1] == v  # vertical sits in the middle role slot
+    assert _perm_sign(perm) == +1  # even (cyclic) only — pseudovector safety
+    inv = axes.role_of_axis(v)
+    for role, axis in enumerate(perm):
+        assert inv[axis] == role
+
+
+def test_v1_is_identity():
+    assert axes.role_perm(1) == (0, 1, 2)
+    assert axes.role_of_axis(1) == (0, 1, 2)
+    assert axes.horizontal_axes(1) == (0, 2)
+    assert axes.role_attrs(axes.MOMENTA, 1) == ("rhou", "rhov", "rhow")
+    assert axes.vertical_momentum(_UD()) == "rhov"  # default v = 1
+    assert axes.vertical_axis(_UD()) == 1
+
+
+def test_role_attrs_rotates():
+    assert axes.role_attrs(axes.MOMENTA, 2) == ("rhov", "rhow", "rhou")
+    assert axes.role_attrs(axes.MOMENTA, 0) == ("rhow", "rhou", "rhov")
+
+
+def test_vertical_axis_validation():
+    with pytest.raises(ValueError):
+        axes.vertical_axis(_UD(3))
+    with pytest.raises(ValueError):
+        axes.validate(_UD(2), ndim=2)
+    assert axes.validate(_UD(1), ndim=2) == 1
+    assert axes.validate(_UD(0), ndim=3) == 0
+
+
+def test_wall_slabs_match_legacy_pattern():
+    lo, hi = axes.wall_slabs(3, 1)
+    a = np.arange(4 * 6 * 5).reshape(4, 6, 5)
+    assert np.array_equal(a[lo], a[:, :2, :])
+    assert np.array_equal(a[hi], a[:, -2:, :])
+    lo0, hi0 = axes.wall_slabs(2, 0)
+    b = np.arange(20).reshape(4, 5)
+    assert np.array_equal(b[lo0], b[:2, :])
+    assert np.array_equal(b[hi0], b[-2:, :])
+
+
+@pytest.mark.parametrize("ndim", [2, 3])
+def test_expand_profile_reproduces_legacy_for_v1(ndim):
+    rng = np.random.default_rng(0)
+    counts = (4, 7, 5)[:ndim]
+    prof = rng.standard_normal(counts[1])
+
+    # legacy construction (fields.py get_dSdy / get_S0c)
+    legacy = prof
+    for dim in range(0, ndim, 2):
+        legacy = np.expand_dims(legacy, dim)
+        legacy = np.repeat(legacy, counts[dim], axis=dim)
+
+    new = axes.expand_profile(prof, ndim, 1, counts)
+    assert new.shape == legacy.shape
+    assert np.array_equal(new, legacy)  # bit-identical
+
+
+def test_expand_profile_other_axes():
+    prof = np.arange(5.0)
+    out = axes.expand_profile(prof, 3, 2, (3, 4, 5))
+    assert out.shape == (3, 4, 5)
+    assert np.array_equal(out[1, 2, :], prof)
+    out0 = axes.expand_profile(prof, 3, 0, (5, 3, 4))
+    assert out0.shape == (5, 3, 4)
+    assert np.array_equal(out0[:, 1, 2], prof)
+
+
+def test_permute_axes_roundtrip():
+    rng = np.random.default_rng(1)
+    a = rng.standard_normal((3, 4, 5))
+    sigma = (1, 2, 0)  # cyclic: ref axis i -> twin axis sigma[i]
+    t = axes.permute_axes(a, sigma)
+    assert t.shape == (5, 3, 4)
+    inv = tuple(np.argsort(sigma))
+    assert np.array_equal(axes.permute_axes(t, inv), a)
+    # spot value check: a[i,j,k] should equal t[k,i,j] for sigma=(1,2,0)
+    assert a[2, 1, 3] == t[3, 2, 1]
diff --git a/test_scripts/test_flow_solver.py b/test_scripts/test_flow_solver.py
index 0f8a4d41..4141bcb7 100644
--- a/test_scripts/test_flow_solver.py
+++ b/test_scripts/test_flow_solver.py
@@ -6,9 +6,15 @@
     "ic",
     [
         "test_travelling_vortex",
+        "test_travelling_vortex_3d_coriolis",
         "test_internal_long_wave",
+        "test_igw_baldauf_brdar",
         "test_lamb_wave",
         "test_unstable_lamb",
+        "test_swe_vortex",
+        "test_straka",
+        "test_agnesi_hydrostatic",
+        "test_schaer_ridge",
     ],
 )
 def test_single_run(ic):
diff --git a/test_scripts/test_igw_analytic.py b/test_scripts/test_igw_analytic.py
new file mode 100644
index 00000000..7398285f
--- /dev/null
+++ b/test_scripts/test_igw_analytic.py
@@ -0,0 +1,113 @@
+"""Physics oracle: the igw_baldauf_brdar case vs the linear analytic reference.
+
+Runs the Baldauf & Brdar internal-gravity-wave case in-process (regression
+configuration: dt = 500 s, dx ~ 20 km, t_end = 15500 s, f-plane Coriolis) and
+compares the final x-mean-free wave fields against the numerically-exact
+linear reference (``pybella.tests.baldauf_brdar_analytic``). Unlike the
+golden-master comparisons (which catch *change*), this catches *wrongness*:
+a flipped Coriolis or buoyancy sign, a broken dispersion relation, or wrong
+wave amplitudes blow these bounds by an order of magnitude (a pure sign
+error alone gives rel L2 ~ 1.4-2.0).
+
+Measured rel-L2 (2026-06-10, regression config, AFTER the 2D out-of-plane
+Coriolis fixes in BOTH explicit_euler.do_forward_step and the implicit
+correction's w-row): u 0.285, vo 0.045, w 0.556, p 0.670, rho 0.346.
+Gates are >=1.4x above. The residual was shown
+to decompose into (validated by refinement studies, see
+dev_notes/regression_harness.md):
+
+- temporal:  O((omega dt)^2) phase error — at dt = 125 s:
+  u 0.133, vo 0.028, w 0.272, rho 0.221;
+- spatial:   under-resolved w/rho structure at dx = 20 km — at dx ~ 10 km
+  (f = 0, dt = 125 s): u 0.080, w 0.166, rho 0.197;
+- p' is split between rhoY and p2 internally at these amplitudes, so p is
+  only loosely gated.
+
+History: this oracle originally measured vo pinned at 0.44 rel-L2
+independent of dt with a sim/ref amplitude ratio ~0.6 — the fingerprint of
+the (since fixed) 2D defect where the rhow row of the explicit forward step
+sat behind ``if ndim == 3`` and the out-of-plane momentum received only the
+implicit half of the Coriolis rotation.
+
+Comparator internals validated separately: energy drift ~1e-13 (neutral
+discretisation, exact-in-time eigenpropagation), z-refinement converged
+(refine 2 vs 4 identical to 3 digits), t = 0 round-trip ~3e-4.
+
+Also writes ref/sim/diff snapshot PNGs for u' and w' into the case's output
+directory so the wave physics can be verified by eye alongside the other
+regression images.
+"""
+
+import numpy as np
+
+from pybella.tests import baldauf_brdar_analytic as bb
+
+GATES = {
+    "u": 0.40,
+    "vo": 0.10,
+    "w": 0.80,
+    "p": 1.00,  # loose: pressure decomposition is scheme-internal
+    "rho": 0.50,
+}
+
+
+def _demean(q):
+    return q - q.mean(axis=0, keepdims=True)
+
+
+def test_igw_matches_linear_analytic():
+    ud, ic, end, zc, t_end = bb.run_sim()
+
+    par = bb.IGWParams(ud)
+    L = (ud.xmax - ud.xmin) * ud.h_ref
+    ref, diag = bb.evolve_linear(ic, L, zc, t_end, par)
+
+    # comparator-internal exactness
+    assert diag["energy_drift"] < 1e-9
+
+    failures = []
+    for key, gate in GATES.items():
+        s, r = _demean(end[key]), ref[key]
+        rel = np.linalg.norm(s - r) / np.linalg.norm(r)
+        if rel > gate:
+            failures.append(f"{key}: rel L2 {rel:.3f} > gate {gate}")
+
+    # amplitude sanity on the primary wave field
+    amp_ratio = np.abs(_demean(end["u"])).max() / np.abs(ref["u"]).max()
+    if not (0.7 < amp_ratio < 1.3):
+        failures.append(f"u amplitude ratio sim/ref {amp_ratio:.2f} outside (0.7, 1.3)")
+
+    _write_pngs(end, ref, zc, L)
+
+    assert not failures, "; ".join(failures)
+
+
+def _write_pngs(end, ref, zc, L):
+    import matplotlib
+
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+
+    outdir = "./outputs/test_igw_baldauf_brdar/"
+    nx = end["u"].shape[0]
+    x = (np.arange(nx) + 0.5) * (L / nx) / 1000.0 - L / 2000.0  # km
+    zk = zc / 1000.0
+
+    for key, label in (("u", "u' [m/s]"), ("w", "w' [m/s]")):
+        s, r = _demean(end[key]), ref[key]
+        fig, axs = plt.subplots(1, 3, figsize=(14, 3), sharey=True)
+        for ax, (arr, title) in zip(
+            axs, ((r, "linear analytic"), (s, "pyBELLA"), (s - r, "diff"))
+        ):
+            pc = ax.pcolormesh(x, zk, arr.T, shading="auto", cmap="RdBu_r")
+            ax.set_title(f"{label} — {title}")
+            ax.set_xlabel("x [km]")
+            fig.colorbar(pc, ax=ax)
+        axs[0].set_ylabel("z [km]")
+        fig.tight_layout()
+        fig.savefig(outdir + f"analytic_{key}.png", dpi=110)
+        plt.close(fig)
+
+
+if __name__ == "__main__":
+    test_igw_matches_linear_analytic()
diff --git a/test_scripts/test_permutation_oracle.py b/test_scripts/test_permutation_oracle.py
new file mode 100644
index 00000000..58f1217c
--- /dev/null
+++ b/test_scripts/test_permutation_oracle.py
@@ -0,0 +1,205 @@
+"""Permutation oracle: the endgame proof of axial agnosticity.
+
+The 2D internal-long-wave reference (x-y plane, gravity on axis 1,
+stratified, walls, full Coriolis) is embedded as a quasi-2D 3D state with
+its axes relabeled by a CYCLIC permutation sigma (ref axis i -> twin axis
+sigma[i]) and the vertical placed accordingly:
+
+- T2: sigma = (1, 2, 0), gravity_direction = 2  (z-vertical convention)
+- T0: sigma = (2, 0, 1), gravity_direction = 0  (x-vertical)
+
+Only cyclic (even) permutations are physical relabelings: the rotation
+vector is a pseudovector, so its components permute WITHOUT sign flips —
+and indeed `compute_coriolis_strength` produces exactly the sigma-mapped
+Omega from `omega` and `gravity_direction` alone, with no hand-tuning.
+
+Both twins are stepped K times through the full solver (advection +
+explicit + implicit/elliptic) and must reproduce the sigma-mapped 2D
+reference fields plus exact uniformity along the degenerate axis. This
+exercises, off the y-axis: gravity reads, buoyancy on MOMENTA[v],
+hydrostatic profiles along v, wall zeroing / gravity ghost cells /
+quasi-2D broadcasts on arbitrary axes, role-bound Coriolis (explicit +
+H^-1), the full-tensor 3D elliptic operator, and the advection sweeps.
+"""
+
+import numpy as np
+import pytest
+
+from pybella.utils import user_data, data_structures, axes
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import time_update as dis_time_update
+from pybella.flow_solver.utils import fields, cache
+from pybella.flow_solver.utils.boundary import cell_boundary as bdry_c
+from pybella.flow_solver.physics import thermodynamics as gd_thermodynamics
+from pybella.tests.test_internal_long_wave import UserData as ILW, sol_init
+
+K_STEPS = 8
+TOL = 1e-6
+
+PROFILE_ATTRS = ("p0", "p20", "rho0", "S0", "S10", "pi0", "rhoY0", "Y0")
+
+
+class _StubWriter:
+    def write(self, *a, **k):
+        pass
+
+    def populate(self, *a, **k):
+        pass
+
+
+def _build_ref():
+    ud = user_data.UserDataInit(**vars(ILW()))
+    ud.diag = False
+    ud.stepmax = K_STEPS
+
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = gd_thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = sol_init(sol, npf, elem, node, th, ud)
+
+    mem = data_structures.ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    return mem, ud
+
+
+def _embed(ref2d, sigma, dummy_axis, dummy_count):
+    """ref (a, b) array -> twin 3D array, broadcast along the dummy axis."""
+    arr = ref2d[:, :, None]  # ref axes (0, 1, dummy=2)
+    arr = axes.permute_axes(arr, sigma)
+    return np.repeat(arr, dummy_count, axis=dummy_axis)
+
+
+def _build_twin(ref_mem, ud_ref, sigma, v):
+    d = vars(ILW())
+    ud = user_data.UserDataInit(**d)
+    ud.diag = False
+    ud.stepmax = K_STEPS
+    ud.gravity_direction = v  # retriggers gravity/coriolis computation
+
+    # relabel grid extents and counts: ref axis i -> twin axis sigma[i];
+    # the former (collapsed) ref z-axis becomes the twin's degenerate axis
+    ins = [None] * 3
+    mins = [None] * 3
+    maxs = [None] * 3
+    bdry = [None] * 3
+    ref_ins = (ud_ref.inx, ud_ref.iny, 2)
+    ref_mins = (ud_ref.xmin, ud_ref.ymin, 0.0)
+    ref_maxs = (ud_ref.xmax, ud_ref.ymax, 1.0)
+    for i in range(3):
+        ins[sigma[i]] = ref_ins[i]
+        mins[sigma[i]] = ref_mins[i]
+        maxs[sigma[i]] = ref_maxs[i]
+        bdry[sigma[i]] = ud_ref.bdry_type[i]
+    ud.inx, ud.iny, ud.inz = ins
+    ud.xmin, ud.ymin, ud.zmin = mins
+    ud.xmax, ud.ymax, ud.zmax = maxs
+    for i in range(3):
+        ud.bdry_type[i] = bdry[i]
+
+    # the pseudovector check: with omega + gravity_direction alone, the
+    # computed Coriolis vector must equal the sigma-mapped reference one
+    expected = np.zeros(3)
+    for i in range(3):
+        expected[sigma[i]] = ud_ref.coriolis_strength[i]
+    assert np.array_equal(ud.coriolis_strength, expected), (
+        ud.coriolis_strength,
+        expected,
+    )
+
+    ud.nonhydrostasy = ud_ref.nonhydrostasy
+    ud.compressibility = ud_ref.compressibility
+
+    elem, node = dis_grid.grid_init(ud)
+    axes.validate(ud, elem.ndim)
+    sol = fields.CellSolField(elem.sc)
+    th = gd_thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+
+    dummy = sigma[2]
+    nc_dummy = elem.sc[dummy]
+    nn_dummy = node.sc[dummy]
+
+    # cell fields: embed sigma-mapped; momenta map componentwise by sigma
+    for fld in ("rho", "rhoY", "rhoX"):
+        getattr(sol, fld)[...] = _embed(
+            getattr(ref_mem.sol, fld), sigma, dummy, nc_dummy
+        )
+    for i in range(3):
+        twin_name = axes.MOMENTA[sigma[i]]
+        getattr(sol, twin_name)[...] = _embed(
+            getattr(ref_mem.sol, axes.MOMENTA[i]), sigma, dummy, nc_dummy
+        )
+
+    # node / cell pressure fields
+    npf.p2_nodes[...] = _embed(ref_mem.npf.p2_nodes, sigma, dummy, nn_dummy)
+    npf.p2_cells[...] = _embed(ref_mem.npf.p2_cells, sigma, dummy, nc_dummy)
+
+    # hydrostatic profiles: 1D along the vertical, lengths match by design
+    for attr in PROFILE_ATTRS:
+        getattr(npf.HydroState, attr)[...] = getattr(ref_mem.npf.HydroState, attr)
+        getattr(npf.HydroState_n, attr)[...] = getattr(ref_mem.npf.HydroState_n, attr)
+
+    mem = data_structures.ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    return mem, ud
+
+
+def _run(mem, ud):
+    return dis_time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+
+
+@pytest.mark.parametrize(
+    "sigma,v",
+    [((1, 2, 0), 2), ((2, 0, 1), 0)],
+    ids=["T2-z-vertical", "T0-x-vertical"],
+)
+def test_permutation_twin(sigma, v):
+    ref_mem, ud_ref = _build_ref()
+    twin_mem, ud_twin = _build_twin(ref_mem, ud_ref, sigma, v)
+
+    ref_mem = _run(ref_mem, ud_ref)
+    twin_mem = _run(twin_mem, ud_twin)
+    assert abs(ref_mem.time.t - twin_mem.time.t) == 0.0
+
+    dummy = sigma[2]
+    # interior slices; pick the first interior index on the dummy axis
+    i2_ref = (slice(2, -2), slice(2, -2))
+
+    def twin_slice(arr3, node=False):
+        idx = [slice(2, -2)] * 3
+        idx[dummy] = 2
+        sl = arr3[tuple(idx)]
+        # remaining two axes are (ref0, ref1) in sigma order; sort them back
+        a, b = [sigma[i] for i in (0, 1)]
+        return sl if a < b else sl.T
+
+    failures = []
+
+    def check(name, twin3, ref2):
+        diff = np.max(np.abs(twin_slice(twin3) - ref2[i2_ref]))
+        if not diff < TOL:
+            failures.append(f"{name}: max|diff| = {diff:.3e}")
+
+    check("rho", twin_mem.sol.rho, ref_mem.sol.rho)
+    check("rhoY", twin_mem.sol.rhoY, ref_mem.sol.rhoY)
+    check("rhoX", twin_mem.sol.rhoX, ref_mem.sol.rhoX)
+    for i in range(3):
+        check(
+            f"momentum {axes.MOMENTA[i]} -> {axes.MOMENTA[sigma[i]]}",
+            getattr(twin_mem.sol, axes.MOMENTA[sigma[i]]),
+            getattr(ref_mem.sol, axes.MOMENTA[i]),
+        )
+    check("p2_nodes", twin_mem.npf.p2_nodes, ref_mem.npf.p2_nodes)
+
+    # exact uniformity along the degenerate axis (interior layers)
+    lo = [slice(None)] * 3
+    hi = [slice(None)] * 3
+    lo[dummy], hi[dummy] = 2, 3
+    for fld in ("rho", "rhou", "rhov", "rhow"):
+        arr = getattr(twin_mem.sol, fld)
+        uni = np.max(np.abs(arr[tuple(lo)] - arr[tuple(hi)]))
+        if uni != 0.0:
+            failures.append(f"uniformity {fld}: {uni:.3e}")
+
+    assert not failures, "; ".join(failures)
diff --git a/test_scripts/test_schaer_analytic.py b/test_scripts/test_schaer_analytic.py
new file mode 100644
index 00000000..12b9f349
--- /dev/null
+++ b/test_scripts/test_schaer_analytic.py
@@ -0,0 +1,184 @@
+"""Schär ridge: linear-oracle gates + the Gal-Chen-vs-SLEVE discriminator.
+
+Runs the ``test_schaer_ridge`` configuration in-process on a reduced grid
+(+-32 km, 128x48, dx = 500 m) to quasi-steady state (480 x 12.5 s = 6000 s,
+t U / a = 12) twice — once with the case's SLEVE transform, once forced to
+Gal-Chen — and asserts:
+
+1. SLEVE vs the linear FFT oracle (``tests/schaer_linear_analytic.py``):
+   field, drag and flux-constancy gates. Calibration 2026-06-10: w 0.312,
+   u' 0.307, drag_ratio 1.104, flux_constancy 0.043. (At 800 steps the
+   periodic-domain mean-flow deceleration drags drag_ratio to ~1.2 — the
+   metrics are quoted at the 480-step quasi-steady window on purpose.)
+2. The discriminator: the small-scale spectral fraction of w at 4-9 km —
+   where the lambda = 4 km response is evanescent-dead in the true
+   solution — must collapse under SLEVE relative to Gal-Chen
+   (calibration: E_ss 0.004 vs 0.070, a factor ~17) and w must agree with
+   linear theory at least as well. Relative assertions: calibration drift
+   cannot silently invert the conclusion.
+
+The oracle itself is self-tested here against Smith's closed-form Agnesi
+drag and the evanescent-ridge zero-drag property (no simulation needed).
+"""
+
+import matplotlib
+
+matplotlib.use("Agg")
+
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import terrain
+from pybella.flow_solver.discretisation import time_update
+from pybella.flow_solver.physics import thermodynamics
+from pybella.flow_solver.utils import cache, fields
+from pybella.flow_solver.utils.boundary import cell_boundary as bdry_c
+from pybella.tests import agnesi_smith_analytic as smith
+from pybella.tests import schaer_linear_analytic as lin
+from pybella.tests import test_schaer_ridge as case
+from pybella.utils import user_data
+from pybella.utils.data_structures import ModelState
+
+
+class _StubWriter:
+    def write(self, *args, **kwargs):
+        pass
+
+    def populate(self, *args, **kwargs):
+        pass
+
+    def write_all(self, *args, **kwargs):
+        pass
+
+
+def run_to_steady_state(transform="sleve", steps=480):
+    """Reduced oracle config: +-32 km, 128x48 (dx 500 m), sponge above
+    ~11.4 km, t U / a = 12. ~10 s per run."""
+    ud = user_data.UserDataInit(**vars(case.UserData()))
+    ud.coriolis_strength = np.array(ud.coriolis_strength)
+    ud.xmin, ud.xmax = -32000.0 / ud.h_ref, 32000.0 / ud.h_ref
+    ud.inx = 128 + 1
+    ud.iny = 48 + 1
+    ud.inbcy = 20
+    ud.stepmax = steps
+    ud.tout = [1e6]
+    ud.diag = False
+    if transform == "galchen":
+        ud.vertical_transform = terrain.GalChenTransform()
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = case.sol_init(sol, npf, elem, node, th, ud)
+    mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    mem = time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+    return mem, ud
+
+
+# --- oracle self-tests (no simulation) ---------------------------------------
+
+
+def test_oracle_reproduces_smith_drag():
+    U, N, h0, a, rho0 = 10.0, 0.01, 100.0, 10000.0, 1.2
+    L, nx = 400e3, 2048
+    x = (np.arange(nx) + 0.5) * (L / nx) - L / 2
+    h = h0 * a**2 / (x**2 + a**2)
+    D = lin.analytic_drag(x, h, U, N, rho0)
+    D_smith = 0.25 * np.pi * rho0 * N * U * h0**2
+    assert abs(D / D_smith - 1.0) < 0.02
+
+
+def test_oracle_evanescent_ridge_has_no_drag():
+    U, N = 10.0, 0.01
+    L, nx = 400e3, 2048
+    x = (np.arange(nx) + 0.5) * (L / nx) - L / 2
+    h = 50.0 * np.cos(2 * np.pi * x / 4000.0) * np.exp(-((x / 20000.0) ** 2))
+    D = lin.analytic_drag(x, h, U, N, 1.2)
+    D_scale = 0.25 * np.pi * 1.2 * N * U * 100.0**2
+    assert abs(D) < 1e-6 * D_scale
+
+
+def test_oracle_fields_match_smith_for_agnesi():
+    """Analytic-vs-analytic: the FFT solution against Smith's closed form
+    (hydrostatic approximation) for the Agnesi profile — agreement to the
+    size of the nonhydrostatic correction at N a / U = 10."""
+    U, N, h0, a = 10.0, 0.01, 100.0, 10000.0
+    L, nx = 400e3, 1024
+    x = (np.arange(nx) + 0.5) * (L / nx) - L / 2
+    h = h0 * a**2 / (x**2 + a**2)
+    zlev = np.linspace(500.0, 9000.0, 30)
+    z = np.broadcast_to(zlev, (nx, zlev.size)).copy()
+    w_f, _ = lin.linear_fields(x, z, h, U, N)
+    _, w_s, _ = smith.smith_fields(x, zlev, {"U": U, "N": N, "h0": h0, "a": a})
+    assert np.linalg.norm(w_f - w_s) / np.linalg.norm(w_s) < 0.2
+
+
+# --- simulation gates + discriminator ----------------------------------------
+
+
+def _plot_comparison(metrics_by_tr, fields_by_tr, ud):
+    out_dir = os.path.join("outputs", "schaer_discriminator")
+    os.makedirs(out_dir, exist_ok=True)
+    fig, axs = plt.subplots(1, 3, figsize=(16, 4.2), sharey=True)
+    (x, z, w_ref), labels = fields_by_tr["ref"], ["sleve", "galchen"]
+    for ax, key in zip(axs[:2], labels):
+        xx, zz, w = fields_by_tr[key]
+        lim = np.abs(w_ref).max()
+        ax.pcolormesh(
+            xx / 1e3, zz / 1e3, w, cmap="RdBu_r", vmin=-lim, vmax=lim, shading="auto"
+        )
+        ax.set_title(f"w [{key}]  E_ss={metrics_by_tr[key]['E_ss']:.3f}")
+        ax.set_xlabel("x [km]")
+    lim = np.abs(w_ref).max()
+    axs[2].pcolormesh(
+        x / 1e3, z / 1e3, w_ref, cmap="RdBu_r", vmin=-lim, vmax=lim, shading="auto"
+    )
+    axs[2].set_title("w [linear oracle]")
+    axs[2].set_xlabel("x [km]")
+    axs[0].set_ylabel("z [km]")
+    fig.tight_layout()
+    fig.savefig(os.path.join(out_dir, "schaer_w_discriminator.png"), dpi=130)
+    plt.close(fig)
+
+
+def test_schaer_discriminator():
+    metrics = {}
+    plot_fields = {}
+    for tr in ("sleve", "galchen"):
+        mem, ud = run_to_steady_state(tr)
+        m, _ = lin.compare(mem, ud, z_lo_SI=1000.0, z_hi_SI=6000.0)
+        metrics[tr] = m
+        x, z, _, w_sim, _ = smith.sim_perturbations_SI(mem, ud)
+        xx = np.broadcast_to(x.reshape(-1, 1), z.shape)
+        plot_fields[tr] = (xx, z, w_sim)
+
+    h_SI = ud.orography(x / ud.h_ref, 0.0) * ud.h_ref
+    w_ref, _ = lin.linear_fields(x, z, h_SI, ud.U0, ud.NN)
+    plot_fields["ref"] = (xx, z, w_ref)
+    _plot_comparison(metrics, plot_fields, ud)
+
+    sl, gc = metrics["sleve"], metrics["galchen"]
+
+    # 1. SLEVE vs linear oracle (calibrated 0.312 / 0.307 / 1.104 / 0.043;
+    #    Nh0/U = 0.25 is weakly nonlinear — gates ~40% above calibration)
+    assert sl["w"] <= 0.45, f"w rel-L2 vs oracle: {sl['w']:.3f}"
+    assert sl["u"] <= 0.45, f"u' rel-L2 vs oracle: {sl['u']:.3f}"
+    assert 0.90 <= sl["drag_ratio"] <= 1.25, f"drag ratio: {sl['drag_ratio']:.3f}"
+    assert sl["flux_constancy"] <= 0.10, f"flux constancy: {sl['flux_constancy']:.3f}"
+
+    # 2. the discriminator (relative — calibration drift cannot invert it):
+    #    small-scale w aloft collapses under SLEVE (calibrated 0.004 vs
+    #    0.070), and the wave field agrees with linear theory at least as
+    #    well as under Gal-Chen
+    assert sl["E_ss"] <= 0.5 * gc["E_ss"], (
+        f"SLEVE small-scale fraction {sl['E_ss']:.4f} not below half of "
+        f"Gal-Chen's {gc['E_ss']:.4f}"
+    )
+    assert sl["E_ss"] <= 0.02, f"absolute small-scale fraction: {sl['E_ss']:.4f}"
+    assert (
+        sl["w"] <= gc["w"] + 0.02
+    ), f"SLEVE w ({sl['w']:.3f}) worse than Gal-Chen ({gc['w']:.3f})"
diff --git a/test_scripts/test_schaer_smoke.py b/test_scripts/test_schaer_smoke.py
new file mode 100644
index 00000000..635c8155
--- /dev/null
+++ b/test_scripts/test_schaer_smoke.py
@@ -0,0 +1,22 @@
+"""Plumbing smoke test: Schär ridge through the production entry point.
+
+Runs ``test_schaer_ridge`` (native 2D, SLEVE transform, RAYLEIGH top) via
+``pybella -ic`` — the same invocation the golden-master comparison uses —
+and gates on a clean return code. The physics gates live in
+``test_schaer_analytic.py``.
+"""
+
+import subprocess
+
+
+def test_schaer_smoke_production_path():
+    result = subprocess.run(
+        ["pybella", "-ic", "test_schaer_ridge", "-N", "1"],
+        capture_output=True,
+        text=True,
+    )
+    assert result.returncode == 0, (
+        f"Command failed with return code {result.returncode}\n"
+        f"STDERR:\n{result.stderr.strip()}\n"
+        f"STDOUT:\n{result.stdout.strip()}"
+    )
diff --git a/test_scripts/test_terrain_2d_oracle.py b/test_scripts/test_terrain_2d_oracle.py
new file mode 100644
index 00000000..e01b4fea
--- /dev/null
+++ b/test_scripts/test_terrain_2d_oracle.py
@@ -0,0 +1,173 @@
+"""Native-2D terrain elliptic oracle: discrete div∘correction == operator.
+
+2D sibling of ``test_terrain_elliptic_oracle.py``: the lap2D gather kernel
+with the terrain-folded coefficient arrays (C = wplus ⊙ J A^T H^-1 A, the
+cross terms riding the pre-existing cxy/cyx slots) must be exactly the
+composition of the 2D metric divergence with the metric-corrected momentum
+correction.
+
+Checks, on the smoke_agnesi configuration collapsed to native 2D (inz = 1):
+
+1. flat baseline: the composition identity holds for the plain 2D solver
+   (the lap2D family was never proven this way — this gates the phase),
+2. the same with out-of-plane Coriolis (exercises the legacy cxy/cyx path),
+3. forced-flat metric == plain lap2D application (bypass contract),
+4. terrain (witch-of-Agnesi hill): composition identity, with and without
+   Coriolis (terrain cross terms + H^-1 off-diagonals folded together).
+
+The 2D solve vector is the node.i2 interior box (transposed C-ravel); the
+operator output lives on the rhs[node.i1] box of the convolution-shaped
+node arrays. Comparisons exclude a 3-node boundary window as in 3D.
+"""
+
+import numpy as np
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import terrain
+from pybella.flow_solver.numerics import coriolis, implicit_euler
+from pybella.flow_solver.physics import thermodynamics
+from pybella.flow_solver.utils import cache, fields
+from pybella.flow_solver.utils.boundary import cell_boundary as bdry_c
+from pybella.flow_solver.utils.boundary import node_boundary as bdry_n
+from pybella.tests import smoke_agnesi
+from pybella.utils import user_data
+from pybella.utils.data_structures import ModelState
+from pybella.utils.operators import divergence
+from pybella.utils.operators.laplacian import preconditioner
+
+
+def _agnesi_hill(ud, h0_m=300.0, a_m=5000.0):
+    h0 = h0_m / ud.h_ref
+    a = a_m / ud.h_ref
+
+    def h(xi1, xi2):
+        return h0 * a**2 / (xi1**2 + a**2) + 0.0 * xi2
+
+    return h
+
+
+def _make_mem(orography=None, coriolis_z=0.0, sleve=False):
+    ud = user_data.UserDataInit(**vars(smoke_agnesi.UserData()))
+    ud.coriolis_strength = np.array(ud.coriolis_strength)
+    ud.coriolis_strength[2] = coriolis_z
+    ud.inz = 1  # collapse the degenerate axis: native 2D grid
+    ud.orography = orography(ud) if orography is not None else None
+    if sleve:
+        hill = ud.orography
+        ud.orography_smooth = lambda xi1, xi2: 0.5 * hill(xi1, xi2)
+        ud.vertical_transform = terrain.SLEVETransform(
+            s1=6000.0 / ud.h_ref, s2=1500.0 / ud.h_ref
+        )
+    elem, node = dis_grid.grid_init(ud)
+    assert elem.ndim == 2
+    sol = fields.CellSolField(elem.sc)
+    th = thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = smoke_agnesi.sol_init(sol, npf, elem, node, th, ud)
+    mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    return mem, ud
+
+
+def _smooth_p_box(node):
+    """Deterministic smooth pressure on the node.i2 solve box."""
+    x = node.x[node.igx : -node.igx].reshape(-1, 1)
+    y = node.y[node.igy : -node.igy].reshape(1, -1)
+    Lx = node.x[-1] - node.x[0]
+    return (
+        np.sin(2 * np.pi * x / Lx) * np.cos(np.pi * y)
+        + 0.1 * np.cos(2 * np.pi * x / Lx + 0.3) * y
+    )
+
+
+def _diag_inv_like_solver(mem, ud, dt):
+    """Replicate the diag_inv construction of _prepare_2d_system.
+
+    Geometric factors only — the legacy 2D preconditioner keeps H^-1 out
+    of the diagonal, and the terrain branch preserves that.
+    """
+    del ud, dt
+    if mem.elem.metric is not None:
+        met = mem.elem.metric
+        return preconditioner.prepare_diag(
+            mem.npf,
+            mem.node,
+            cii=(
+                mem.npf.wplus[0] * met.J,
+                mem.npf.wplus[1] * (1.0 + met.G1 * met.G1) * met.ooJ,
+                None,
+            ),
+        )
+    return preconditioner.prepare_diag(mem.npf, mem.node)
+
+
+def _operator_and_composition(orography=None, coriolis_z=0.0, sleve=False):
+    mem, ud = _make_mem(orography, coriolis_z, sleve)
+    node = mem.node
+    dt = float(ud.dtfixed)
+
+    implicit_euler.operator_coefficients_nodes(mem, ud, dt)
+
+    # operator side (the gather kernel ravels coefficient copies at build
+    # time, so the linop is immune to later mutation)
+    lap, _ = implicit_euler._prepare_linear_system(mem, ud, dt)
+    p_box = _smooth_p_box(node)
+    lhs = np.asarray(lap @ p_box.T.ravel()).reshape(node.iicy, node.iicx).T
+
+    # composition side: divergence of the pure pressure correction
+    diag_inv = _diag_inv_like_solver(mem, ud, dt)
+    p_full = np.zeros((node.icx, node.icy))
+    p_full[node.i2] = p_box
+    bdry_n.set_ghost_nodes(p_full, node, ud)
+
+    mem.sol.rhou[...] = 0.0
+    mem.sol.rhov[...] = 0.0
+    mem.sol.rhow[...] = 0.0
+    implicit_euler._correction_nodes(mem, ud, dt, p_full, 0)
+    rhs = np.zeros_like(mem.npf.rhs)
+    divergence.compute_at_nodes(rhs, mem.elem, mem.sol, ud)
+
+    comp = diag_inv[node.i1] * (
+        -(1.0 / dt) * rhs[node.i1] + mem.npf.wcenter[node.i1] * p_box
+    )
+    return lhs, comp
+
+
+def _rel_err(lhs, comp):
+    win = (slice(3, -3), slice(3, -3))
+    scale = np.max(np.abs(lhs[win]))
+    return np.max(np.abs((lhs - comp)[win])) / scale
+
+
+def test_composition_identity_flat_baseline():
+    lhs, comp = _operator_and_composition()
+    assert _rel_err(lhs, comp) <= 1e-12
+
+
+def test_composition_identity_flat_with_coriolis():
+    lhs, comp = _operator_and_composition(coriolis_z=0.2)
+    assert _rel_err(lhs, comp) <= 1e-12
+
+
+def test_flat_metric_matches_plain_operator():
+    flat = lambda ud: (lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2)
+    lhs_plain, _ = _operator_and_composition()
+    lhs_flat, _ = _operator_and_composition(flat)
+    scale = np.max(np.abs(lhs_plain))
+    assert np.max(np.abs(lhs_flat - lhs_plain)) / scale <= 1e-13
+
+
+def test_composition_identity_with_terrain():
+    lhs, comp = _operator_and_composition(_agnesi_hill)
+    assert _rel_err(lhs, comp) <= 1e-12
+
+
+def test_composition_identity_terrain_and_coriolis():
+    lhs, comp = _operator_and_composition(_agnesi_hill, coriolis_z=0.2)
+    assert _rel_err(lhs, comp) <= 1e-12
+
+
+def test_composition_identity_terrain_sleve():
+    """Eta-dependent Jacobian through the native-2D elliptic assembly."""
+    lhs, comp = _operator_and_composition(_agnesi_hill, sleve=True)
+    assert _rel_err(lhs, comp) <= 1e-12
diff --git a/test_scripts/test_terrain_elliptic_oracle.py b/test_scripts/test_terrain_elliptic_oracle.py
new file mode 100644
index 00000000..0f098875
--- /dev/null
+++ b/test_scripts/test_terrain_elliptic_oracle.py
@@ -0,0 +1,165 @@
+"""Terrain elliptic-operator oracle: discrete div∘correction == operator.
+
+The semi-implicit projection is consistent only if the assembled elliptic
+operator (lap3D with the C_ij = wplus * (J A^T H^-1 A) tensor) is exactly
+the composition of the discrete divergence with the discrete metric-
+corrected momentum correction — mismatched staggering or a dropped 1/J
+shows up here long before it corrupts a mountain-wave run.
+
+Checks, on the smoke_agnesi configuration (quasi-2D x-y-vertical 3D):
+
+1. flat baseline: the composition identity holds for the plain solver
+   (guards the test's own assembly against phantom mismatches),
+2. forced-flat metric == plain operator application (~1e-13),
+3. terrain (witch-of-Agnesi hill): composition identity still holds.
+
+Comparisons exclude a 3-node boundary window: ghost reconstruction inside
+the lap3D kernel and production ghost filling are convention-equivalent
+but not bit-equal at the box edge; interior nodes carry the full stencil.
+"""
+
+import numpy as np
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import terrain
+from pybella.flow_solver.numerics import coriolis, implicit_euler
+from pybella.flow_solver.physics import thermodynamics
+from pybella.flow_solver.utils import cache, fields
+from pybella.flow_solver.utils.boundary import cell_boundary as bdry_c
+from pybella.flow_solver.utils.boundary import node_boundary as bdry_n
+from pybella.tests import smoke_agnesi
+from pybella.utils import axes, user_data
+from pybella.utils.operators import divergence
+from pybella.utils.operators.laplacian import preconditioner
+
+
+def _agnesi_hill(ud, h0_m=300.0, a_m=5000.0):
+    h0 = h0_m / ud.h_ref
+    a = a_m / ud.h_ref
+
+    def h(xi1, xi2):
+        return h0 * a**2 / (xi1**2 + a**2) + 0.0 * xi2
+
+    return h
+
+
+def _sleve_config(ud):
+    """Split the oracle hill 50/50 smooth/residual (analytic, trivially
+    consistent) and pick decay scales inside the 10 km domain."""
+    hill = ud.orography
+    ud.orography_smooth = lambda xi1, xi2: 0.5 * hill(xi1, xi2)
+    ud.vertical_transform = terrain.SLEVETransform(
+        s1=6000.0 / ud.h_ref, s2=1500.0 / ud.h_ref
+    )
+
+
+def _make_mem(orography=None, sleve=False):
+    ud = user_data.UserDataInit(**vars(smoke_agnesi.UserData()))
+    ud.coriolis_strength = np.array(ud.coriolis_strength)
+    # smoke_agnesi carries its own hill; the oracle controls terrain itself
+    ud.orography = orography(ud) if orography is not None else None
+    if sleve:
+        _sleve_config(ud)
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = smoke_agnesi.sol_init(sol, npf, elem, node, th, ud)
+    from pybella.utils.data_structures import ModelState
+
+    mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    return mem, ud
+
+
+def _smooth_p_box(node):
+    """Deterministic smooth pressure on the node.isc solve box."""
+    x = node.x[1:-1].reshape(-1, 1, 1)
+    y = node.y[1:-1].reshape(1, -1, 1)
+    z = node.z[1:-1].reshape(1, 1, -1)
+    Lx = node.x[-1] - node.x[0]
+    return (
+        np.sin(2 * np.pi * x / Lx) * np.cos(np.pi * y)
+        + 0.1 * np.cos(2 * np.pi * x / Lx + 0.3) * y
+        + 0.0 * z
+    )
+
+
+def _diag_inv_like_solver(mem, ud, dt):
+    """Replicate the diag_inv construction of _prepare_3d_system."""
+    hv = coriolis.compute_inverse_coefficients(mem, ud, dt)
+    h_role = ((hv[0], hv[1], hv[2]), (hv[3], hv[4], hv[5]), (hv[6], hv[7], hv[8]))
+    if mem.elem.metric is not None:
+        h_role = terrain.elliptic_tensor(mem.elem.metric, h_role)
+    rho_of = axes.role_of_axis(axes.vertical_axis(ud))
+    cij = [
+        [mem.npf.wplus[i] * h_role[rho_of[i]][rho_of[j]] for j in range(3)]
+        for i in range(3)
+    ]
+    return preconditioner.prepare_diag(
+        mem.npf, mem.node, cii=(cij[0][0], cij[1][1], cij[2][2])
+    )
+
+
+def _operator_and_composition(orography=None, sleve=False):
+    mem, ud = _make_mem(orography, sleve)
+    node = mem.node
+    dt = float(ud.dtfixed)
+
+    implicit_euler.operator_coefficients_nodes(mem, ud, dt)
+
+    # operator side (captures coefficient copies, immune to later mutation)
+    lap, _ = implicit_euler._prepare_linear_system(mem, ud, dt)
+    p_box = _smooth_p_box(node)
+    lhs = np.asarray(lap @ p_box.ravel()).reshape(node.isc)
+
+    # composition side: divergence of the pure pressure correction
+    diag_inv = _diag_inv_like_solver(mem, ud, dt)
+    p_full = np.zeros(node.sc)
+    p_full[node.i1] = p_box
+    bdry_n.set_ghost_nodes(p_full, node, ud)
+
+    mem.sol.rhou[...] = 0.0
+    mem.sol.rhov[...] = 0.0
+    mem.sol.rhow[...] = 0.0
+    implicit_euler._correction_nodes(mem, ud, dt, p_full, 0)
+    rhs = np.zeros(node.isc)
+    divergence.compute_at_nodes(rhs, mem.elem, mem.sol, ud)
+
+    comp = diag_inv * (-(1.0 / dt) * rhs + mem.npf.wcenter * p_box)
+    return lhs, comp
+
+
+def _window(shape):
+    """3-node inset where the axis allows it (degenerate axes keep 1)."""
+    return tuple(slice(3, -3) if n > 8 else slice(1, -1) for n in shape)
+
+
+def _rel_err(lhs, comp):
+    win = _window(lhs.shape)
+    scale = np.max(np.abs(lhs[win]))
+    return np.max(np.abs((lhs - comp)[win])) / scale
+
+
+def test_composition_identity_flat_baseline():
+    lhs, comp = _operator_and_composition()
+    assert _rel_err(lhs, comp) <= 1e-12
+
+
+def test_flat_metric_matches_plain_operator():
+    flat = lambda ud: (lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2)
+    lhs_plain, _ = _operator_and_composition()
+    lhs_flat, _ = _operator_and_composition(flat)
+    scale = np.max(np.abs(lhs_plain))
+    assert np.max(np.abs(lhs_flat - lhs_plain)) / scale <= 1e-13
+
+
+def test_composition_identity_with_terrain():
+    lhs, comp = _operator_and_composition(_agnesi_hill)
+    assert _rel_err(lhs, comp) <= 1e-12
+
+
+def test_composition_identity_with_terrain_sleve():
+    """First eta-dependent Jacobian through the elliptic assembly."""
+    lhs, comp = _operator_and_composition(_agnesi_hill, sleve=True)
+    assert _rel_err(lhs, comp) <= 1e-12
diff --git a/test_scripts/test_terrain_identity_oracle.py b/test_scripts/test_terrain_identity_oracle.py
new file mode 100644
index 00000000..6dbd1149
--- /dev/null
+++ b/test_scripts/test_terrain_identity_oracle.py
@@ -0,0 +1,258 @@
+"""Terrain h == 0 identity oracle.
+
+Two guarantees, two mechanisms:
+
+1. **Bypass identity (bit-exact).** No registered golden-master case
+   defines ``orography``, so ``elem.metric is None`` and every solver
+   call site takes the pre-terrain code path untouched. The 9-case
+   golden-master suite (``test_flow_solver.py``) is the authority; this
+   file just pins the precondition.
+
+2. **Forced-flat identity (~1e-13).** The metric machinery switched ON
+   with ``h == 0`` is algebraically the identity (J == 1, G == 0) but
+   multiplies extra factors through the operators. As each metric-aware
+   operator lands (Phases 1-6 of the terrain plan), a comparison of the
+   flat-metric path against the plain path is added here — run in both
+   the compressible and pseudo-incompressible regimes to cover the
+   pi-update / wcenter consistency factors.
+"""
+
+import importlib
+
+import numpy as np
+import pytest
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.interfaces.ic_config import IC_MODULES
+from pybella.utils import user_data
+from pybella.utils import options as opts
+from pybella.utils.operators import divergence
+
+# the golden-master regression cases (smoke_* cases are exempt: they own
+# no target and are allowed to grow terrain)
+GOLDEN_MASTER_CASES = [
+    "test_travelling_vortex",
+    "test_travelling_vortex_3d_coriolis",
+    "test_internal_long_wave",
+    "test_igw_baldauf_brdar",
+    "test_lamb_wave",
+    "test_blending_warm_bubble",
+    "test_unstable_lamb",
+    "test_swe_vortex",
+    "test_straka",
+]
+
+
+def test_golden_master_cases_have_no_orography():
+    """Precondition for bit-identity: every target-bearing case must take
+    the metric-bypass path (elem.metric is None)."""
+    for ic in GOLDEN_MASTER_CASES:
+        module = importlib.import_module(IC_MODULES[ic])
+        ud = user_data.UserDataInit(**vars(module.UserData()))
+        assert getattr(ud, "orography", None) is None, (
+            f"{ic} defines orography — golden-master cases must stay on "
+            "the uniform-Cartesian bypass path"
+        )
+
+
+# --- forced-flat operator comparisons (fleshed out per terrain phase) -------
+#
+# Phase 3: elliptic operator C_ij and wcenter
+# Phase 6: advective fluxes + CFL
+
+
+class _StubUD:
+    def __init__(self, v=1, orography=None):
+        self.inx, self.iny, self.inz = 13, 9, 7
+        self.xmin, self.xmax = -1.0, 1.0
+        self.ymin, self.ymax = 0.0, 2.0
+        self.zmin, self.zmax = -0.5, 0.5
+        self.bdry_type = np.array(
+            [opts.BdryType.PERIODIC, opts.BdryType.WALL, opts.BdryType.PERIODIC]
+        )
+        self.gravity_direction = v
+        if orography is not None:
+            self.orography = orography
+
+
+class _StubSol:
+    """Smooth, deterministic 3D fields (no randomness, reproducible)."""
+
+    def __init__(self, elem):
+        # x + y + z forces every field to the full (icx, icy, icz) shape
+        x, y, z = np.meshgrid(elem.x, elem.y, elem.z, indexing="ij")
+        self.rho = 1.0 + 0.1 * np.sin(x) * np.cos(y) + 0.05 * z
+        self.rhoY = self.rho * (1.0 + 0.02 * np.cos(x + y - z))
+        self.rhou = np.sin(2 * x) + 0.3 * y * z
+        self.rhov = np.cos(y) * (1.0 + 0.2 * x) + 0.0 * z
+        self.rhow = 0.5 * np.sin(z + x) + 0.0 * y
+
+
+def _rhs_for(ud_metric):
+    elem, node = dis_grid.grid_init(ud_metric)
+    sol = _StubSol(elem)
+    rhs = np.zeros(node.isc)
+    divergence.compute_at_nodes(rhs, elem, sol, ud_metric)
+    return rhs
+
+
+def test_divergence_flat_metric_matches_plain():
+    flat = lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2
+    rhs_plain = _rhs_for(_StubUD())
+    rhs_flat = _rhs_for(_StubUD(orography=flat))
+    assert np.max(np.abs(rhs_flat - rhs_plain)) <= 1e-13
+
+
+def test_gradient_map_flat_metric_is_exact_identity():
+    flat = lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2
+    elem, _ = dis_grid.grid_init(_StubUD(orography=flat))
+    from pybella.flow_solver.discretisation import terrain
+
+    sol = _StubSol(elem)
+    dp0 = [sol.rhou.copy(), sol.rhov.copy(), sol.rhow.copy()]
+    dp = terrain.apply_gradient_map(elem.metric, [d.copy() for d in dp0])
+    for a, b in zip(dp, dp0):
+        # J == 1, G == 0: multiplication by 1.0 and subtraction of 0.0 are
+        # exact in floating point — bit-identity, not approximation
+        assert np.array_equal(a, b)
+
+
+@pytest.mark.parametrize("v", [0, 1, 2])
+def test_gradient_map_constant_metric_algebra(v):
+    from pybella.utils import axes
+    from pybella.flow_solver.discretisation.terrain import (
+        MetricFields,
+        apply_gradient_map,
+    )
+
+    elem, _ = dis_grid.grid_init(_StubUD(v=v))
+    sol = _StubSol(elem)
+    dp0 = [sol.rhou.copy(), sol.rhov.copy(), sol.rhow.copy()]
+    a_h1, a_h2 = axes.horizontal_axes(v)
+    shape = dp0[0].shape
+    J0, g1, g2 = 2.0, 0.3, -0.7
+    metric = MetricFields(
+        J=np.full(shape, J0),
+        G1=np.full(shape, g1),
+        G2=np.full(shape, g2),
+        z=np.zeros(shape),
+        vaxis=v,
+        haxes=(a_h1, a_h2),
+    )
+    dp = apply_gradient_map(metric, [d.copy() for d in dp0])
+    assert np.allclose(dp[a_h1], dp0[a_h1] - (g1 / J0) * dp0[v], atol=1e-15)
+    assert np.allclose(dp[a_h2], dp0[a_h2] - (g2 / J0) * dp0[v], atol=1e-15)
+    assert np.allclose(dp[v], dp0[v] / J0, atol=1e-15)
+
+
+@pytest.mark.parametrize("v", [0, 1, 2])
+def test_divergence_metric_wiring(v):
+    """Algebraic wiring checks of the role-axis mapping, exact to roundoff.
+
+    With theta == 1 (rho == rhoY), constant J and constant slopes, the
+    metric divergence must equal the plain divergence of analytically
+    pre-transformed momenta.
+    """
+    from pybella.utils import axes
+
+    ud = _StubUD(v=v)
+    ud.bdry_type = np.array([opts.BdryType.PERIODIC] * 3)
+    elem, node = dis_grid.grid_init(ud)
+    sol = _StubSol(elem)
+    sol.rhoY = sol.rho.copy()  # theta == 1
+
+    a_h1, a_h2 = axes.horizontal_axes(v)
+    moms = (sol.rhou, sol.rhov, sol.rhow)
+
+    # hand-build a constant metric (bypasses the transform on purpose)
+    from pybella.flow_solver.discretisation.terrain import MetricFields
+
+    shape = sol.rho.shape
+    J0, g1, g2 = 2.0, 0.3, -0.7
+    elem.metric = MetricFields(
+        J=np.full(shape, J0),
+        G1=np.full(shape, g1),
+        G2=np.full(shape, g2),
+        z=np.zeros(shape),
+        vaxis=v,
+        haxes=(a_h1, a_h2),
+    )
+    rhs_metric = np.zeros(node.isc)
+    divergence.compute_at_nodes(rhs_metric, elem, sol, ud)
+    elem.metric = None
+
+    # reference: plain divergence of the pre-transformed momenta
+    ref_moms = [None, None, None]
+    ref_moms[a_h1] = J0 * moms[a_h1]
+    ref_moms[a_h2] = J0 * moms[a_h2]
+    ref_moms[v] = moms[v] - g1 * moms[a_h1] - g2 * moms[a_h2]
+    sol.rhou, sol.rhov, sol.rhow = ref_moms
+    rhs_ref = np.zeros(node.isc)
+    divergence.compute_at_nodes(rhs_ref, elem, sol, ud)
+
+    assert np.max(np.abs(rhs_metric - rhs_ref)) <= 1e-13
+
+
+# --- native 2D (lap2D path) -------------------------------------------------
+
+
+class _StubSol2D:
+    """Smooth, deterministic 2D fields."""
+
+    def __init__(self, elem):
+        x, y = np.meshgrid(elem.x, elem.y, indexing="ij")
+        self.rho = 1.0 + 0.1 * np.sin(x) * np.cos(y)
+        self.rhoY = self.rho * (1.0 + 0.02 * np.cos(x + y))
+        self.rhou = np.sin(2 * x) + 0.3 * y
+        self.rhov = np.cos(y) * (1.0 + 0.2 * x)
+        self.rhow = np.zeros_like(x)
+
+
+def _rhs_for_2d(ud_metric):
+    ud_metric.inz = 1
+    elem, node = dis_grid.grid_init(ud_metric)
+    sol = _StubSol2D(elem)
+    rhs = np.zeros((elem.icx - 1, elem.icy - 1))
+    divergence.compute_at_nodes(rhs, elem, sol, ud_metric)
+    return rhs
+
+
+def test_divergence_2d_flat_metric_matches_plain():
+    flat = lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2
+    rhs_plain = _rhs_for_2d(_StubUD())
+    rhs_flat = _rhs_for_2d(_StubUD(orography=flat))
+    assert np.max(np.abs(rhs_flat - rhs_plain)) <= 1e-13
+
+
+def test_divergence_2d_metric_wiring():
+    """2D sibling of test_divergence_metric_wiring (haxes = (0, None))."""
+    from pybella.flow_solver.discretisation.terrain import MetricFields
+
+    ud = _StubUD()
+    ud.inz = 1
+    ud.bdry_type = np.array([opts.BdryType.PERIODIC] * 3)
+    elem, node = dis_grid.grid_init(ud)
+    sol = _StubSol2D(elem)
+    sol.rhoY = sol.rho.copy()  # theta == 1
+
+    shape = sol.rho.shape
+    J0, g1 = 2.0, 0.3
+    elem.metric = MetricFields(
+        J=np.full(shape, J0),
+        G1=np.full(shape, g1),
+        G2=None,
+        z=np.zeros(shape),
+        vaxis=1,
+        haxes=(0, None),
+    )
+    rhs_metric = np.zeros((elem.icx - 1, elem.icy - 1))
+    divergence.compute_at_nodes(rhs_metric, elem, sol, ud)
+    elem.metric = None
+
+    rhou0 = sol.rhou.copy()
+    sol.rhou = J0 * rhou0
+    sol.rhov = sol.rhov - g1 * rhou0
+    rhs_ref = np.zeros((elem.icx - 1, elem.icy - 1))
+    divergence.compute_at_nodes(rhs_ref, elem, sol, ud)
+
+    assert np.max(np.abs(rhs_metric - rhs_ref)) <= 1e-13
diff --git a/test_scripts/test_terrain_resting_atmosphere.py b/test_scripts/test_terrain_resting_atmosphere.py
new file mode 100644
index 00000000..445bda2c
--- /dev/null
+++ b/test_scripts/test_terrain_resting_atmosphere.py
@@ -0,0 +1,223 @@
+"""Resting atmosphere over terrain — the classic TFC physics gate.
+
+A hydrostatically balanced isothermal atmosphere at rest over a witch-of-
+Agnesi hill must stay (nearly) at rest: every spurious metric force —
+imbalanced pressure-gradient mapping, wrong Jacobian factors, ghost-cell
+hydrostatics at the wrong height — shows up as wind generated from nothing.
+Discrete rest is not exact (corner-averaged gradients across columns of
+different physical height carry truncation error), so the gate is a small
+velocity bound, not machine zero.
+
+Also pins the h == 0 equivalence of the field-mode hydrostates against the
+legacy 1D profiles (exact for the analytical state, quadrature-tight for
+the integrated state).
+"""
+
+import numpy as np
+import pytest
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import time_update
+from pybella.flow_solver.physics import hydrostatics, thermodynamics
+from pybella.flow_solver.utils import cache, fields
+from pybella.flow_solver.utils.boundary import cell_boundary as bdry_c
+from pybella.tests import smoke_agnesi
+from pybella.utils import axes, user_data
+from pybella.utils.data_structures import ModelState
+
+
+class _StubWriter:
+    def write(self, *args, **kwargs):
+        pass
+
+    def populate(self, *args, **kwargs):
+        pass
+
+    def write_all(self, *args, **kwargs):
+        pass
+
+
+def _agnesi(ud, h0_m=400.0, a_m=5000.0):
+    h0, a = h0_m / ud.h_ref, a_m / ud.h_ref
+    return lambda xi1, xi2: h0 * a**2 / (xi1**2 + a**2) + 0.0 * xi2
+
+
+def _make_resting_mem(orography=True, steps=5, inz=2, sleve=False):
+    ud = user_data.UserDataInit(**vars(smoke_agnesi.UserData()))
+    ud.coriolis_strength = np.array(ud.coriolis_strength)
+    ud.u_wind_speed = 0.0
+    ud.stepmax = steps
+    ud.inz = inz  # 2 = quasi-2D 3D (lap3D path), 1 = native 2D (lap2D path)
+    ud.tout = [1e6]  # step-limited
+    # smoke_agnesi carries its own hill; override per variant
+    ud.orography = _agnesi(ud) if orography else None
+    if sleve:
+        from pybella.flow_solver.discretisation import terrain
+
+        hill = ud.orography
+        ud.orography_smooth = lambda xi1, xi2: 0.5 * hill(xi1, xi2)
+        ud.vertical_transform = terrain.SLEVETransform(
+            s1=6000.0 / ud.h_ref, s2=1500.0 / ud.h_ref
+        )
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = smoke_agnesi.sol_init(sol, npf, elem, node, th, ud)
+    mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+    return mem, ud
+
+
+def _max_speed_ms(mem, ud):
+    i2 = tuple(slice(2, -2) for _ in range(mem.elem.ndim))
+    speeds = [
+        np.max(np.abs(getattr(mem.sol, m)[i2] / mem.sol.rho[i2]))
+        for m in ("rhou", "rhov", "rhow")
+    ]
+    return max(speeds) * ud.u_ref
+
+
+@pytest.mark.parametrize("inz", [2, 1], ids=["q2d3d", "native2d"])
+def test_resting_atmosphere_over_hill(inz):
+    mem, ud = _make_resting_mem(orography=True, steps=5, inz=inz)
+    mem = time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+    vmax = _max_speed_ms(mem, ud)
+    # measured ~1e-10 m/s (the bicgstab solve floor, same as flat): with the
+    # p2 == 0 perturbation-pressure convention the discrete rest state is
+    # exact through the metric machinery, not merely truncation-small
+    assert vmax < 1e-8, f"spurious wind over terrain: max |v| = {vmax:.3e} m/s"
+
+
+@pytest.mark.parametrize("inz", [2, 1], ids=["q2d3d", "native2d"])
+def test_resting_atmosphere_over_hill_sleve(inz):
+    """Balanced rest under the first eta-dependent Jacobian: field-mode
+    hydrostates at SLEVE heights, bottom BC with J varying through the
+    ghost rows, metric advection and elliptic assembly all see J(eta)."""
+    mem, ud = _make_resting_mem(orography=True, steps=5, inz=inz, sleve=True)
+    mem = time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+    vmax = _max_speed_ms(mem, ud)
+    assert vmax < 1e-8, f"spurious wind over SLEVE terrain: {vmax:.3e} m/s"
+
+
+@pytest.mark.parametrize("inz", [2, 1], ids=["q2d3d", "native2d"])
+def test_resting_atmosphere_flat_stays_still(inz):
+    """Flat baseline: quantifies the no-terrain spurious-wind floor."""
+    mem, ud = _make_resting_mem(orography=False, steps=5, inz=inz)
+    mem = time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+    vmax = _max_speed_ms(mem, ud)
+    assert vmax < 1e-8, f"flat resting atmosphere drifted: {vmax:.3e} m/s"
+
+
+@pytest.mark.parametrize("inz", [2, 1], ids=["q2d3d", "native2d"])
+def test_uniform_flow_flat_metric_matches_plain(inz):
+    """Full time loop, wind on: forced-flat metric == plain to roundoff.
+
+    Exercises every metric-aware branch (divergence, gradients, elliptic,
+    ghost cells, sweep-oriented metric flips) with J == 1, G == 0 against
+    the untouched uniform-Cartesian path.
+    """
+
+    def run(orography):
+        ud = user_data.UserDataInit(**vars(smoke_agnesi.UserData()))
+        ud.coriolis_strength = np.array(ud.coriolis_strength)
+        ud.stepmax = 3
+        ud.tout = [1e6]
+        ud.inz = inz
+        # forced-flat metric vs true bypass (the case's own hill is overridden)
+        ud.orography = (lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2) if orography else None
+        elem, node = dis_grid.grid_init(ud)
+        sol = fields.CellSolField(elem.sc)
+        th = thermodynamics.ThermodynamicalQuantities(ud)
+        npf = fields.NodePressureField(elem, node, ud)
+        sol = smoke_agnesi.sol_init(sol, npf, elem, node, th, ud)
+        mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+        bdry_c.set_ghost_cells(mem, ud)
+        return time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+
+    plain = run(False)
+    flat = run(True)
+    for attr in ("rho", "rhou", "rhov", "rhow", "rhoY"):
+        a = getattr(plain.sol, attr)
+        b = getattr(flat.sol, attr)
+        scale = max(np.max(np.abs(a)), 1.0)
+        err = np.max(np.abs(a - b)) / scale
+        assert err <= 1e-12, f"{attr}: rel {err:.2e}"
+
+
+@pytest.mark.parametrize("inz", [2, 1], ids=["q2d3d", "native2d"])
+def test_mountain_wave_smoke_conservation_and_response(inz):
+    """First end-to-end terrain run: 10 m/s wind over the 400 m hill.
+
+    Gates: (i) J-weighted mass and P = rho*Y are conserved by the metric
+    advection (flux form, periodic x, no-flux walls), (ii) the hill
+    actually forces a vertical-velocity response of a sane magnitude
+    (linear estimate U * max|dh/dx| ~ 0.5 m/s for these parameters).
+    """
+    ud = user_data.UserDataInit(**vars(smoke_agnesi.UserData()))
+    ud.coriolis_strength = np.array(ud.coriolis_strength)
+    ud.stepmax = 5
+    ud.tout = [1e6]
+    ud.inz = inz
+    elem, node = dis_grid.grid_init(ud)
+    sol = fields.CellSolField(elem.sc)
+    th = thermodynamics.ThermodynamicalQuantities(ud)
+    npf = fields.NodePressureField(elem, node, ud)
+    sol = smoke_agnesi.sol_init(sol, npf, elem, node, th, ud)
+    mem = ModelState(elem, node, sol, npf, th, cache.FlowSolverCache())
+    bdry_c.set_ghost_cells(mem, ud)
+
+    i2 = tuple(slice(2, -2) for _ in range(elem.ndim))
+    J = elem.metric.J
+    mass0 = np.sum((J * mem.sol.rho)[i2])
+    P0 = np.sum((J * mem.sol.rhoY)[i2])
+
+    mem = time_update.do(mem, ud, tout=ud.tout[0], debug_writer=_StubWriter())
+
+    mass1 = np.sum((J * mem.sol.rho)[i2])
+    P1 = np.sum((J * mem.sol.rhoY)[i2])
+    assert abs(mass1 - mass0) / mass0 < 1e-10, f"mass drift {(mass1-mass0)/mass0:.2e}"
+    assert abs(P1 - P0) / P0 < 1e-10, f"P drift {(P1-P0)/P0:.2e}"
+
+    w_ms = np.max(np.abs(mem.sol.rhov[i2] / mem.sol.rho[i2])) * ud.u_ref
+    assert 1e-3 < w_ms < 5.0, f"mountain-wave w response out of range: {w_ms:.3e} m/s"
+
+
+def test_field_mode_hydrostates_match_profiles_when_flat():
+    flat = lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2
+
+    def states_pair(builder):
+        ud0 = user_data.UserDataInit(**vars(smoke_agnesi.UserData()))
+        ud0.coriolis_strength = np.array(ud0.coriolis_strength)
+        ud0.orography = None
+        elem0, node0 = dis_grid.grid_init(ud0)
+        npf0 = fields.NodePressureField(elem0, node0, ud0)
+        builder(npf0, elem0, node0, thermodynamics.ThermodynamicalQuantities(ud0), ud0)
+
+        ud1 = user_data.UserDataInit(**vars(smoke_agnesi.UserData()))
+        ud1.coriolis_strength = np.array(ud1.coriolis_strength)
+        ud1.orography = flat
+        elem1, node1 = dis_grid.grid_init(ud1)
+        npf1 = fields.NodePressureField(elem1, node1, ud1)
+        builder(npf1, elem1, node1, thermodynamics.ThermodynamicalQuantities(ud1), ud1)
+
+        v = axes.vertical_axis(ud0)
+        return npf0, npf1, elem0, v
+
+    # analytical_state: identical expressions either way -> exact.
+    # integrated_state: the legacy profile uses coarse per-cell trapezoids,
+    # the field branch a fine-grid quadrature — they differ by the COARSE
+    # scheme's truncation (~1.5e-5 here); the bound guards wiring errors
+    # (axis, sign, reference level), not quadrature equivalence.
+    for builder, tol in (
+        (hydrostatics.analytical_state, 0.0),
+        (hydrostatics.integrated_state, 5e-5),
+    ):
+        npf0, npf1, elem0, v = states_pair(builder)
+        for attr in ("rho0", "rhoY0", "p0", "p20", "S0", "Y0"):
+            profile = axes.expand_profile(
+                getattr(npf0.HydroState, attr), elem0.ndim, v, elem0.sc
+            )
+            field = getattr(npf1.HydroState, attr)
+            err = np.max(np.abs(field - profile)) / np.max(np.abs(profile))
+            assert err <= tol, f"{builder.__name__}.{attr}: rel {err:.2e} > {tol}"
diff --git a/test_scripts/test_terrain_transform.py b/test_scripts/test_terrain_transform.py
new file mode 100644
index 00000000..108993e9
--- /dev/null
+++ b/test_scripts/test_terrain_transform.py
@@ -0,0 +1,364 @@
+"""Unit tests for the terrain-following transform + metric field builder.
+
+Covers the Gal-Chen analytic identities, the SLEVE-ready interface
+contract (slope = decay * dh), the h == 0 exact-identity guarantee, and
+the role orientation of the metric arrays for every gravity_direction.
+"""
+
+import numpy as np
+import pytest
+
+from pybella.flow_solver.discretisation import grid as dis_grid
+from pybella.flow_solver.discretisation import terrain
+from pybella.utils import axes
+from pybella.utils import options as opts
+
+# --- transform-level identities --------------------------------------------
+
+
+def test_galchen_surface_and_top():
+    tr = terrain.GalChenTransform()
+    eta0, etat = 0.0, 2.0
+    h = np.array([0.0, 0.1, 0.3])
+    # surface follows terrain, top is flat
+    assert np.allclose(tr.z(eta0, h, eta0, etat), eta0 + h)
+    assert np.allclose(tr.z(etat, h, eta0, etat), etat)
+
+
+def test_galchen_jacobian_matches_fd():
+    tr = terrain.GalChenTransform()
+    eta0, etat = 0.5, 3.0
+    h = 0.2
+    eta = np.linspace(eta0, etat, 11)
+    d = 1e-6
+    fd = (tr.z(eta + d, h, eta0, etat) - tr.z(eta - d, h, eta0, etat)) / (2 * d)
+    assert np.allclose(tr.jacobian(eta, h, eta0, etat), fd, atol=1e-9)
+    # Gal-Chen J is eta-independent and equals 1 - h/(etat - eta0)
+    assert np.allclose(tr.jacobian(eta, h, eta0, etat), 1.0 - h / (etat - eta0))
+
+
+def test_galchen_slope_is_decay_weighted():
+    tr = terrain.GalChenTransform()
+    eta0, etat = 0.0, 1.0
+    eta = np.linspace(eta0, etat, 5)
+    dh = 0.07
+    assert np.allclose(tr.slope(eta, dh, eta0, etat), dh * (etat - eta) / (etat - eta0))
+    # slope vanishes at the flat top, equals dh at the surface
+    assert tr.slope(etat, dh, eta0, etat) == 0.0
+    assert tr.slope(eta0, dh, eta0, etat) == dh
+
+
+# --- builder ----------------------------------------------------------------
+
+
+class _StubUD:
+    """Minimal ud for grid_init + build_metric_fields."""
+
+    def __init__(
+        self,
+        v=1,
+        orography=None,
+        orography_grad=None,
+        orography_smooth=None,
+        orography_smooth_grad=None,
+        vertical_transform=None,
+    ):
+        self.inx, self.iny, self.inz = 9, 7, 5
+        self.xmin, self.xmax = -1.0, 1.0
+        self.ymin, self.ymax = 0.0, 2.0
+        self.zmin, self.zmax = -0.5, 0.5
+        self.bdry_type = np.array(
+            [opts.BdryType.PERIODIC, opts.BdryType.WALL, opts.BdryType.PERIODIC]
+        )
+        self.gravity_direction = v
+        if orography is not None:
+            self.orography = orography
+        if orography_grad is not None:
+            self.orography_grad = orography_grad
+        if orography_smooth is not None:
+            self.orography_smooth = orography_smooth
+        if orography_smooth_grad is not None:
+            self.orography_smooth_grad = orography_smooth_grad
+        if vertical_transform is not None:
+            self.vertical_transform = vertical_transform
+
+
+def _hill(amplitude=0.05, width=0.3):
+    def h(xi1, xi2):
+        return amplitude / (1.0 + (xi1 / width) ** 2) + 0.0 * xi2
+
+    def dh1(xi1, xi2):
+        return (
+            -2.0 * amplitude * xi1 / width**2 / (1.0 + (xi1 / width) ** 2) ** 2
+            + 0.0 * xi2
+        )
+
+    def dh2(xi1, xi2):
+        return 0.0 * xi1 + 0.0 * xi2
+
+    return h, (dh1, dh2)
+
+
+def test_metric_none_without_orography():
+    elem, node = dis_grid.grid_init(_StubUD())
+    assert elem.metric is None
+    assert node.metric is None
+
+
+def test_h_zero_gives_exact_identity_metric():
+    flat = lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2
+    ud = _StubUD(orography=flat)
+    elem, node = dis_grid.grid_init(ud)
+    for grid_obj in (elem, node):
+        m = grid_obj.metric
+        assert m is not None
+        v = axes.vertical_axis(ud)
+        # exact identities, not approximate: h == 0 incurs no rounding
+        assert np.all(m.J == 1.0)
+        assert np.all(m.ooJ == 1.0)
+        assert np.all(m.G1 == 0.0)
+        assert np.all(m.G2 == 0.0)
+        eta = axes.coords_along(grid_obj, v)
+        shape = [1] * grid_obj.ndim
+        shape[v] = -1
+        assert np.all(m.z == np.broadcast_to(eta.reshape(shape), m.z.shape))
+
+
+@pytest.mark.parametrize("v", [0, 1, 2])
+def test_role_orientation(v):
+    h, grad = _hill()
+    ud = _StubUD(v=v, orography=h, orography_grad=grad)
+    elem, _ = dis_grid.grid_init(ud)
+    m = elem.metric
+    a_h1, a_h2 = axes.horizontal_axes(v)
+    assert m.vaxis == v
+    assert m.haxes == (a_h1, a_h2)
+    assert m.J.shape == tuple(int(elem.sc[d]) for d in range(elem.ndim))
+
+    # J varies along h1 (the hill axis) and is constant along v and h2
+    assert np.ptp(m.J, axis=a_h1).max() > 0.0
+    assert np.ptp(m.J, axis=v).max() == 0.0  # Gal-Chen: eta-independent
+    assert np.ptp(m.J, axis=a_h2).max() == 0.0
+
+    # G1 decays with height: max slope at the bottom layer, ~0 at the top
+    lo, hi = axes.wall_slabs(elem.ndim, v, depth=1)
+    assert np.abs(m.G1[lo]).max() > np.abs(m.G1[hi]).max()
+    # z is strictly increasing along the vertical
+    assert np.all(np.diff(m.z, axis=v) > 0.0)
+    # flat ridge: no slope along h2
+    assert np.all(m.G2 == 0.0)
+
+
+def test_fd_slope_matches_analytic_grad():
+    h, grad = _hill()
+    ud_fd = _StubUD(orography=h)
+    ud_an = _StubUD(orography=h, orography_grad=grad)
+    # resolve the hill (width 0.3) properly so the second-order FD converges;
+    # walls in x: the hill does not decay to zero at the domain edge, so the
+    # periodic coordinate wrap would (correctly) introduce a seam kink that
+    # FD smears but the analytic gradient does not — not what's tested here
+    for ud in (ud_fd, ud_an):
+        ud.inx = 129
+        ud.bdry_type = np.array([opts.BdryType.WALL] * 3)
+    elem_fd, _ = dis_grid.grid_init(ud_fd)
+    elem_an, _ = dis_grid.grid_init(ud_an)
+    # ~4e-4 truncation at the steepest point on this grid; the test guards
+    # orientation/sign/scale, not FD order
+    assert np.allclose(elem_fd.metric.G1, elem_an.metric.G1, atol=1e-3)
+
+
+def test_orography_above_top_rejected():
+    tall = lambda xi1, xi2: 2.5 + 0.0 * xi1 + 0.0 * xi2  # > ymax - ymin
+    with pytest.raises(ValueError, match="Jacobian"):
+        dis_grid.grid_init(_StubUD(orography=tall))
+
+
+def test_flip_forward_backward_roundtrip():
+    h, grad = _hill()
+    ud = _StubUD(orography=h, orography_grad=grad)
+    elem, _ = dis_grid.grid_init(ud)
+    m = elem.metric
+    J0, G10, vax0, hax0 = m.J.copy(), m.G1.copy(), m.vaxis, m.haxes
+    m.flip_forward()
+    assert m.J.shape == tuple(np.roll(J0.shape, -1))
+    assert m.vaxis == (vax0 - 1) % elem.ndim
+    m.flip_backward()
+    assert np.array_equal(m.J, J0)
+    assert np.array_equal(m.G1, G10)
+    assert m.vaxis == vax0
+    assert m.haxes == hax0
+
+
+# --- SLEVE -------------------------------------------------------------------
+
+
+def _sleve(n=1.0):
+    # decay scales well separated and inside the [0, 2] stub domain
+    return terrain.SLEVETransform(s1=1.5, s2=0.25, n=n)
+
+
+def _ridge(amplitude=0.04, k=12.0):
+    """Small-scale ridge h2 on a smooth envelope h1 (analytic split)."""
+
+    def h1(xi1, xi2):
+        return amplitude * np.exp(-(xi1**2)) + 0.0 * xi2
+
+    def h(xi1, xi2):
+        return h1(xi1, xi2) * (1.0 + 0.5 * np.cos(k * xi1))
+
+    def dh1_1(xi1, xi2):
+        return -2.0 * xi1 * amplitude * np.exp(-(xi1**2)) + 0.0 * xi2
+
+    def dh_1(xi1, xi2):
+        return dh1_1(xi1, xi2) * (1.0 + 0.5 * np.cos(k * xi1)) - h1(
+            xi1, xi2
+        ) * 0.5 * k * np.sin(k * xi1)
+
+    zero = lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2
+    return h, (dh_1, zero), h1, (dh1_1, zero)
+
+
+@pytest.mark.parametrize("n", [1.0, 1.35])
+def test_sleve_surface_and_top(n):
+    tr = _sleve(n)
+    eta0, etat = 0.0, 2.0
+    h1 = np.array([0.0, 0.05, 0.1])
+    h2 = np.array([0.0, 0.03, -0.02])
+    z_surf = tr.z(eta0, (h1, h2), eta0, etat)
+    assert np.allclose(z_surf, eta0 + h1 + h2)
+    assert np.allclose(tr.z(etat, (h1, h2), eta0, etat), etat)
+
+
+@pytest.mark.parametrize("n", [1.0, 1.35])
+def test_sleve_jacobian_matches_fd(n):
+    tr = _sleve(n)
+    eta0, etat = 0.0, 2.0
+    h = (0.08, 0.04)
+    # stay inside (eta0, etat): the FD stencil must not straddle the
+    # surface where the below-ground linear extension kicks in
+    eta = np.linspace(eta0 + 1e-3, etat - 1e-3, 41)
+    d = 1e-7
+    fd = (tr.z(eta + d, h, eta0, etat) - tr.z(eta - d, h, eta0, etat)) / (2 * d)
+    assert np.allclose(tr.jacobian(eta, h, eta0, etat), fd, atol=1e-6)
+
+
+def test_sleve_jacobian_eta_dependent():
+    tr = _sleve()
+    eta0, etat = 0.0, 2.0
+    eta = np.linspace(eta0, etat, 9)
+    J = tr.jacobian(eta, (0.08, 0.04), eta0, etat)
+    assert np.ptp(J) > 1e-3  # the first eta-dependent Jacobian
+
+
+def test_sleve_n_gt_1_has_uniform_surface_jacobian():
+    tr = _sleve(n=1.35)
+    eta0, etat = 0.0, 2.0
+    J_surf = tr.jacobian(eta0, (0.08, 0.04), eta0, etat)
+    assert np.allclose(J_surf, 1.0)  # db_i(0) = 0 for n > 1
+
+
+@pytest.mark.parametrize("n", [1.0, 1.35])
+def test_sleve_below_ground_extension_is_finite_and_smooth(n):
+    tr = _sleve(n)
+    eta0, etat = 0.0, 2.0
+    eta_ghost = np.array([-0.4, -0.2, -1e-9])
+    for fn in (tr.z, tr.jacobian):
+        vals = fn(eta_ghost, (0.08, 0.04), eta0, etat)
+        assert np.all(np.isfinite(vals))
+    # continuity across the surface (for n > 1 the decay slope behaves as
+    # zeta^(n-1) just above ground — continuous but steep, hence the loose
+    # tolerance; n == 1 matches to machine precision)
+    assert np.allclose(
+        tr.jacobian(-1e-9, (0.08, 0.04), eta0, etat),
+        tr.jacobian(+1e-9, (0.08, 0.04), eta0, etat),
+        atol=1e-3 if n > 1.0 else 1e-9,
+    )
+
+
+def test_sleve_scale_separation():
+    """The point of SLEVE: at mid-levels the small-scale decay b2 is far
+    below b1, so pure small-scale terrain barely distorts the grid there —
+    under Gal-Chen it still carries ~half its surface slope."""
+    tr = _sleve()
+    gc = terrain.GalChenTransform()
+    eta0, etat = 0.0, 2.0
+    eta_mid = 1.0
+    dh = 1.0  # pure small-scale slope
+    g_sleve = tr.slope(eta_mid, (0.0, dh), eta0, etat)
+    g_gc = gc.slope(eta_mid, dh, eta0, etat)
+    assert abs(g_sleve) < 0.05 * abs(g_gc)
+    # and b2 <= b1 everywhere for s2 < s1
+    eta = np.linspace(eta0, etat, 33)
+    b1, _ = tr._b_db(eta, eta0, etat, tr.s1)
+    b2, _ = tr._b_db(eta, eta0, etat, tr.s2)
+    assert np.all(b2 <= b1 + 1e-12)
+
+
+def test_sleve_builder_requires_smooth_split():
+    h, grad, _, _ = _ridge()
+    ud = _StubUD(orography=h, orography_grad=grad, vertical_transform=_sleve())
+    with pytest.raises(ValueError, match="orography_smooth"):
+        dis_grid.grid_init(ud)
+
+
+def test_sleve_h_zero_gives_exact_identity_metric():
+    flat = lambda xi1, xi2: 0.0 * xi1 + 0.0 * xi2
+    ud = _StubUD(orography=flat, orography_smooth=flat, vertical_transform=_sleve())
+    elem, node = dis_grid.grid_init(ud)
+    for grid_obj in (elem, node):
+        m = grid_obj.metric
+        assert np.all(m.J == 1.0)
+        assert np.all(m.G1 == 0.0)
+        assert np.all(m.G2 == 0.0)
+
+
+def test_sleve_builder_slope_matches_fd_of_z():
+    """G1 from the builder == d z / d xi1 at fixed eta, by FD across columns."""
+    h, grad, h1, grad1 = _ridge()
+    ud = _StubUD(
+        orography=h,
+        orography_grad=grad,
+        orography_smooth=h1,
+        orography_smooth_grad=grad1,
+        vertical_transform=_sleve(),
+    )
+    ud.inx = 257  # resolve the k = 12 ridge for the cross-column FD
+    elem, _ = dis_grid.grid_init(ud)
+    m = elem.metric
+    dz_dxi1 = np.gradient(m.z, elem.dx, axis=0)
+    inner = (slice(4, -4), slice(None), slice(None))
+    assert np.allclose(m.G1[inner], dz_dxi1[inner], atol=2e-3)
+
+
+def test_sleve_jacobian_positivity_rejected():
+    # small-scale amplitude ~ s2: the residual decay overshoots J <= 0
+    h, grad, h1, grad1 = _ridge(amplitude=0.3, k=12.0)
+    ud = _StubUD(
+        orography=h,
+        orography_grad=grad,
+        orography_smooth=h1,
+        orography_smooth_grad=grad1,
+        vertical_transform=terrain.SLEVETransform(s1=1.5, s2=0.05),
+    )
+    with pytest.raises(ValueError, match="Jacobian"):
+        dis_grid.grid_init(ud)
+
+
+def test_sleve_flip_roundtrip_with_eta_dependent_J():
+    h, grad, h1, grad1 = _ridge()
+    ud = _StubUD(
+        orography=h,
+        orography_grad=grad,
+        orography_smooth=h1,
+        orography_smooth_grad=grad1,
+        vertical_transform=_sleve(),
+    )
+    elem, _ = dis_grid.grid_init(ud)
+    m = elem.metric
+    assert np.ptp(m.J, axis=m.vaxis).max() > 0.0  # genuinely eta-dependent
+    J0, G10, vax0, hax0 = m.J.copy(), m.G1.copy(), m.vaxis, m.haxes
+    m.flip_forward()
+    m.flip_backward()
+    assert np.array_equal(m.J, J0)
+    assert np.array_equal(m.G1, G10)
+    assert (m.vaxis, m.haxes) == (vax0, hax0)
diff --git a/test_scripts/test_zvert_smoke.py b/test_scripts/test_zvert_smoke.py
new file mode 100644
index 00000000..72894b2d
--- /dev/null
+++ b/test_scripts/test_zvert_smoke.py
@@ -0,0 +1,23 @@
+"""Plumbing smoke test: gravity_direction = 2 through the production path.
+
+Runs the ``smoke_zvert`` case (z-vertical, met convention) via the real
+``pybella -ic`` entry point for a few steps — covering the IC registry,
+``prepare.initialise`` (incl. ``axes.validate``), hydrostatics along z,
+axis-2 gravity ghost cells / wall zeroing, and the full time loop, which
+the in-process permutation oracle bypasses. No golden master: physics
+agnosticity is proven by ``test_permutation_oracle.py``; a permanent
+z-vertical regression case is deferred to the terrain-following work.
+"""
+
+import subprocess
+
+
+def test_zvert_production_path():
+    result = subprocess.run(
+        ["pybella", "-ic", "smoke_zvert", "-N", "1"], capture_output=True, text=True
+    )
+    assert result.returncode == 0, (
+        f"Command failed with return code {result.returncode}\n"
+        f"STDERR:\n{result.stderr.strip()}\n"
+        f"STDOUT:\n{result.stdout.strip()}"
+    )
diff --git a/test_scripts/tolerance_audit.py b/test_scripts/tolerance_audit.py
new file mode 100644
index 00000000..3f054f0d
--- /dev/null
+++ b/test_scripts/tolerance_audit.py
@@ -0,0 +1,99 @@
+"""Tolerance audit utility (regression_harness.md gap #1).
+
+Runs every regression case N times, harvests CompareSol's per-field max-abs
+errors from the run logs, and prints a table of observed error vs current
+tolerance, plus a suggested tightened tolerance per case
+(max observed error across reps and fields, rounded up to the next power of
+ten, with one extra decade of headroom for cross-platform/BLAS variation;
+never looser than the current tolerance).
+
+Manual utility, not collected by pytest. Usage:
+
+    python test_scripts/tolerance_audit.py [reps]
+"""
+
+import math
+import re
+import subprocess
+import sys
+from collections import defaultdict
+
+CASES = [
+    "test_travelling_vortex",
+    "test_travelling_vortex_3d_coriolis",
+    "test_internal_long_wave",
+    "test_igw_baldauf_brdar",
+    "test_lamb_wave",
+    "test_unstable_lamb",
+    "test_blending_warm_bubble",
+    "test_swe_vortex",
+    "test_straka",
+]
+
+LINE = re.compile(
+    r"Test passed for (\w+) \| L2: ([\d.eE+-]+), Rel L2: ([\d.eE+-]+|inf), "
+    r"Max Abs: ([\d.eE+-]+)"
+)
+
+
+def run_case(case):
+    """Run one case; return {field: max_abs_error} or None on failure."""
+    proc = subprocess.run(
+        [sys.executable, "-m", "pybella", "-ic", case, "-N", "1"],
+        capture_output=True,
+        text=True,
+    )
+    out = proc.stdout + proc.stderr
+    if proc.returncode != 0:
+        print(f"  !! {case} FAILED (rc={proc.returncode}); tail:")
+        print("\n".join(out.strip().splitlines()[-5:]))
+        return None
+    errors = {}
+    for m in LINE.finditer(out):
+        field, _, _, max_abs = m.groups()
+        errors[field] = max(errors.get(field, 0.0), float(max_abs))
+    return errors
+
+
+def suggest(max_err):
+    """Next power of ten above max_err, plus one decade of headroom."""
+    if max_err == 0.0:
+        return 1e-12
+    return 10.0 ** (math.ceil(math.log10(max_err)) + 1)
+
+
+def main():
+    reps = int(sys.argv[1]) if len(sys.argv) > 1 else 3
+    results = {}
+    for case in CASES:
+        worst = defaultdict(float)
+        for rep in range(reps):
+            print(f"== {case} rep {rep + 1}/{reps}")
+            errors = run_case(case)
+            if errors is None:
+                worst = None
+                break
+            for field, err in errors.items():
+                worst[field] = max(worst[field], err)
+        results[case] = dict(worst) if worst is not None else None
+
+    print("\n\n==================== AUDIT SUMMARY ====================")
+    for case, worst in results.items():
+        print(f"\n{case}:")
+        if worst is None:
+            print("  RUN FAILED — investigate before tightening")
+            continue
+        if not worst:
+            print("  no CompareSol lines found (diag off?)")
+            continue
+        overall = max(worst.values())
+        print(f"  {'field':10s} {'max_abs_err':>12s}")
+        for field, err in sorted(worst.items()):
+            print(f"  {field:10s} {err:12.3e}")
+        print(
+            f"  worst: {overall:.3e}  ->  suggested tolerance: {suggest(overall):.0e}"
+        )
+
+
+if __name__ == "__main__":
+    main()