diff --git a/fpie/core/openmp/grid.cc b/fpie/core/openmp/grid.cc index 7862eb01..a0ed4122 100644 --- a/fpie/core/openmp/grid.cc +++ b/fpie/core/openmp/grid.cc @@ -32,13 +32,13 @@ inline void OpenMPGridSolver::update_equation(int id) { int id1 = off3 - 3; int id2 = off3 + 3; int id3 = off3 + m3; - tgt[off3 + 0] = (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + + tmp[off3 + 0] = (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + tgt[id3 + 0]) / 4.0; - tgt[off3 + 1] = (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + + tmp[off3 + 1] = (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + tgt[id3 + 1]) / 4.0; - tgt[off3 + 2] = (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + + tmp[off3 + 2] = (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + tgt[id3 + 2]) / 4.0; } @@ -91,6 +91,15 @@ OpenMPGridSolver::step(int iteration) { } } } +#pragma omp parallel for schedule(static) + for (int id = 0; id < N * M; ++id) { + if (mask[id]) { + int off3 = id * 3; + tgt[off3 + 0] = tmp[off3 + 0]; + tgt[off3 + 1] = tmp[off3 + 1]; + tgt[off3 + 2] = tmp[off3 + 2]; + } + } } calc_error(); #pragma omp parallel for schedule(static) diff --git a/fpie/process.py b/fpie/process.py index a5c84401..bb8ca22b 100644 --- a/fpie/process.py +++ b/fpie/process.py @@ -8,7 +8,25 @@ from fpie import np_solver -CPU_COUNT = os.cpu_count() or 1 +DEFAULT_CPU_CAP = 8 + + +def _default_cpu_count() -> int: + """Pick a conservative default for CPU-backed solvers.""" + candidates: list[int] = [] + cpu_count = os.cpu_count() + if cpu_count is not None: + candidates.append(cpu_count) + try: + candidates.append(len(os.sched_getaffinity(0))) + except (AttributeError, OSError): + pass + if not candidates: + return 1 + return max(1, min(min(candidates), DEFAULT_CPU_CAP)) + + +CPU_COUNT = _default_cpu_count() DEFAULT_BACKEND = "numpy" ALL_BACKEND = ["numpy"] MPI: Any | None = None diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 49fed554..5fc9cf19 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -6,7 +6,11 @@ import numpy as np -from fpie.process import EquProcessor, GridProcessor +from fpie.process import ( + ALL_BACKEND, + EquProcessor, + GridProcessor, +) class SmokeTest(unittest.TestCase): @@ -41,6 +45,26 @@ def test_grid_processor_numpy_backend(self) -> None: self.assertEqual(out.dtype, np.uint8) self.assertEqual(err.shape, (3,)) + @unittest.skipUnless("openmp" in ALL_BACKEND, "OpenMP backend unavailable") + def test_grid_processor_openmp_matches_numpy(self) -> None: + """OpenMP grid solver should match the NumPy Jacobi update.""" + rng = np.random.default_rng(0) + src = rng.integers(0, 256, size=(24, 24, 3), dtype=np.uint8) + tgt = rng.integers(0, 256, size=(24, 24, 3), dtype=np.uint8) + mask = np.zeros((24, 24), dtype=np.uint8) + mask[2:-2, 2:-2] = (rng.random((20, 20)) > 0.35).astype(np.uint8) * 255 + + proc_np = GridProcessor(backend="numpy", grid_x=1, grid_y=1) + proc_omp = GridProcessor(backend="openmp", n_cpu=4, grid_x=1, grid_y=1) + proc_np.reset(src, mask, tgt.copy(), (0, 0), (0, 0)) + proc_omp.reset(src, mask, tgt.copy(), (0, 0), (0, 0)) + + out_np, err_np = proc_np.step(5) + out_omp, err_omp = proc_omp.step(5) + + np.testing.assert_array_equal(out_omp, out_np) + np.testing.assert_allclose(err_omp, err_np, rtol=1e-5, atol=1e-5) + def test_cli_check_backend(self) -> None: """Verify the CLI can report available backends.""" result = subprocess.run(