From f69d11b362a0d0c70964701408a8310c71b0dbb2 Mon Sep 17 00:00:00 2001 From: Jiayi Weng Date: Fri, 20 Mar 2026 12:42:49 -0700 Subject: [PATCH 1/5] Fix OpenMP grid races and default thread count --- fpie/core/openmp/grid.cc | 39 +++++++++++++++++++++++++++------------ fpie/core/openmp/solver.h | 1 + fpie/process.py | 20 +++++++++++++++++++- tests/test_smoke.py | 31 +++++++++++++++++++++++++++++-- 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/fpie/core/openmp/grid.cc b/fpie/core/openmp/grid.cc index 7862eb01..10a23809 100644 --- a/fpie/core/openmp/grid.cc +++ b/fpie/core/openmp/grid.cc @@ -5,23 +5,26 @@ #include "solver.h" OpenMPGridSolver::OpenMPGridSolver(int grid_x, int grid_y, int n_cpu) - : imgbuf(NULL), tmp(NULL), GridSolver(grid_x, grid_y) { + : imgbuf(NULL), tmp(NULL), next_tgt(NULL), GridSolver(grid_x, grid_y) { omp_set_num_threads(n_cpu); } OpenMPGridSolver::~OpenMPGridSolver() { - if (tmp != NULL) { + if (next_tgt != NULL) { delete[] tmp; + delete[] next_tgt; delete[] imgbuf; } } void OpenMPGridSolver::post_reset() { - if (tmp != NULL) { + if (next_tgt != NULL) { delete[] tmp; + delete[] next_tgt; delete[] imgbuf; } tmp = new float[N * m3]; + next_tgt = new float[N * m3]; imgbuf = new unsigned char[N * m3]; memset(tmp, 0, sizeof(float) * N * m3); } @@ -32,15 +35,18 @@ inline void OpenMPGridSolver::update_equation(int id) { int id1 = off3 - 3; int id2 = off3 + 3; int id3 = off3 + m3; - tgt[off3 + 0] = (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + - tgt[id3 + 0]) / - 4.0; - tgt[off3 + 1] = (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + - tgt[id3 + 1]) / - 4.0; - tgt[off3 + 2] = (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + - tgt[id3 + 2]) / - 4.0; + next_tgt[off3 + 0] = + (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + + tgt[id3 + 0]) / + 4.0; + next_tgt[off3 + 1] = + (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + + tgt[id3 + 1]) / + 4.0; + next_tgt[off3 + 2] = + (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + + tgt[id3 + 2]) / + 4.0; } void OpenMPGridSolver::calc_error() { @@ -91,6 +97,15 @@ OpenMPGridSolver::step(int iteration) { } } } +#pragma omp parallel for schedule(static) + for (int id = 0; id < N * M; ++id) { + if (mask[id]) { + int off3 = id * 3; + tgt[off3 + 0] = next_tgt[off3 + 0]; + tgt[off3 + 1] = next_tgt[off3 + 1]; + tgt[off3 + 2] = next_tgt[off3 + 2]; + } + } } calc_error(); #pragma omp parallel for schedule(static) diff --git a/fpie/core/openmp/solver.h b/fpie/core/openmp/solver.h index 1d03fb2b..e87a9630 100644 --- a/fpie/core/openmp/solver.h +++ b/fpie/core/openmp/solver.h @@ -29,6 +29,7 @@ class OpenMPEquSolver : public EquSolver { class OpenMPGridSolver : public GridSolver { unsigned char* imgbuf; float* tmp; + float* next_tgt; public: OpenMPGridSolver(int grid_x, int grid_y, int n_cpu); diff --git a/fpie/process.py b/fpie/process.py index a5c84401..bb8ca22b 100644 --- a/fpie/process.py +++ b/fpie/process.py @@ -8,7 +8,25 @@ from fpie import np_solver -CPU_COUNT = os.cpu_count() or 1 +DEFAULT_CPU_CAP = 8 + + +def _default_cpu_count() -> int: + """Pick a conservative default for CPU-backed solvers.""" + candidates: list[int] = [] + cpu_count = os.cpu_count() + if cpu_count is not None: + candidates.append(cpu_count) + try: + candidates.append(len(os.sched_getaffinity(0))) + except (AttributeError, OSError): + pass + if not candidates: + return 1 + return max(1, min(min(candidates), DEFAULT_CPU_CAP)) + + +CPU_COUNT = _default_cpu_count() DEFAULT_BACKEND = "numpy" ALL_BACKEND = ["numpy"] MPI: Any | None = None diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 49fed554..7e33ae99 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -6,7 +6,11 @@ import numpy as np -from fpie.process import EquProcessor, GridProcessor +from fpie.process import ( + ALL_BACKEND, + EquProcessor, + GridProcessor, +) class SmokeTest(unittest.TestCase): @@ -41,6 +45,30 @@ def test_grid_processor_numpy_backend(self) -> None: self.assertEqual(out.dtype, np.uint8) self.assertEqual(err.shape, (3,)) + @unittest.skipUnless("openmp" in ALL_BACKEND, "OpenMP backend unavailable") + def test_grid_processor_openmp_matches_numpy(self) -> None: + """OpenMP grid solver should match the NumPy Jacobi update.""" + rng = np.random.default_rng(0) + src = rng.integers(0, 256, size=(24, 24, 3), dtype=np.uint8) + tgt = rng.integers(0, 256, size=(24, 24, 3), dtype=np.uint8) + mask = np.zeros((24, 24), dtype=np.uint8) + mask[2:-2, 2:-2] = ( + rng.random((20, 20)) > 0.35 + ).astype(np.uint8) * 255 + + proc_np = GridProcessor(backend="numpy", grid_x=1, grid_y=1) + proc_omp = GridProcessor( + backend="openmp", n_cpu=4, grid_x=1, grid_y=1 + ) + proc_np.reset(src, mask, tgt.copy(), (0, 0), (0, 0)) + proc_omp.reset(src, mask, tgt.copy(), (0, 0), (0, 0)) + + out_np, err_np = proc_np.step(5) + out_omp, err_omp = proc_omp.step(5) + + np.testing.assert_array_equal(out_omp, out_np) + np.testing.assert_allclose(err_omp, err_np, rtol=1e-5, atol=1e-5) + def test_cli_check_backend(self) -> None: """Verify the CLI can report available backends.""" result = subprocess.run( @@ -61,6 +89,5 @@ def test_cli_check_backend(self) -> None: self.assertIn("numpy", result.stdout) - if __name__ == "__main__": unittest.main() From 440c9fef6defcbba9c333af3153771fed47af1a7 Mon Sep 17 00:00:00 2001 From: Jiayi Weng Date: Fri, 20 Mar 2026 12:44:33 -0700 Subject: [PATCH 2/5] Format smoke test --- tests/test_smoke.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 7e33ae99..5fc9cf19 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -52,14 +52,10 @@ def test_grid_processor_openmp_matches_numpy(self) -> None: src = rng.integers(0, 256, size=(24, 24, 3), dtype=np.uint8) tgt = rng.integers(0, 256, size=(24, 24, 3), dtype=np.uint8) mask = np.zeros((24, 24), dtype=np.uint8) - mask[2:-2, 2:-2] = ( - rng.random((20, 20)) > 0.35 - ).astype(np.uint8) * 255 + mask[2:-2, 2:-2] = (rng.random((20, 20)) > 0.35).astype(np.uint8) * 255 proc_np = GridProcessor(backend="numpy", grid_x=1, grid_y=1) - proc_omp = GridProcessor( - backend="openmp", n_cpu=4, grid_x=1, grid_y=1 - ) + proc_omp = GridProcessor(backend="openmp", n_cpu=4, grid_x=1, grid_y=1) proc_np.reset(src, mask, tgt.copy(), (0, 0), (0, 0)) proc_omp.reset(src, mask, tgt.copy(), (0, 0), (0, 0)) @@ -89,5 +85,6 @@ def test_cli_check_backend(self) -> None: self.assertIn("numpy", result.stdout) + if __name__ == "__main__": unittest.main() From 3eda04f5f1cc2a3611dc8a96d96341e086a4db46 Mon Sep 17 00:00:00 2001 From: Jiayi Weng Date: Fri, 20 Mar 2026 12:46:15 -0700 Subject: [PATCH 3/5] Format OpenMP grid solver --- fpie/core/openmp/grid.cc | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fpie/core/openmp/grid.cc b/fpie/core/openmp/grid.cc index 10a23809..ddbbe442 100644 --- a/fpie/core/openmp/grid.cc +++ b/fpie/core/openmp/grid.cc @@ -35,18 +35,15 @@ inline void OpenMPGridSolver::update_equation(int id) { int id1 = off3 - 3; int id2 = off3 + 3; int id3 = off3 + m3; - next_tgt[off3 + 0] = - (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + - tgt[id3 + 0]) / - 4.0; - next_tgt[off3 + 1] = - (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + - tgt[id3 + 1]) / - 4.0; - next_tgt[off3 + 2] = - (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + - tgt[id3 + 2]) / - 4.0; + next_tgt[off3 + 0] = (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + + tgt[id2 + 0] + tgt[id3 + 0]) / + 4.0; + next_tgt[off3 + 1] = (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + + tgt[id2 + 1] + tgt[id3 + 1]) / + 4.0; + next_tgt[off3 + 2] = (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + + tgt[id2 + 2] + tgt[id3 + 2]) / + 4.0; } void OpenMPGridSolver::calc_error() { From 79f000a9e22e344497b7633eabd6262cf4446cd7 Mon Sep 17 00:00:00 2001 From: Jiayi Weng Date: Fri, 20 Mar 2026 12:50:26 -0700 Subject: [PATCH 4/5] Reuse OpenMP grid scratch buffer --- fpie/core/openmp/grid.cc | 36 ++++++++++++++++++------------------ fpie/core/openmp/solver.h | 1 - 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/fpie/core/openmp/grid.cc b/fpie/core/openmp/grid.cc index ddbbe442..646ddc7c 100644 --- a/fpie/core/openmp/grid.cc +++ b/fpie/core/openmp/grid.cc @@ -5,26 +5,23 @@ #include "solver.h" OpenMPGridSolver::OpenMPGridSolver(int grid_x, int grid_y, int n_cpu) - : imgbuf(NULL), tmp(NULL), next_tgt(NULL), GridSolver(grid_x, grid_y) { + : imgbuf(NULL), tmp(NULL), GridSolver(grid_x, grid_y) { omp_set_num_threads(n_cpu); } OpenMPGridSolver::~OpenMPGridSolver() { - if (next_tgt != NULL) { + if (tmp != NULL) { delete[] tmp; - delete[] next_tgt; delete[] imgbuf; } } void OpenMPGridSolver::post_reset() { - if (next_tgt != NULL) { + if (tmp != NULL) { delete[] tmp; - delete[] next_tgt; delete[] imgbuf; } tmp = new float[N * m3]; - next_tgt = new float[N * m3]; imgbuf = new unsigned char[N * m3]; memset(tmp, 0, sizeof(float) * N * m3); } @@ -35,15 +32,18 @@ inline void OpenMPGridSolver::update_equation(int id) { int id1 = off3 - 3; int id2 = off3 + 3; int id3 = off3 + m3; - next_tgt[off3 + 0] = (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + - tgt[id2 + 0] + tgt[id3 + 0]) / - 4.0; - next_tgt[off3 + 1] = (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + - tgt[id2 + 1] + tgt[id3 + 1]) / - 4.0; - next_tgt[off3 + 2] = (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + - tgt[id2 + 2] + tgt[id3 + 2]) / - 4.0; + tmp[off3 + 0] = + (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + + tgt[id3 + 0]) / + 4.0; + tmp[off3 + 1] = + (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + + tgt[id3 + 1]) / + 4.0; + tmp[off3 + 2] = + (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + + tgt[id3 + 2]) / + 4.0; } void OpenMPGridSolver::calc_error() { @@ -98,9 +98,9 @@ OpenMPGridSolver::step(int iteration) { for (int id = 0; id < N * M; ++id) { if (mask[id]) { int off3 = id * 3; - tgt[off3 + 0] = next_tgt[off3 + 0]; - tgt[off3 + 1] = next_tgt[off3 + 1]; - tgt[off3 + 2] = next_tgt[off3 + 2]; + tgt[off3 + 0] = tmp[off3 + 0]; + tgt[off3 + 1] = tmp[off3 + 1]; + tgt[off3 + 2] = tmp[off3 + 2]; } } } diff --git a/fpie/core/openmp/solver.h b/fpie/core/openmp/solver.h index e87a9630..1d03fb2b 100644 --- a/fpie/core/openmp/solver.h +++ b/fpie/core/openmp/solver.h @@ -29,7 +29,6 @@ class OpenMPEquSolver : public EquSolver { class OpenMPGridSolver : public GridSolver { unsigned char* imgbuf; float* tmp; - float* next_tgt; public: OpenMPGridSolver(int grid_x, int grid_y, int n_cpu); From f212e7f3e8b0909011b0b5bbced9c690e3f073a1 Mon Sep 17 00:00:00 2001 From: Jiayi Weng Date: Fri, 20 Mar 2026 12:52:32 -0700 Subject: [PATCH 5/5] Format OpenMP grid scratch reuse --- fpie/core/openmp/grid.cc | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fpie/core/openmp/grid.cc b/fpie/core/openmp/grid.cc index 646ddc7c..a0ed4122 100644 --- a/fpie/core/openmp/grid.cc +++ b/fpie/core/openmp/grid.cc @@ -32,18 +32,15 @@ inline void OpenMPGridSolver::update_equation(int id) { int id1 = off3 - 3; int id2 = off3 + 3; int id3 = off3 + m3; - tmp[off3 + 0] = - (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + - tgt[id3 + 0]) / - 4.0; - tmp[off3 + 1] = - (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + - tgt[id3 + 1]) / - 4.0; - tmp[off3 + 2] = - (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + - tgt[id3 + 2]) / - 4.0; + tmp[off3 + 0] = (grad[off3 + 0] + tgt[id0 + 0] + tgt[id1 + 0] + tgt[id2 + 0] + + tgt[id3 + 0]) / + 4.0; + tmp[off3 + 1] = (grad[off3 + 1] + tgt[id0 + 1] + tgt[id1 + 1] + tgt[id2 + 1] + + tgt[id3 + 1]) / + 4.0; + tmp[off3 + 2] = (grad[off3 + 2] + tgt[id0 + 2] + tgt[id1 + 2] + tgt[id2 + 2] + + tgt[id3 + 2]) / + 4.0; } void OpenMPGridSolver::calc_error() {