Removed array reallocation

danieljvickers · danieljvickers · commit 6faf89662e4c · 2026-05-30T13:01:25.000-04:00
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp
@@ -192,21 +192,6 @@ contains
             #:endfor
         end do
 
-        ! Non-rank-0 processes may have patch_ib sized to num_ib_patches_max_namelist while rank 0 grew it
-        ! for particle beds. Grow here before receiving the broadcast entries.
-        if (proc_rank /= 0 .and. num_ibs > size(patch_ib)) then
-            block
-                type(ib_patch_parameters), allocatable :: tmp(:)
-                integer                                :: n
-                n = size(patch_ib)
-                allocate (tmp(n))
-                tmp(1:n) = patch_ib(1:n)
-                deallocate (patch_ib)
-                allocate (patch_ib(num_ib_patches_max))
-                patch_ib(1:n) = tmp
-            end block
-        end if
-
         do i = 1, num_ibs
             #:for VAR in [ 'radius', 'length_x', 'length_y', 'length_z', &
                 & 'x_centroid', 'y_centroid', 'z_centroid', 'c', 'm', 'p', 't', 'theta', 'slip', 'mass', &
diff --git a/src/simulation/m_particle_bed.fpp b/src/simulation/m_particle_bed.fpp
@@ -1,14 +1,15 @@
 !>
 !! @file m_particle_bed.fpp
 !! @brief Generates particle beds: converts particle_bed specifications into
-!!        individual sphere/circle patch_ib entries before MPI broadcast.
+!!        individual sphere/circle particle_bed_ibs entries before reduction.
 
 !> @brief Generates particle beds by converting particle_bed patch specifications into individual immersed boundary patches before
-!! MPI broadcast.
+!! domain reduction. Each rank runs the same deterministic placement so no MPI broadcast of particle positions is needed.
 module m_particle_bed
 
     use m_global_parameters
     use m_constants
+    use m_mpi_common
 
     implicit none
 
@@ -18,32 +19,45 @@ module m_particle_bed
 
 contains
 
-    !> Generate all particle beds and append the resulting particles to patch_ib. Called on rank 0 only, before
-    !! s_mpi_bcast_user_inputs. Uses a spatial hash grid (cell size = min_dist) so each candidate requires only 3^dim distance
-    !! checks on average instead of O(n).
-    impure subroutine s_generate_particle_beds()
-
-        integer               :: b, ib_idx, geom
-        integer               :: n_placed, n_total_placed
-        integer(8)            :: n_attempts, max_attempts
-        real(wp)              :: xmin, xmax, ymin, ymax, zmin, zmax, min_dist
-        real(wp)              :: rx, ry, rz, dist
-        real(wp)              :: t_start, t_end
-        integer               :: seed
-        logical               :: overlaps
-        real(wp), allocatable :: placed(:,:)
+    !> Generate all particle beds and fill particle_bed_ibs. Called on all ranks before s_reduce_ib_patch_array. Uses a spatial hash
+    !! grid (cell size = min_dist) so each candidate requires only 3^dim distance checks on average instead of O(n). The placement
+    !! is fully deterministic given the per-bed seed, so all ranks produce an identical result without MPI.
+    impure subroutine s_generate_particle_beds(particle_bed_ibs)
+
+        type(ib_patch_parameters), allocatable, intent(out), dimension(:) :: particle_bed_ibs
+        integer                                                           :: b, ib_idx, geom
+        integer                                                           :: n_placed, n_total_placed, n_total_particles
+        integer(8)                                                        :: n_attempts, max_attempts
+        real(wp)                                                          :: xmin, xmax, ymin, ymax, zmin, zmax, min_dist
+        real(wp)                                                          :: rx, ry, rz, dist
+        real(wp)                                                          :: t_start, t_end
+        integer                                                           :: seed
+        logical                                                           :: overlaps
+        real(wp), allocatable                                             :: placed(:,:)
 
         ! Spatial hash grid
         integer              :: hash_size, slot
         integer              :: bx, by, bz, nbx, nby, nbz
         integer              :: dx_b, dy_b, dz_b, dz_lo, dz_hi, j
         integer, allocatable :: hash_head(:), chain_next(:)
 
-        if (num_particle_beds == 0) return
+        if (num_particle_beds == 0) then
+            allocate (particle_bed_ibs(0))
+            return
+        end if
 
         call cpu_time(t_start)
         n_total_placed = 0
 
+        ! Pre-count total particles across all beds so particle_bed_ibs can be allocated exactly once.
+        n_total_particles = 0
+        do b = 1, num_particle_beds
+            n_total_particles = n_total_particles + particle_bed(b)%num_particles
+        end do
+        allocate (particle_bed_ibs(n_total_particles))
+
+        ib_idx = 0  ! index into particle_bed_ibs
+
         do b = 1, num_particle_beds
             xmin = particle_bed(b)%x_centroid - 0.5_wp*particle_bed(b)%length_x
             xmax = particle_bed(b)%x_centroid + 0.5_wp*particle_bed(b)%length_x
@@ -133,32 +147,41 @@ contains
                     chain_next(n_placed) = hash_head(slot)
                     hash_head(slot) = n_placed
 
-                    num_ibs = num_ibs + 1
-                    ib_idx = num_ibs
-
-                    patch_ib(ib_idx)%gbl_patch_id = ib_idx
-                    patch_ib(ib_idx)%geometry = geom
-                    patch_ib(ib_idx)%x_centroid = rx
-                    patch_ib(ib_idx)%y_centroid = ry
-                    patch_ib(ib_idx)%z_centroid = rz
-                    patch_ib(ib_idx)%angles(1) = 0._wp
-                    patch_ib(ib_idx)%angles(2) = 0._wp
-                    patch_ib(ib_idx)%angles(3) = 0._wp
-                    patch_ib(ib_idx)%vel(1) = 0._wp
-                    patch_ib(ib_idx)%vel(2) = 0._wp
-                    patch_ib(ib_idx)%vel(3) = 0._wp
-                    patch_ib(ib_idx)%angular_vel(1) = 0._wp
-                    patch_ib(ib_idx)%angular_vel(2) = 0._wp
-                    patch_ib(ib_idx)%angular_vel(3) = 0._wp
-                    patch_ib(ib_idx)%radius = particle_bed(b)%radius
-                    patch_ib(ib_idx)%mass = particle_bed(b)%mass
-                    patch_ib(ib_idx)%moving_ibm = particle_bed(b)%moving_ibm
+                    ib_idx = ib_idx + 1
+
+                    ! gbl_patch_id is relative within particle_bed_ibs here; s_reduce_ib_patch_array adjusts to global indexing.
+                    particle_bed_ibs(ib_idx)%gbl_patch_id = ib_idx
+                    particle_bed_ibs(ib_idx)%geometry = geom
+                    particle_bed_ibs(ib_idx)%x_centroid = rx
+                    particle_bed_ibs(ib_idx)%y_centroid = ry
+                    particle_bed_ibs(ib_idx)%z_centroid = rz
+                    particle_bed_ibs(ib_idx)%step_x_centroid = rx
+                    particle_bed_ibs(ib_idx)%step_y_centroid = ry
+                    particle_bed_ibs(ib_idx)%step_z_centroid = rz
+                    particle_bed_ibs(ib_idx)%angles(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%step_angles(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%vel(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%step_vel(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%angular_vel(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%step_angular_vel(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%force(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%torque(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%centroid_offset(:) = 0._wp
+                    particle_bed_ibs(ib_idx)%rotation_matrix = 0._wp
+                    particle_bed_ibs(ib_idx)%rotation_matrix(1, 1) = 1._wp
+                    particle_bed_ibs(ib_idx)%rotation_matrix(2, 2) = 1._wp
+                    particle_bed_ibs(ib_idx)%rotation_matrix(3, 3) = 1._wp
+                    particle_bed_ibs(ib_idx)%rotation_matrix_inverse = particle_bed_ibs(ib_idx)%rotation_matrix
+                    particle_bed_ibs(ib_idx)%radius = particle_bed(b)%radius
+                    particle_bed_ibs(ib_idx)%mass = particle_bed(b)%mass
+                    particle_bed_ibs(ib_idx)%moment = dflt_real
+                    particle_bed_ibs(ib_idx)%moving_ibm = particle_bed(b)%moving_ibm
+                    particle_bed_ibs(ib_idx)%slip = .false.
                 end if
             end do
 
             if (n_placed < particle_bed(b)%num_particles) then
-                print *, "Error :: Failed to place all IBs ib particle bed"
-                stop
+                call s_mpi_abort("Error :: Failed to place all particles in particle bed")
             end if
 
             n_total_placed = n_total_placed + n_placed
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
@@ -887,16 +887,22 @@ contains
 
         if (model_eqns == 3) call s_initialize_internal_energy_equations(q_cons_ts(1)%vf)
         if (ib) then
-            if (cfl_dt .and. n_start > 0) then
-                call s_read_ib_restart_data(n_start)
-            else if (t_step_start > 0) then
-                call s_read_ib_restart_data(t_step_start)
-            else
-                ! particle bed generated on first time step
-                call s_generate_particle_beds()
-            end if
-            call s_instantiate_STL_models()
-            call s_reduce_ib_patch_array()
+            block
+                type(ib_patch_parameters), allocatable :: particle_bed_ibs(:)
+                
+                if (cfl_dt .and. n_start > 0) then
+                    call s_read_ib_restart_data(n_start)
+                    allocate (particle_bed_ibs(0))
+                else if (t_step_start > 0) then
+                    call s_read_ib_restart_data(t_step_start)
+                    allocate (particle_bed_ibs(0))
+                else
+                    call s_generate_particle_beds(particle_bed_ibs)
+                end if
+                call s_instantiate_STL_models()
+                call s_reduce_ib_patch_array(particle_bed_ibs)
+                deallocate (particle_bed_ibs)
+            end block
             call s_ibm_setup()
             if (t_step_start == 0 .or. (cfl_dt .and. n_start == 0)) then
                 call s_write_ib_data_file(0)
@@ -1169,80 +1175,103 @@ contains
 
     end subroutine s_read_ib_restart_data
 
-    !> @brief takes the patch_ib struct array that contains all global IB patches and reduces to only contain patches that are in
-    !! the local computational domain.
-    subroutine s_reduce_ib_patch_array()
+    !> @brief Merges patch_ib (namelist patches, fixed at num_ib_patches_max_namelist) with particle_bed_ibs (CPU-only, exact size)
+    !! and reduces to only the patches in or near the local computational domain. patch_ib is never reallocated; the local subset is
+    !! written in-place from the front. particle_bed_ibs is owned by the caller and freed there after this returns.
+    subroutine s_reduce_ib_patch_array(particle_bed_ibs)
 
-        type(ib_patch_parameters), allocatable :: patch_ib_gbl(:)
-        real(wp), dimension(3)                 :: centroid
-        integer                                :: i, j
-        integer                                :: num_aware_ibs
-        logical                                :: is_in_neighborhood, is_local
+        type(ib_patch_parameters), intent(in), dimension(:) :: particle_bed_ibs
+        real(wp), dimension(3)                              :: centroid
+        integer                                             :: i
+        integer                                             :: num_namelist_ibs, num_bed_ibs
 
-        ! do all set up for moving immersed boundaries
+        num_namelist_ibs = num_ibs
+        num_bed_ibs = size(particle_bed_ibs)
 
+        ! Check for moving IBs across both namelist and particle bed patches.
         moving_immersed_boundary_flag = .false.
-        do i = 1, num_ibs
+        do i = 1, num_namelist_ibs
             if (patch_ib(i)%moving_ibm /= 0) then
                 moving_immersed_boundary_flag = .true.
                 exit
             end if
         end do
+        if (.not. moving_immersed_boundary_flag) then
+            do i = 1, num_bed_ibs
+                if (particle_bed_ibs(i)%moving_ibm /= 0) then
+                    moving_immersed_boundary_flag = .true.
+                    exit
+                end if
+            end do
+        end if
 
-        allocate (patch_ib_gbl(num_ibs))
-        patch_ib_gbl(1:num_ibs) = patch_ib(1:num_ibs)
-        call get_neighbor_bounds()  ! make sure the bounds of the neighbors are correctly set up
-        call s_compute_ib_neighbor_ranks()  ! build lookup of all neighbor MPI ranks
-
-        num_gbl_ibs = num_ibs
-        num_local_ibs = num_ibs
-        @:PROHIBIT(num_local_ibs > num_local_ibs_max, &
-                   & "Too many IBs on a single processor rank. Modify case file or increase limit of num_local_ibs_max to resolve.")
-        do i = 1, num_local_ibs_max
-            local_ib_patch_ids(i) = i
-        end do
+        call get_neighbor_bounds()
+        call s_compute_ib_neighbor_ranks()
 
-        $:GPU_EXIT_DATA(delete='[patch_ib]')
-        deallocate (patch_ib)
+        num_gbl_ibs = num_namelist_ibs + num_bed_ibs
 
 #ifdef MFC_MPI
         if (num_procs == 1) then
-            ! single-rank: every patch is local; allocate to exact size and copy
-            allocate (patch_ib(num_gbl_ibs))
-            patch_ib(1:num_gbl_ibs) = patch_ib_gbl(1:num_gbl_ibs)
-            deallocate (patch_ib_gbl)
+            ! single-rank: all patches are local; append particle bed entries directly into patch_ib.
+            @:PROHIBIT(num_gbl_ibs > num_ib_patches_max_namelist, &
+                       & "Total IB count exceeds patch_ib capacity. Increase num_ib_patches_max_namelist.")
+            do i = 1, num_bed_ibs
+                patch_ib(num_namelist_ibs + i) = particle_bed_ibs(i)
+                patch_ib(num_namelist_ibs + i)%gbl_patch_id = num_namelist_ibs + i
+            end do
+            num_ibs = num_gbl_ibs
+            num_local_ibs = num_gbl_ibs
+            do i = 1, num_gbl_ibs
+                local_ib_patch_ids(i) = i
+            end do
         else
-            ! multi-rank: carve out the local neighbourhood subset
-            num_aware_ibs = min(num_local_ibs_max*(2*ib_neighborhood_radius + 1)**num_dims, num_ib_patches_max)
-            allocate (patch_ib(num_aware_ibs))
-
-            num_local_ibs = 0
+            ! multi-rank: compact namelist patches in-place (write index num_ibs <= read index i), then append local particle beds.
             num_ibs = 0
-            do i = 1, num_gbl_ibs
-                ! catch the edge case where th collision lies just outside the computational domain
-                is_in_neighborhood = .true.
-                is_local = .true.
-                centroid = [patch_ib_gbl(i)%x_centroid, patch_ib_gbl(i)%y_centroid, 0._wp]
-                if (num_dims == 3) centroid(3) = patch_ib_gbl(i)%z_centroid
-
+            num_local_ibs = 0
+            do i = 1, num_namelist_ibs
+                centroid = [patch_ib(i)%x_centroid, patch_ib(i)%y_centroid, 0._wp]
+                if (num_dims == 3) centroid(3) = patch_ib(i)%z_centroid
                 if (f_neighborhood_ranks_own_location(centroid)) then
                     num_ibs = num_ibs + 1
-                    patch_ib(num_ibs) = patch_ib_gbl(i)
+                    patch_ib(num_ibs) = patch_ib(i)
                     patch_ib(num_ibs)%gbl_patch_id = i
                     if (f_local_rank_owns_location(centroid)) then
                         num_local_ibs = num_local_ibs + 1
                         local_ib_patch_ids(num_local_ibs) = num_ibs
                     end if
                 end if
             end do
-
-            deallocate (patch_ib_gbl)
+            do i = 1, num_bed_ibs
+                centroid = [particle_bed_ibs(i)%x_centroid, particle_bed_ibs(i)%y_centroid, 0._wp]
+                if (num_dims == 3) centroid(3) = particle_bed_ibs(i)%z_centroid
+                if (f_neighborhood_ranks_own_location(centroid)) then
+                    num_ibs = num_ibs + 1
+                    @:PROHIBIT(num_ibs > num_ib_patches_max_namelist, &
+                               & "Local IB count exceeds patch_ib capacity. Increase num_ib_patches_max_namelist.")
+                    patch_ib(num_ibs) = particle_bed_ibs(i)
+                    patch_ib(num_ibs)%gbl_patch_id = num_namelist_ibs + i
+                    if (f_local_rank_owns_location(centroid)) then
+                        num_local_ibs = num_local_ibs + 1
+                        local_ib_patch_ids(num_local_ibs) = num_ibs
+                    end if
+                end if
+            end do
+            @:PROHIBIT(num_local_ibs > num_local_ibs_max, &
+                       & "Too many IBs on a single processor rank. Modify case file or increase limit of num_local_ibs_max to resolve.")
         end if
 #else
-        ! no-MPI: every patch is local; allocate to exact size and copy
-        allocate (patch_ib(num_gbl_ibs))
-        patch_ib(1:num_gbl_ibs) = patch_ib_gbl(1:num_gbl_ibs)
-        deallocate (patch_ib_gbl)
+        ! no-MPI: all patches are local; append particle bed entries directly into patch_ib.
+        @:PROHIBIT(num_gbl_ibs > num_ib_patches_max_namelist, &
+                   & "Total IB count exceeds patch_ib capacity. Increase num_ib_patches_max_namelist.")
+        do i = 1, num_bed_ibs
+            patch_ib(num_namelist_ibs + i) = particle_bed_ibs(i)
+            patch_ib(num_namelist_ibs + i)%gbl_patch_id = num_namelist_ibs + i
+        end do
+        num_ibs = num_gbl_ibs
+        num_local_ibs = num_gbl_ibs
+        do i = 1, num_gbl_ibs
+            local_ib_patch_ids(i) = i
+        end do
 #endif
 
         $:GPU_ENTER_DATA(create='[patch_ib]')