Skip to content

Commit 6faf896

Browse files
Removed array reallocation
1 parent cf73406 commit 6faf896

3 files changed

Lines changed: 148 additions & 111 deletions

File tree

src/simulation/m_mpi_proxy.fpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -192,21 +192,6 @@ contains
192192
#:endfor
193193
end do
194194

195-
! Non-rank-0 processes may have patch_ib sized to num_ib_patches_max_namelist while rank 0 grew it
196-
! for particle beds. Grow here before receiving the broadcast entries.
197-
if (proc_rank /= 0 .and. num_ibs > size(patch_ib)) then
198-
block
199-
type(ib_patch_parameters), allocatable :: tmp(:)
200-
integer :: n
201-
n = size(patch_ib)
202-
allocate (tmp(n))
203-
tmp(1:n) = patch_ib(1:n)
204-
deallocate (patch_ib)
205-
allocate (patch_ib(num_ib_patches_max))
206-
patch_ib(1:n) = tmp
207-
end block
208-
end if
209-
210195
do i = 1, num_ibs
211196
#:for VAR in [ 'radius', 'length_x', 'length_y', 'length_z', &
212197
& 'x_centroid', 'y_centroid', 'z_centroid', 'c', 'm', 'p', 't', 'theta', 'slip', 'mass', &

src/simulation/m_particle_bed.fpp

Lines changed: 62 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
!>
22
!! @file m_particle_bed.fpp
33
!! @brief Generates particle beds: converts particle_bed specifications into
4-
!! individual sphere/circle patch_ib entries before MPI broadcast.
4+
!! individual sphere/circle particle_bed_ibs entries before reduction.
55

66
!> @brief Generates particle beds by converting particle_bed patch specifications into individual immersed boundary patches before
7-
!! MPI broadcast.
7+
!! domain reduction. Each rank runs the same deterministic placement so no MPI broadcast of particle positions is needed.
88
module m_particle_bed
99

1010
use m_global_parameters
1111
use m_constants
12+
use m_mpi_common
1213

1314
implicit none
1415

@@ -18,32 +19,45 @@ module m_particle_bed
1819

1920
contains
2021

21-
!> Generate all particle beds and append the resulting particles to patch_ib. Called on rank 0 only, before
22-
!! s_mpi_bcast_user_inputs. Uses a spatial hash grid (cell size = min_dist) so each candidate requires only 3^dim distance
23-
!! checks on average instead of O(n).
24-
impure subroutine s_generate_particle_beds()
25-
26-
integer :: b, ib_idx, geom
27-
integer :: n_placed, n_total_placed
28-
integer(8) :: n_attempts, max_attempts
29-
real(wp) :: xmin, xmax, ymin, ymax, zmin, zmax, min_dist
30-
real(wp) :: rx, ry, rz, dist
31-
real(wp) :: t_start, t_end
32-
integer :: seed
33-
logical :: overlaps
34-
real(wp), allocatable :: placed(:,:)
22+
!> Generate all particle beds and fill particle_bed_ibs. Called on all ranks before s_reduce_ib_patch_array. Uses a spatial hash
23+
!! grid (cell size = min_dist) so each candidate requires only 3^dim distance checks on average instead of O(n). The placement
24+
!! is fully deterministic given the per-bed seed, so all ranks produce an identical result without MPI.
25+
impure subroutine s_generate_particle_beds(particle_bed_ibs)
26+
27+
type(ib_patch_parameters), allocatable, intent(out), dimension(:) :: particle_bed_ibs
28+
integer :: b, ib_idx, geom
29+
integer :: n_placed, n_total_placed, n_total_particles
30+
integer(8) :: n_attempts, max_attempts
31+
real(wp) :: xmin, xmax, ymin, ymax, zmin, zmax, min_dist
32+
real(wp) :: rx, ry, rz, dist
33+
real(wp) :: t_start, t_end
34+
integer :: seed
35+
logical :: overlaps
36+
real(wp), allocatable :: placed(:,:)
3537

3638
! Spatial hash grid
3739
integer :: hash_size, slot
3840
integer :: bx, by, bz, nbx, nby, nbz
3941
integer :: dx_b, dy_b, dz_b, dz_lo, dz_hi, j
4042
integer, allocatable :: hash_head(:), chain_next(:)
4143

42-
if (num_particle_beds == 0) return
44+
if (num_particle_beds == 0) then
45+
allocate (particle_bed_ibs(0))
46+
return
47+
end if
4348

4449
call cpu_time(t_start)
4550
n_total_placed = 0
4651

52+
! Pre-count total particles across all beds so particle_bed_ibs can be allocated exactly once.
53+
n_total_particles = 0
54+
do b = 1, num_particle_beds
55+
n_total_particles = n_total_particles + particle_bed(b)%num_particles
56+
end do
57+
allocate (particle_bed_ibs(n_total_particles))
58+
59+
ib_idx = 0 ! index into particle_bed_ibs
60+
4761
do b = 1, num_particle_beds
4862
xmin = particle_bed(b)%x_centroid - 0.5_wp*particle_bed(b)%length_x
4963
xmax = particle_bed(b)%x_centroid + 0.5_wp*particle_bed(b)%length_x
@@ -133,32 +147,41 @@ contains
133147
chain_next(n_placed) = hash_head(slot)
134148
hash_head(slot) = n_placed
135149

136-
num_ibs = num_ibs + 1
137-
ib_idx = num_ibs
138-
139-
patch_ib(ib_idx)%gbl_patch_id = ib_idx
140-
patch_ib(ib_idx)%geometry = geom
141-
patch_ib(ib_idx)%x_centroid = rx
142-
patch_ib(ib_idx)%y_centroid = ry
143-
patch_ib(ib_idx)%z_centroid = rz
144-
patch_ib(ib_idx)%angles(1) = 0._wp
145-
patch_ib(ib_idx)%angles(2) = 0._wp
146-
patch_ib(ib_idx)%angles(3) = 0._wp
147-
patch_ib(ib_idx)%vel(1) = 0._wp
148-
patch_ib(ib_idx)%vel(2) = 0._wp
149-
patch_ib(ib_idx)%vel(3) = 0._wp
150-
patch_ib(ib_idx)%angular_vel(1) = 0._wp
151-
patch_ib(ib_idx)%angular_vel(2) = 0._wp
152-
patch_ib(ib_idx)%angular_vel(3) = 0._wp
153-
patch_ib(ib_idx)%radius = particle_bed(b)%radius
154-
patch_ib(ib_idx)%mass = particle_bed(b)%mass
155-
patch_ib(ib_idx)%moving_ibm = particle_bed(b)%moving_ibm
150+
ib_idx = ib_idx + 1
151+
152+
! gbl_patch_id is relative within particle_bed_ibs here; s_reduce_ib_patch_array adjusts to global indexing.
153+
particle_bed_ibs(ib_idx)%gbl_patch_id = ib_idx
154+
particle_bed_ibs(ib_idx)%geometry = geom
155+
particle_bed_ibs(ib_idx)%x_centroid = rx
156+
particle_bed_ibs(ib_idx)%y_centroid = ry
157+
particle_bed_ibs(ib_idx)%z_centroid = rz
158+
particle_bed_ibs(ib_idx)%step_x_centroid = rx
159+
particle_bed_ibs(ib_idx)%step_y_centroid = ry
160+
particle_bed_ibs(ib_idx)%step_z_centroid = rz
161+
particle_bed_ibs(ib_idx)%angles(:) = 0._wp
162+
particle_bed_ibs(ib_idx)%step_angles(:) = 0._wp
163+
particle_bed_ibs(ib_idx)%vel(:) = 0._wp
164+
particle_bed_ibs(ib_idx)%step_vel(:) = 0._wp
165+
particle_bed_ibs(ib_idx)%angular_vel(:) = 0._wp
166+
particle_bed_ibs(ib_idx)%step_angular_vel(:) = 0._wp
167+
particle_bed_ibs(ib_idx)%force(:) = 0._wp
168+
particle_bed_ibs(ib_idx)%torque(:) = 0._wp
169+
particle_bed_ibs(ib_idx)%centroid_offset(:) = 0._wp
170+
particle_bed_ibs(ib_idx)%rotation_matrix = 0._wp
171+
particle_bed_ibs(ib_idx)%rotation_matrix(1, 1) = 1._wp
172+
particle_bed_ibs(ib_idx)%rotation_matrix(2, 2) = 1._wp
173+
particle_bed_ibs(ib_idx)%rotation_matrix(3, 3) = 1._wp
174+
particle_bed_ibs(ib_idx)%rotation_matrix_inverse = particle_bed_ibs(ib_idx)%rotation_matrix
175+
particle_bed_ibs(ib_idx)%radius = particle_bed(b)%radius
176+
particle_bed_ibs(ib_idx)%mass = particle_bed(b)%mass
177+
particle_bed_ibs(ib_idx)%moment = dflt_real
178+
particle_bed_ibs(ib_idx)%moving_ibm = particle_bed(b)%moving_ibm
179+
particle_bed_ibs(ib_idx)%slip = .false.
156180
end if
157181
end do
158182

159183
if (n_placed < particle_bed(b)%num_particles) then
160-
print *, "Error :: Failed to place all IBs ib particle bed"
161-
stop
184+
call s_mpi_abort("Error :: Failed to place all particles in particle bed")
162185
end if
163186

164187
n_total_placed = n_total_placed + n_placed

src/simulation/m_start_up.fpp

Lines changed: 86 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -887,16 +887,22 @@ contains
887887

888888
if (model_eqns == 3) call s_initialize_internal_energy_equations(q_cons_ts(1)%vf)
889889
if (ib) then
890-
if (cfl_dt .and. n_start > 0) then
891-
call s_read_ib_restart_data(n_start)
892-
else if (t_step_start > 0) then
893-
call s_read_ib_restart_data(t_step_start)
894-
else
895-
! particle bed generated on first time step
896-
call s_generate_particle_beds()
897-
end if
898-
call s_instantiate_STL_models()
899-
call s_reduce_ib_patch_array()
890+
block
891+
type(ib_patch_parameters), allocatable :: particle_bed_ibs(:)
892+
893+
if (cfl_dt .and. n_start > 0) then
894+
call s_read_ib_restart_data(n_start)
895+
allocate (particle_bed_ibs(0))
896+
else if (t_step_start > 0) then
897+
call s_read_ib_restart_data(t_step_start)
898+
allocate (particle_bed_ibs(0))
899+
else
900+
call s_generate_particle_beds(particle_bed_ibs)
901+
end if
902+
call s_instantiate_STL_models()
903+
call s_reduce_ib_patch_array(particle_bed_ibs)
904+
deallocate (particle_bed_ibs)
905+
end block
900906
call s_ibm_setup()
901907
if (t_step_start == 0 .or. (cfl_dt .and. n_start == 0)) then
902908
call s_write_ib_data_file(0)
@@ -1169,80 +1175,103 @@ contains
11691175

11701176
end subroutine s_read_ib_restart_data
11711177

1172-
!> @brief takes the patch_ib struct array that contains all global IB patches and reduces to only contain patches that are in
1173-
!! the local computational domain.
1174-
subroutine s_reduce_ib_patch_array()
1178+
!> @brief Merges patch_ib (namelist patches, fixed at num_ib_patches_max_namelist) with particle_bed_ibs (CPU-only, exact size)
1179+
!! and reduces to only the patches in or near the local computational domain. patch_ib is never reallocated; the local subset is
1180+
!! written in-place from the front. particle_bed_ibs is owned by the caller and freed there after this returns.
1181+
subroutine s_reduce_ib_patch_array(particle_bed_ibs)
11751182

1176-
type(ib_patch_parameters), allocatable :: patch_ib_gbl(:)
1177-
real(wp), dimension(3) :: centroid
1178-
integer :: i, j
1179-
integer :: num_aware_ibs
1180-
logical :: is_in_neighborhood, is_local
1183+
type(ib_patch_parameters), intent(in), dimension(:) :: particle_bed_ibs
1184+
real(wp), dimension(3) :: centroid
1185+
integer :: i
1186+
integer :: num_namelist_ibs, num_bed_ibs
11811187

1182-
! do all set up for moving immersed boundaries
1188+
num_namelist_ibs = num_ibs
1189+
num_bed_ibs = size(particle_bed_ibs)
11831190

1191+
! Check for moving IBs across both namelist and particle bed patches.
11841192
moving_immersed_boundary_flag = .false.
1185-
do i = 1, num_ibs
1193+
do i = 1, num_namelist_ibs
11861194
if (patch_ib(i)%moving_ibm /= 0) then
11871195
moving_immersed_boundary_flag = .true.
11881196
exit
11891197
end if
11901198
end do
1199+
if (.not. moving_immersed_boundary_flag) then
1200+
do i = 1, num_bed_ibs
1201+
if (particle_bed_ibs(i)%moving_ibm /= 0) then
1202+
moving_immersed_boundary_flag = .true.
1203+
exit
1204+
end if
1205+
end do
1206+
end if
11911207

1192-
allocate (patch_ib_gbl(num_ibs))
1193-
patch_ib_gbl(1:num_ibs) = patch_ib(1:num_ibs)
1194-
call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up
1195-
call s_compute_ib_neighbor_ranks() ! build lookup of all neighbor MPI ranks
1196-
1197-
num_gbl_ibs = num_ibs
1198-
num_local_ibs = num_ibs
1199-
@:PROHIBIT(num_local_ibs > num_local_ibs_max, &
1200-
& "Too many IBs on a single processor rank. Modify case file or increase limit of num_local_ibs_max to resolve.")
1201-
do i = 1, num_local_ibs_max
1202-
local_ib_patch_ids(i) = i
1203-
end do
1208+
call get_neighbor_bounds()
1209+
call s_compute_ib_neighbor_ranks()
12041210

1205-
$:GPU_EXIT_DATA(delete='[patch_ib]')
1206-
deallocate (patch_ib)
1211+
num_gbl_ibs = num_namelist_ibs + num_bed_ibs
12071212

12081213
#ifdef MFC_MPI
12091214
if (num_procs == 1) then
1210-
! single-rank: every patch is local; allocate to exact size and copy
1211-
allocate (patch_ib(num_gbl_ibs))
1212-
patch_ib(1:num_gbl_ibs) = patch_ib_gbl(1:num_gbl_ibs)
1213-
deallocate (patch_ib_gbl)
1215+
! single-rank: all patches are local; append particle bed entries directly into patch_ib.
1216+
@:PROHIBIT(num_gbl_ibs > num_ib_patches_max_namelist, &
1217+
& "Total IB count exceeds patch_ib capacity. Increase num_ib_patches_max_namelist.")
1218+
do i = 1, num_bed_ibs
1219+
patch_ib(num_namelist_ibs + i) = particle_bed_ibs(i)
1220+
patch_ib(num_namelist_ibs + i)%gbl_patch_id = num_namelist_ibs + i
1221+
end do
1222+
num_ibs = num_gbl_ibs
1223+
num_local_ibs = num_gbl_ibs
1224+
do i = 1, num_gbl_ibs
1225+
local_ib_patch_ids(i) = i
1226+
end do
12141227
else
1215-
! multi-rank: carve out the local neighbourhood subset
1216-
num_aware_ibs = min(num_local_ibs_max*(2*ib_neighborhood_radius + 1)**num_dims, num_ib_patches_max)
1217-
allocate (patch_ib(num_aware_ibs))
1218-
1219-
num_local_ibs = 0
1228+
! multi-rank: compact namelist patches in-place (write_idx <= read_idx, no aliasing), then append local particle beds.
12201229
num_ibs = 0
1221-
do i = 1, num_gbl_ibs
1222-
! catch the edge case where th collision lies just outside the computational domain
1223-
is_in_neighborhood = .true.
1224-
is_local = .true.
1225-
centroid = [patch_ib_gbl(i)%x_centroid, patch_ib_gbl(i)%y_centroid, 0._wp]
1226-
if (num_dims == 3) centroid(3) = patch_ib_gbl(i)%z_centroid
1227-
1230+
num_local_ibs = 0
1231+
do i = 1, num_namelist_ibs
1232+
centroid = [patch_ib(i)%x_centroid, patch_ib(i)%y_centroid, 0._wp]
1233+
if (num_dims == 3) centroid(3) = patch_ib(i)%z_centroid
12281234
if (f_neighborhood_ranks_own_location(centroid)) then
12291235
num_ibs = num_ibs + 1
1230-
patch_ib(num_ibs) = patch_ib_gbl(i)
1236+
patch_ib(num_ibs) = patch_ib(i)
12311237
patch_ib(num_ibs)%gbl_patch_id = i
12321238
if (f_local_rank_owns_location(centroid)) then
12331239
num_local_ibs = num_local_ibs + 1
12341240
local_ib_patch_ids(num_local_ibs) = num_ibs
12351241
end if
12361242
end if
12371243
end do
1238-
1239-
deallocate (patch_ib_gbl)
1244+
do i = 1, num_bed_ibs
1245+
centroid = [particle_bed_ibs(i)%x_centroid, particle_bed_ibs(i)%y_centroid, 0._wp]
1246+
if (num_dims == 3) centroid(3) = particle_bed_ibs(i)%z_centroid
1247+
if (f_neighborhood_ranks_own_location(centroid)) then
1248+
num_ibs = num_ibs + 1
1249+
@:PROHIBIT(num_ibs > num_ib_patches_max_namelist, &
1250+
& "Local IB count exceeds patch_ib capacity. Increase num_ib_patches_max_namelist.")
1251+
patch_ib(num_ibs) = particle_bed_ibs(i)
1252+
patch_ib(num_ibs)%gbl_patch_id = num_namelist_ibs + i
1253+
if (f_local_rank_owns_location(centroid)) then
1254+
num_local_ibs = num_local_ibs + 1
1255+
local_ib_patch_ids(num_local_ibs) = num_ibs
1256+
end if
1257+
end if
1258+
end do
1259+
@:PROHIBIT(num_local_ibs > num_local_ibs_max, &
1260+
& "Too many IBs on a single processor rank. Modify case file or increase limit of num_local_ibs_max to resolve.")
12401261
end if
12411262
#else
1242-
! no-MPI: every patch is local; allocate to exact size and copy
1243-
allocate (patch_ib(num_gbl_ibs))
1244-
patch_ib(1:num_gbl_ibs) = patch_ib_gbl(1:num_gbl_ibs)
1245-
deallocate (patch_ib_gbl)
1263+
! no-MPI: all patches are local; append particle bed entries directly into patch_ib.
1264+
@:PROHIBIT(num_gbl_ibs > num_ib_patches_max_namelist, &
1265+
& "Total IB count exceeds patch_ib capacity. Increase num_ib_patches_max_namelist.")
1266+
do i = 1, num_bed_ibs
1267+
patch_ib(num_namelist_ibs + i) = particle_bed_ibs(i)
1268+
patch_ib(num_namelist_ibs + i)%gbl_patch_id = num_namelist_ibs + i
1269+
end do
1270+
num_ibs = num_gbl_ibs
1271+
num_local_ibs = num_gbl_ibs
1272+
do i = 1, num_gbl_ibs
1273+
local_ib_patch_ids(i) = i
1274+
end do
12461275
#endif
12471276

12481277
$:GPU_ENTER_DATA(create='[patch_ib]')

0 commit comments

Comments
 (0)