From 4273c849e8df48dd7ff2aa4ddf2dfd58f04f5ffc Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sun, 15 Mar 2026 07:51:06 -0400 Subject: [PATCH 01/15] Remove interior point conservative variable protection for stationary boundaries --- src/simulation/m_ibm.fpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index b87f5a1b19..d5eeb527cb 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -192,7 +192,7 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp - ! set the Moving IBM interior Pressure Values + ! set the Moving IBM interior conservative variables $:GPU_PARALLEL_LOOP(private='[i,j,k,patch_id,rho]', copyin='[E_idx,momxb]', collapse=3) do l = 0, p do k = 0, n @@ -200,18 +200,16 @@ contains patch_id = ib_markers%sf(j, k, l) if (patch_id /= 0) then q_prim_vf(E_idx)%sf(j, k, l) = 1._wp - if (patch_ib(patch_id)%moving_ibm > 0) then - rho = 0._wp - do i = 1, num_fluids - rho = rho + q_prim_vf(contxb + i - 1)%sf(j, k, l) - end do + rho = 0._wp + do i = 1, num_fluids + rho = rho + q_prim_vf(contxb + i - 1)%sf(j, k, l) + end do - ! Sets the momentum - do i = 1, num_dims - q_cons_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)*rho - q_prim_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i) - end do - end if + ! Sets the momentum + do i = 1, num_dims + q_cons_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)*rho + q_prim_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i) + end do end if end do end do From f6522744efd6419b4644dab325267e419c7d3f32 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Tue, 21 Apr 2026 14:11:22 -0400 Subject: [PATCH 02/15] Initial separation for patch_ibs --- docs/documentation/case.md | 4 ++-- src/common/m_constants.fpp | 1 + src/common/m_derived_types.fpp | 3 ++- src/simulation/m_global_parameters.fpp | 6 +++++- src/simulation/m_start_up.fpp | 13 +++++++++++++ 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/docs/documentation/case.md b/docs/documentation/case.md index 278c694266..0bab125436 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -638,7 +638,7 @@ To restart the simulation from $k$-th time step, see @ref running "Restarting Ca | `alpha_wrt(i)` | Logical | Add the volume fraction of fluid $i$ to the database | | `gamma_wrt` | Logical | Add the specific heat ratio function to the database | | `heat_ratio_wrt` | Logical | Add the specific heat ratio to the database | -| `ib_state_wrt` | Logical | Write IB state and loads to a datafile at each time step | +| `ib_state_wrt` | Logical | Parameter to handle writing IB state on saves and outputing the state as a point mesh to SILO files. | | `pi_inf_wrt` | Logical | Add the liquid stiffness function to the database | | `pres_inf_wrt` | Logical | Add the liquid stiffness to the formatted database | | `c_wrt` | Logical | Add the sound speed to the database | @@ -706,7 +706,7 @@ If `file_per_process` is true, then pre_process, simulation, and post_process mu - `probe_wrt` activates the output of state variables at coordinates specified by `probe(i)%[x;y,z]`. -- `ib_state_wrt` activates the output of data specified by patch_ib(i)%force(:) (and torque, vel, angular_vel, angles, [x,y,z]_centroid) into a single binary datafile for all IBs at all timesteps. During post_processing, this file is converted into separate time histories for each IB. +- `ib_state_wrt` is used to trigger post-processing of the IB state to be written out as a point mesh in the SILO files. 
When no IBs are moving, it also triggers force and torque calculation so that those values may be written to the output state files. - `output_partial_domain` activates the output of part of the domain specified by `[x,y,z]_output%%beg` and `[x,y,z]_output%%end`. This is useful for large domains where only a portion of the domain is of interest. diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index cacc50f528..d66fb10614 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -25,6 +25,7 @@ module m_constants integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation integer, parameter :: num_patches_max = 10 !< Maximum number of IC patches integer, parameter :: num_ib_patches_max = 50000 !< Maximum number of immersed boundary patches (patch_ib) + integer, parameter :: num_local_ibs_max = 2000 !< Maximum number of immersed boundary patches (patch_ib) integer, parameter :: num_bc_patches_max = 10 !< Maximum number of boundary condition patches integer, parameter :: max_2d_fourier_modes = 10 !< Max Fourier mode index for 2D modal patch (geometry 13) integer, parameter :: max_sph_harm_degree = 5 !< Max degree L for 3D spherical harmonic patch (geometry 14) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index b7de058c93..4f7f08e64a 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -270,9 +270,10 @@ module m_derived_types end type ic_patch_parameters type ib_patch_parameters - integer :: geometry !< Type of geometry for the patch + integer :: patch_id real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch + !> Centroid locations of intermediate steps in the time_stepper module real(wp) :: step_x_centroid, step_y_centroid, step_z_centroid real(wp), dimension(1:3) :: centroid_offset !< offset of center of mass from computed cell center for odd-shaped IBs diff --git 
a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index e9b5e092a2..ee1607df13 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -338,12 +338,14 @@ module m_global_parameters !> @{ logical :: ib integer :: num_ibs + integer :: num_local_ibs integer :: collision_model real(wp) :: coefficient_of_restitution real(wp) :: collision_time real(wp) :: ib_coefficient_of_friction logical :: ib_state_wrt - type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib !< Immersed boundary patch parameters + type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters + integer, dimension(num_local_ibs_max) :: local_patch_ids !< lookup table of IBs in the local compute domain type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l integer :: Np @@ -780,7 +782,9 @@ contains relativity = .false. #:endif + allocate(patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max + patch_ib(i)%patch_id = i patch_ib(i)%geometry = dflt_int patch_ib(i)%x_centroid = 0._wp patch_ib(i)%y_centroid = 0._wp diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 9ff90fbdd7..b97dfc0c0f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1187,4 +1187,17 @@ contains end subroutine s_read_ib_restart_data + subroutine s_reduce_ib_patch_array() + + type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + + patch_ib_gbl(:) = patch_ib(:) + + deallocate(patch_ib) + allocate(patch_ib(num_local_ibs_max)) + + + + end subroutine s_reduce_ib_patch_array + end module m_start_up From 3baf894b925a2bf22ad9059efbce3d1f2450064c Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 22 Apr 2026 15:42:43 -0400 Subject: [PATCH 03/15] Added IB patch reduction at the start of the simulation so that ranks are only locally aware --- docs/documentation/case.md | 2 +- 
src/simulation/m_global_parameters.fpp | 31 +++---- src/simulation/m_start_up.fpp | 117 +++++++++++++++++++++++-- 3 files changed, 129 insertions(+), 21 deletions(-) diff --git a/docs/documentation/case.md b/docs/documentation/case.md index 0bab125436..7b142663ed 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -638,7 +638,7 @@ To restart the simulation from $k$-th time step, see @ref running "Restarting Ca | `alpha_wrt(i)` | Logical | Add the volume fraction of fluid $i$ to the database | | `gamma_wrt` | Logical | Add the specific heat ratio function to the database | | `heat_ratio_wrt` | Logical | Add the specific heat ratio to the database | -| `ib_state_wrt` | Logical | Parameter to handle writing IB state on saves and outputing the state as a point mesh to SILO files. | +| `ib_state_wrt` | Logical | Parameter to handle writing IB state on saves and outputting the state as a point mesh to SILO files. | | `pi_inf_wrt` | Logical | Add the liquid stiffness function to the database | | `pres_inf_wrt` | Logical | Add the liquid stiffness to the formatted database | | `c_wrt` | Logical | Add the sound speed to the database | diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index ee1607df13..cc950782c8 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -336,20 +336,21 @@ module m_global_parameters !> @name Immersed Boundaries !> @{ - logical :: ib - integer :: num_ibs - integer :: num_local_ibs - integer :: collision_model - real(wp) :: coefficient_of_restitution - real(wp) :: collision_time - real(wp) :: ib_coefficient_of_friction - logical :: ib_state_wrt - type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters - integer, dimension(num_local_ibs_max) :: local_patch_ids !< lookup table of IBs in the local compute domain - type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l - 
integer :: Np - - $:GPU_DECLARE(create='[ib, num_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l]') + logical :: ib + integer :: num_ibs !< number of IBs that the current processor is aware of + integer :: num_gbl !< number of IBs in the overall simulation + integer :: num_local_ibs !< number of IBs that lie inside the processor domain + integer :: collision_model + real(wp) :: coefficient_of_restitution + real(wp) :: collision_time + real(wp) :: ib_coefficient_of_friction + logical :: ib_state_wrt + type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters + integer, dimension(num_local_ibs_max) :: local_ib_patch_ids !< lookup table of IBs in the local compute domain + type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l + integer :: Np + + $:GPU_DECLARE(create='[ib, num_ibs, num_gbl, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') $:GPU_DECLARE(create='[ib_coefficient_of_friction]') !> @} @@ -782,7 +783,7 @@ contains relativity = .false. 
#:endif - allocate(patch_ib(num_ib_patches_max)) + allocate (patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max patch_ib(i)%patch_id = i patch_ib(i)%geometry = dflt_int diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index b97dfc0c0f..52e43ecfc1 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -39,6 +39,7 @@ module m_start_up use m_nvtx use m_ibm + use m_collisions use m_compile_specific use m_checker_common use m_checker @@ -915,6 +916,7 @@ contains if (model_eqns == 3) call s_initialize_internal_energy_equations(q_cons_ts(1)%vf) if (ib) then if (t_step_start > 0) call s_read_ib_restart_data(t_step_start) + call s_reduce_ib_patch_array() call s_ibm_setup() if (t_step_start == 0) then call s_write_ib_data_file(0) @@ -1189,15 +1191,120 @@ contains subroutine s_reduce_ib_patch_array() - type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + real(wp), dimension(3) :: collision_location + integer :: i, j + integer :: num_aware_ibs - patch_ib_gbl(:) = patch_ib(:) + patch_ib_gbl(:) = patch_ib(:) - deallocate(patch_ib) - allocate(patch_ib(num_local_ibs_max)) + deallocate (patch_ib) + if (num_dims == 3) then + num_aware_ibs = num_local_ibs_max*27 + else + num_aware_ibs = num_local_ibs_max*9 + end if + allocate (patch_ib(num_aware_ibs)) + +#ifdef MFC_MPI + ! fallback for 1-rank case + if (num_proc == 1) then + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) + else + ! 
determine the set of patches owned by local rank + num_local_ibs = 0 + do i = 1, num_ib_patches_max + collision_location = [patch_ib_gbl(i)%x_centroid, patch_ib_gbl(i)%y_centroid, 0._wp] + if (num_dims == 3) collision_location(3) = patch_ib_gbl(i)%z_centroid + if (f_local_rank_owns_collision(collision_location)) then + num_local_ibs = num_local_ibs + 1 + patch_ib(j) = patch_ib_gbl(i) + local_ib_patch_ids(j) = j + end if + end do + num_gbl_ibs = num_ibs + num_ibs = num_local_ibs - + ! collect the patches from neighboring + call s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) + end if +#else + ! reduce the size of the array for local simulation in no-MPI case + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) +#endif end subroutine s_reduce_ib_patch_array + !> Exchanges local IB patch IDs with face-neighbors in each axis direction so that each rank acquires the patch data for IBs + !! owned by its immediate neighbors. + subroutine s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) + + type(ib_patch_parameters), dimension(num_ib_patches_max), intent(in) :: patch_ib_gbl + integer, intent(in) :: num_aware_ibs + +#ifdef MFC_MPI + integer, dimension(num_aware_ibs) :: send_buf, recv_buf + integer :: i, k, recv_id, send_neighbor, recv_neighbor + integer :: ierr + logical :: found + + #:for X, ID, TAG in [('x', 1, 100), ('y', 2, 102), ('z', 3, 104)] + if (num_dims >= ${ID}$) then + ! Repack local patch IDs; sentinel -1 marks unused slots + send_buf = -1 + do i = 1, num_ibs + send_buf(i) = patch_ib(i)%patch_id + end do + + ! 
Step 1: send to +${X}$ neighbor, receive from -${X}$ neighbor + send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_buf = -1 + call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG}$, recv_buf, num_aware_ibs, & + & MPI_INTEGER, recv_neighbor, ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + do i = 1, num_aware_ibs + recv_id = recv_buf(i) + if (recv_id < 0) exit + found = .false. + do k = 1, num_ibs + if (patch_ib(k)%patch_id == recv_id) then + found = .true. + exit + end if + end do + if (.not. found) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= num_aware_ibs, 'patch_ib overflow in ${X}$+ IB communication') + patch_ib(num_ibs) = patch_ib_gbl(recv_id) + end if + end do + + ! Step 2: send to -${X}$ neighbor, receive from +${X}$ neighbor (same send_buf: original local list) + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_buf = -1 + call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG + 1}$, recv_buf, num_aware_ibs, & + & MPI_INTEGER, recv_neighbor, ${TAG + 1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + do i = 1, num_aware_ibs + recv_id = recv_buf(i) + if (recv_id < 0) exit + found = .false. + do k = 1, num_ibs + if (patch_ib(k)%patch_id == recv_id) then + found = .true. + exit + end if + end do + if (.not. 
found) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in ${X}$- IB communication') + patch_ib(num_ibs) = patch_ib_gbl(recv_id) + end if + end do + end if + #:endfor +#endif + + end subroutine s_communicate_ib_patches + end module m_start_up From 2365f2c009b9f4e04c85bc286060fe83f8232f3f Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 22 Apr 2026 17:19:40 -0400 Subject: [PATCH 04/15] intermittent commit --- src/common/m_derived_types.fpp | 2 +- src/simulation/m_global_parameters.fpp | 4 ++-- src/simulation/m_ib_patches.fpp | 4 ++-- src/simulation/m_start_up.fpp | 6 +++--- src/simulation/m_time_steppers.fpp | 1 + 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 4f7f08e64a..14a25c3607 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -271,7 +271,7 @@ module m_derived_types type ib_patch_parameters integer :: geometry !< Type of geometry for the patch - integer :: patch_id + integer :: gbl_patch_id real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch !> Centroid locations of intermediate steps in the time_stepper module diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index cc950782c8..9f8da1d69c 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -338,7 +338,7 @@ module m_global_parameters !> @{ logical :: ib integer :: num_ibs !< number of IBs that the current processor is aware of - integer :: num_gbl !< number of IBs in the overall simulation + integer :: num_gbl_ibs !< number of IBs in the overall simulation integer :: num_local_ibs !< number of IBs that lie inside the processor domain integer :: collision_model real(wp) :: coefficient_of_restitution @@ -350,7 +350,7 @@ module m_global_parameters type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l 
integer :: Np - $:GPU_DECLARE(create='[ib, num_ibs, num_gbl, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') + $:GPU_DECLARE(create='[ib, num_ibs, num_gbl_ibs, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') $:GPU_DECLARE(create='[ib_coefficient_of_friction]') !> @} diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 3735d9a8a0..d932efa863 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -1039,7 +1039,7 @@ contains temp_y_per = y_periodicity; if (y_periodicity == -1) temp_y_per = 2 temp_z_per = z_periodicity; if (z_periodicity == -1) temp_z_per = 2 - offset = (num_ibs + 1)*temp_x_per + 3*(num_ibs + 1)*temp_y_per + 9*(num_ibs + 1)*temp_z_per + offset = (num_gbl_ibs + 1)*temp_x_per + 3*(num_gbl_ibs + 1)*temp_y_per + 9*(num_gbl_ibs + 1)*temp_z_per encoded_patch_id = patch_id + offset end subroutine s_encode_patch_periodicity @@ -1053,7 +1053,7 @@ contains integer, intent(out) :: patch_id, x_periodicity, y_periodicity, z_periodicity integer :: offset, remainder, xp, yp, zp, base - base = num_ibs + 1 + base = num_gbl_ibs + 1 patch_id = mod(encoded_patch_id - 1, base) + 1 offset = (encoded_patch_id - patch_id)/base diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 52e43ecfc1..f7a3d2f230 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1253,7 +1253,7 @@ contains ! Repack local patch IDs; sentinel -1 marks unused slots send_buf = -1 do i = 1, num_ibs - send_buf(i) = patch_ib(i)%patch_id + send_buf(i) = patch_ib(i)%gbl_patch_id end do ! Step 1: send to +${X}$ neighbor, receive from -${X}$ neighbor @@ -1267,7 +1267,7 @@ contains if (recv_id < 0) exit found = .false. do k = 1, num_ibs - if (patch_ib(k)%patch_id == recv_id) then + if (patch_ib(k)%gbl_patch_id == recv_id) then found = .true. exit end if @@ -1290,7 +1290,7 @@ contains if (recv_id < 0) exit found = .false. 
do k = 1, num_ibs - if (patch_ib(k)%patch_id == recv_id) then + if (patch_ib(k)%gbl_patch_id == recv_id) then found = .true. exit end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 88d27cb975..9e701a1887 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -567,6 +567,7 @@ contains ! if (ib) then if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() + call send_updated_ib_list() if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then call s_compute_ib_forces(q_prim_vf, fluid_pp) end if From 71bae6a16741020dd699d145080f1174f33d1d34 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 22 Apr 2026 17:33:14 -0400 Subject: [PATCH 05/15] we now write the global IB index, not the local one to ib_markers, as intended --- src/simulation/m_ib_patches.fpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index d932efa863..032c790dc0 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -109,7 +109,7 @@ contains radius = patch_ib(patch_id)%radius ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -221,7 +221,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -376,7 +376,7 @@ contains end if ! 
encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -466,7 +466,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -527,7 +527,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -586,7 +586,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -656,7 +656,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -724,7 +724,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! 
encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -781,7 +781,7 @@ contains threshold = patch_ib(patch_id)%model_threshold ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -858,7 +858,7 @@ contains rotation(:,:) = patch_ib(patch_id)%rotation_matrix(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 From 07790980caea6afd82c9f55d1d13dd3bec921db4 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 23 Apr 2026 13:58:14 -0400 Subject: [PATCH 06/15] Refactored ib reduction to use neighbor bounds --- src/common/m_derived_types.fpp | 10 +- src/simulation/m_global_parameters.fpp | 3 +- src/simulation/m_ibm.fpp | 43 +++++++++ src/simulation/m_start_up.fpp | 127 +++++++++++-------------- 4 files changed, 106 insertions(+), 77 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 14a25c3607..3ff1f156dd 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -200,12 +200,10 @@ module m_derived_types type :: t_model_array ! 
Original CPU-side fields (unchanged) - type(t_model), allocatable :: model !< STL/OBJ geometry model - real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices - real(wp), allocatable, dimension(:,:) :: interpolated_boundary_v !< Interpolated boundary vertices - integer :: boundary_edge_count !< Number of boundary edges - integer :: total_vertices !< Total vertex count - integer :: interpolate !< Interpolation flag + type(t_model), allocatable :: model !< STL/OBJ geometry model + real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices + integer :: boundary_edge_count !< Number of boundary edges + integer :: total_vertices !< Total vertex count ! GPU-friendly flattened arrays integer :: ntrs !< Copy of model%ntrs diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 9f8da1d69c..581a418aba 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -229,7 +229,8 @@ module m_global_parameters $:GPU_DECLARE(create='[ib_bc_x, ib_bc_y, ib_bc_z]') #endif type(bounds_info) :: x_domain, y_domain, z_domain - $:GPU_DECLARE(create='[x_domain, y_domain, z_domain]') + type(bounds_info) :: neighbor_domain_x, neighbor_domain_y, neighbor_domain_z + $:GPU_DECLARE(create='[x_domain, y_domain, z_domain, neighbor_domain_x, neighbor_domain_y, neighbor_domain_z]') real(wp) :: x_a, y_a, z_a real(wp) :: x_b, y_b, z_b logical :: parallel_io !< Format of the data files diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 469eecdabd..5d8cbd453d 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1223,4 +1223,47 @@ contains end subroutine s_wrap_periodic_ibs + !> @brief passes ownership of IBs to neighbor processors + subroutine s_handoff_ib_ownership() + + integer, dimension(num_ibs) :: communication_directions + integer :: i, ib_idx + real(wp) :: position + + #:if defined('MFC_MPI') + if (num_procs > 1) then + 
communication_directions = 0 + + ! identify particles that have left the local domain and log the direction of communication + do i = 1, num_local_ibs + ib_idx = local_ib_patch_ids(i) + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + position = patch_ib(ib_idx)%${X}$_centroid + if (bc_${X}$%beg < 0 .and. bc_${X}$%beg /= BC_PERIODIC) then + ! if it is outside the domain in one direction, project it somewhere inside so at least one rank + ! owns it + if (position < ${X}$_domain%beg) then + position = ${X}$_domain%beg + else if (${X}$_domain%end < position) then + position = ${X}$_domain%end - 1.0e-10_wp + end if + end if + + if (position < ${X}$_domain%beg) then + communication_directions(i) = -${ID}$ + else if (${X}$_domain%end < position) then + communication_directions(i) = ${ID}$ + end if + end if + #:endfor + end do + + #:for X, DIM in [('x', '1'), ('y', '2'), ('z', '3')] + #:endfor + end if + #:endif + + end subroutine s_handoff_ib_ownership + end module m_ibm diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index f7a3d2f230..15620476e2 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1189,14 +1189,18 @@ contains end subroutine s_read_ib_restart_data + !> @brief takes the patch_ib struct array that contains all global IB patches and reduces to only contain patches that are in + !! the local computational domain. subroutine s_reduce_ib_patch_array() type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl - real(wp), dimension(3) :: collision_location + real(wp) :: position integer :: i, j integer :: num_aware_ibs + logical :: is_in_neighborhood, is_local patch_ib_gbl(:) = patch_ib(:) + call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up deallocate (patch_ib) if (num_dims == 3) then @@ -1206,6 +1210,8 @@ contains end if allocate (patch_ib(num_aware_ibs)) + num_gbl_ibs = num_ibs + #ifdef MFC_MPI ! 
fallback for 1-rank case if (num_proc == 1) then @@ -1213,20 +1219,33 @@ contains else ! determine the set of patches owned by local rank num_local_ibs = 0 + num_ibs = 0 do i = 1, num_ib_patches_max - collision_location = [patch_ib_gbl(i)%x_centroid, patch_ib_gbl(i)%y_centroid, 0._wp] - if (num_dims == 3) collision_location(3) = patch_ib_gbl(i)%z_centroid - if (f_local_rank_owns_collision(collision_location)) then - num_local_ibs = num_local_ibs + 1 - patch_ib(j) = patch_ib_gbl(i) - local_ib_patch_ids(j) = j + ! catch the edge case where th collision lies just outside the computational domain + is_in_neighborhood = .true. + is_local = .true. + + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + position = patch_ib_gbl(i)%${X}$_centroid + if (neighbor_domain_${X}$%beg > position .or. position > neighbor_domain_${X}$%end) then + is_in_neighborhood = .false. + is_local = .false. + else if (${X}$_domain%beg > position .or. position > ${X}$_domain%end) then + is_local = .false. + end if + end if + #:endfor + + if (is_in_neighborhood) then + num_ibs = num_ibs + 1 + patch_ib(num_ibs) = patch_ib_gbl(i) + if (is_local) then + num_local_ibs = num_local_ibs + 1 + local_ib_patch_ids(num_local_ibs) = num_ibs + end if end if end do - num_gbl_ibs = num_ibs - num_ibs = num_local_ibs - - ! collect the patches from neighboring - call s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) end if #else ! reduce the size of the array for local simulation in no-MPI case @@ -1235,76 +1254,44 @@ contains end subroutine s_reduce_ib_patch_array - !> Exchanges local IB patch IDs with face-neighbors in each axis direction so that each rank acquires the patch data for IBs - !! owned by its immediate neighbors. - subroutine s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) + subroutine get_neighbor_bounds() - type(ib_patch_parameters), dimension(num_ib_patches_max), intent(in) :: patch_ib_gbl - integer, intent(in) :: num_aware_ibs + ! 
Default: no neighbor in any direction + neighbor_domain_x%beg = -huge(0._wp) + neighbor_domain_x%end = huge(0._wp) + neighbor_domain_y%beg = -huge(0._wp) + neighbor_domain_y%end = huge(0._wp) + neighbor_domain_z%beg = -huge(0._wp) + neighbor_domain_z%end = huge(0._wp) #ifdef MFC_MPI - integer, dimension(num_aware_ibs) :: send_buf, recv_buf - integer :: i, k, recv_id, send_neighbor, recv_neighbor - integer :: ierr - logical :: found + real(wp) :: send_val, recv_val + integer :: send_neighbor, recv_neighbor, ierr - #:for X, ID, TAG in [('x', 1, 100), ('y', 2, 102), ('z', 3, 104)] + #:for X, ID, TAG, DIM in [('x', 1, 100, 'm'), ('y', 2, 102, 'n'), ('z', 3, 104, 'p')] if (num_dims >= ${ID}$) then - ! Repack local patch IDs; sentinel -1 marks unused slots - send_buf = -1 - do i = 1, num_ibs - send_buf(i) = patch_ib(i)%gbl_patch_id - end do - - ! Step 1: send to +${X}$ neighbor, receive from -${X}$ neighbor + ! Step 1: broadcast left edge (-1 face) rightward; receive left neighbor's left edge -> neighbor_domain_${X}$%beg + send_val = ${X}$_cb(-1) send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) - recv_buf = -1 - call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG}$, recv_buf, num_aware_ibs, & - & MPI_INTEGER, recv_neighbor, ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do i = 1, num_aware_ibs - recv_id = recv_buf(i) - if (recv_id < 0) exit - found = .false. - do k = 1, num_ibs - if (patch_ib(k)%gbl_patch_id == recv_id) then - found = .true. - exit - end if - end do - if (.not. found) then - num_ibs = num_ibs + 1 - @:ASSERT(num_ibs <= num_aware_ibs, 'patch_ib overflow in ${X}$+ IB communication') - patch_ib(num_ibs) = patch_ib_gbl(recv_id) - end if - end do - - ! 
Step 2: send to -${X}$ neighbor, receive from +${X}$ neighbor (same send_buf: original local list) + recv_val = -huge(0._wp) + call MPI_SENDRECV(send_val, 1, mpi_p, send_neighbor, ${TAG}$, recv_val, 1, mpi_p, recv_neighbor, ${TAG}$, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + neighbor_domain_${X}$%beg = recv_val + + ! Step 2: broadcast right edge (${DIM}$ face) leftward; receive right neighbor's right edge -> + ! neighbor_domain_${X}$%end + send_val = ${X}$_cb(${DIM}$) send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) - recv_buf = -1 - call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG + 1}$, recv_buf, num_aware_ibs, & - & MPI_INTEGER, recv_neighbor, ${TAG + 1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do i = 1, num_aware_ibs - recv_id = recv_buf(i) - if (recv_id < 0) exit - found = .false. - do k = 1, num_ibs - if (patch_ib(k)%gbl_patch_id == recv_id) then - found = .true. - exit - end if - end do - if (.not. found) then - num_ibs = num_ibs + 1 - @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in ${X}$- IB communication') - patch_ib(num_ibs) = patch_ib_gbl(recv_id) - end if - end do + recv_val = huge(0._wp) + call MPI_SENDRECV(send_val, 1, mpi_p, send_neighbor, ${TAG + 1}$, recv_val, 1, mpi_p, recv_neighbor, ${TAG + 1}$, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + neighbor_domain_${X}$%end = recv_val end if #:endfor #endif - end subroutine s_communicate_ib_patches + end subroutine get_neighbor_bounds end module m_start_up From d9ac1c257d1b5c57f4f396660544bf2d80cdeaf2 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Thu, 23 Apr 2026 15:56:02 -0400 Subject: [PATCH 07/15] prototype of send-receive replacing all-to-all --- src/simulation/m_ibm.fpp | 304 ++++++++++++++++++++++++++--- src/simulation/m_time_steppers.fpp | 4 +- 2 files changed, 277 insertions(+), 31 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 5d8cbd453d..14b409d6d5 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1008,9 +1008,8 @@ contains call s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques) - ! reduce the forces across all MPI ranks - call s_mpi_allreduce_vectors_sum(forces, forces, num_ibs, 3) - call s_mpi_allreduce_vectors_sum(torques, torques, num_ibs, 3) + ! reduce the forces across local neighborhood ranks + call s_communicate_ib_forces(forces, torques) ! consider body forces after reducing to avoid double counting do i = 1, num_ibs @@ -1223,44 +1222,291 @@ contains end subroutine s_wrap_periodic_ibs - !> @brief passes ownership of IBs to neighbor processors + !> @brief reasseses ownership of IBs and passes ownership of IBs to neighbor processors + !> Reduces forces and torques across the local neighborhood without a global allreduce. Accumulation phase: 2 passes per + !! dimension receiving from the low-index (-X) neighbor. Pass 1: add received values; save what was received as recv_snap. Pass + !! 2: send current (post-pass-1) values; add received; subtract recv_snap to remove double-counting of the direct contribution + !! already added in pass 1. Back-propagation phase: 2 passes per dimension receiving from the high-index (+X) neighbor, each + !! overwriting local forces with the neighbor's accumulated total. 
+ subroutine s_communicate_ib_forces(forces, torques) + + real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + +#ifdef MFC_MPI + integer :: i, j, pack_pos, unpack_pos, buf_size, ierr + integer :: send_neighbor, recv_neighbor, recv_count, pid + real(wp), dimension(3) :: fval, tval + real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:) + character(len=1), allocatable :: send_buf(:), recv_buf(:) + + if (num_procs == 1) return + + buf_size = storage_size(0)/8 + (storage_size(0)/8 + 6*storage_size(0._wp)/8)*size(patch_ib) + allocate (send_buf(buf_size), recv_buf(buf_size), recv_forces_snap(num_ibs, 3), recv_torques_snap(num_ibs, 3)) + + ! Accumulation phase: propagate contributions toward the high-index corner. + #:for X, ID, TAG1, TAG2 in [('x', 1, 300, 302), ('y', 2, 304, 306), ('z', 3, 308, 310)] + if (num_dims >= ${ID}$) then + send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + + ! Pass 1: send current forces to +${X}$ neighbor; receive from -${X}$ neighbor and add. Save what was received as + ! recv_snap for double-count removal in pass 2. 
+ recv_forces_snap = 0._wp + recv_torques_snap = 0._wp + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG1}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + recv_forces_snap(j,:) = fval(:) + recv_torques_snap(j,:) = tval(:) + forces(j,:) = forces(j,:) + fval(:) + torques(j,:) = torques(j,:) + tval(:) + exit + end if + end do + end do + end if + + ! Pass 2: send post-pass-1 forces to +${X}$ neighbor; receive from -${X}$ neighbor. Add received values then + ! subtract recv_snap to remove the pass-1 contribution that was already counted, leaving only the 2-hop delta. 
+ pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG2}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG2}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + forces(j,:) = forces(j,:) + fval(:) - recv_forces_snap(j,:) + torques(j,:) = torques(j,:) + tval(:) - recv_torques_snap(j,:) + exit + end if + end do + end do + end if + end if + #:endfor + + ! Back-propagation phase: for each dimension, 2 passes receiving from the high-index neighbor. Each pass overwrites local + ! forces with the neighbor's accumulated total. Two passes ensure the total reaches 2 hops back, covering the full + ! neighborhood. 
+ #:for X, ID, TAG1, TAG2 in [('x', 1, 312, 314), ('y', 2, 316, 318), ('z', 3, 320, 322)] + if (num_dims >= ${ID}$) then + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + + #:for TAG in [TAG1, TAG2] + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + forces(j,:) = fval(:) + torques(j,:) = tval(:) + exit + end if + end do + end do + end if + #:endfor + end if + #:endfor + + deallocate (send_buf, recv_buf, recv_forces_snap, recv_torques_snap) +#endif + + end subroutine s_communicate_ib_forces + subroutine s_handoff_ib_ownership() - integer, dimension(num_ibs) :: communication_directions - integer :: i, ib_idx - real(wp) :: position + integer :: i, j, k, output_idx, local_output_idx + integer :: old_num_local_ibs + integer :: new_count, recv_count + integer :: pack_pos, unpack_pos, 
buf_size, patch_bytes + integer :: send_neighbor, recv_neighbor, ierr + integer :: dx, dy, dz, tag, nbr_idx, nreqs + integer, dimension(3) :: nbr_coords + real(wp) :: position + real(wp), dimension(3) :: centroid + logical :: is_new, already_known + type(ib_patch_parameters) :: tmp_patch + integer, dimension(num_local_ibs_max) :: local_ib_idx_old + ! 26 neighbors max in 3D; each gets its own recv buffer and a request handle for send + recv + integer, parameter :: max_nbrs = 26 + character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) + integer, dimension(2*max_nbrs) :: requests + integer, dimension(max_nbrs) :: recv_neighbor_list #:if defined('MFC_MPI') if (num_procs > 1) then - communication_directions = 0 - - ! identify particles that have left the local domain and log the direction of communication + ! save a copy of the local IB's global indices to cross-reference for later. + old_num_local_ibs = num_local_ibs do i = 1, num_local_ibs - ib_idx = local_ib_patch_ids(i) + local_ib_idx_old(i) = patch_ib(local_ib_patch_ids(i))%gbl_patch_id + end do + + ! delete any particles that no longer need to be tracked and coalesce the array + output_idx = 0 + local_output_idx = 0 + do i = 1, num_ibs #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] - if (num_dims >= ${ID}$) then - position = patch_ib(ib_idx)%${X}$_centroid - if (bc_${X}$%beg < 0 .and. bc_${X}$%beg /= BC_PERIODIC) then - ! if it is outside the domain in one direction, project it somewhere inside so at least one rank - ! owns it - if (position < ${X}$_domain%beg) then - position = ${X}$_domain%beg - else if (${X}$_domain%end < position) then - position = ${X}$_domain%end - 1.0e-10_wp - end if - end if + if (patch_ib(i)%${X}$_centroid < neighbor_domain_${X}$%beg .or. 
neighbor_domain_${X}$%end < patch_ib(i) & + & %${X}$_centroid) then + cycle + end if + #:endfor + + output_idx = output_idx + 1 + if (i /= output_idx) patch_ib(output_idx) = patch_ib(i) + centroid = [patch_ib(i)%x_centroid, patch_ib(i)%y_centroid, 0._wp] + if (num_dims == 3) centroid(3) = patch_ib(i)%z_centroid + if (f_local_rank_owns_collision(centroid)) then + local_output_idx = local_output_idx + 1 + local_ib_patch_ids(local_output_idx) = output_idx + end if + end do + num_ibs = output_idx + num_local_ibs = local_output_idx + + ! Broadcast newly-owned patches to all neighborhood neighbors (including corners/edges). + patch_bytes = storage_size(tmp_patch)/8 + buf_size = storage_size(0)/8 + patch_bytes*num_local_ibs_max + allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs)) + + ! Write placeholder count at position 0 + pack_pos = 0 + call MPI_PACK(0, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - if (position < ${X}$_domain%beg) then - communication_directions(i) = -${ID}$ - else if (${X}$_domain%end < position) then - communication_directions(i) = ${ID}$ + ! Single pass: pack new patches and count them + new_count = 0 + do i = 1, num_local_ibs + k = local_ib_patch_ids(i) + is_new = .true. + do j = 1, old_num_local_ibs + if (patch_ib(k)%gbl_patch_id == local_ib_idx_old(j)) then + is_new = .false. + exit + end if + end do + if (is_new) then + call MPI_PACK(patch_ib(k), patch_bytes, MPI_BYTE, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + new_count = new_count + 1 + end if + end do + + ! Overwrite the placeholder with the real count + pack_pos = 0 + call MPI_PACK(new_count, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + pack_pos = storage_size(0)/8 + new_count*patch_bytes + + ! Post all receives first, then all sends, so they are all in flight together. Tags 200..226: tag = 200 + (dx+1)*9 + + ! 
(dy+1)*3 + (dz+1) + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! Receive from the mirror direction + nbr_coords = proc_coords - [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) + if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL + recv_neighbor_list(nbr_idx) = recv_neighbor + + nreqs = nreqs + 1 + call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords + [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) + if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + nreqs = nreqs + 1 + call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), & + & ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! Unpack all received buffers + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tmp_patch, patch_bytes, MPI_BYTE, & + & MPI_COMM_WORLD, ierr) + already_known = .false. + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == tmp_patch%gbl_patch_id) then + already_known = .true. + exit end if + end do + if (.not. 
already_known) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in neighborhood handoff') + patch_ib(num_ibs) = tmp_patch end if - #:endfor + end do end do - #:for X, DIM in [('x', '1'), ('y', '2'), ('z', '3')] - #:endfor + deallocate (send_buf, recv_bufs) end if #:endif diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 9e701a1887..32e6005882 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -566,8 +566,8 @@ contains ! if (ib) then - if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() - call send_updated_ib_list() + if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc + call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then call s_compute_ib_forces(q_prim_vf, fluid_pp) end if From ee0bc0cfc10a0b4c39660c2920613d23c38efb65 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Thu, 23 Apr 2026 16:01:50 -0400 Subject: [PATCH 08/15] Compilation errors resolved --- src/simulation/m_global_parameters.fpp | 2 +- src/simulation/m_start_up.fpp | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 581a418aba..0a63c0c496 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -786,7 +786,7 @@ contains allocate (patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max - patch_ib(i)%patch_id = i + patch_ib(i)%gbl_patch_id = i patch_ib(i)%geometry = dflt_int patch_ib(i)%x_centroid = 0._wp patch_ib(i)%y_centroid = 0._wp diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 15620476e2..6cf36ca9ad 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1214,7 +1214,7 @@ contains #ifdef MFC_MPI ! fallback for 1-rank case - if (num_proc == 1) then + if (num_procs == 1) then patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) else ! determine the set of patches owned by local rank @@ -1256,7 +1256,11 @@ contains subroutine get_neighbor_bounds() + real(wp) :: send_val, recv_val + integer :: send_neighbor, recv_neighbor, ierr + ! Default: no neighbor in any direction + neighbor_domain_x%beg = -huge(0._wp) neighbor_domain_x%end = huge(0._wp) neighbor_domain_y%beg = -huge(0._wp) @@ -1265,9 +1269,6 @@ contains neighbor_domain_z%end = huge(0._wp) #ifdef MFC_MPI - real(wp) :: send_val, recv_val - integer :: send_neighbor, recv_neighbor, ierr - #:for X, ID, TAG, DIM in [('x', 1, 100, 'm'), ('y', 2, 102, 'n'), ('z', 3, 104, 'p')] if (num_dims >= ${ID}$) then ! Step 1: broadcast left edge (-1 face) rightward; receive left neighbor's left edge -> neighbor_domain_${X}$%beg From 8303cea4d8bea10ff3caae5a9fabcb4f4f18ef22 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 08:31:49 -0400 Subject: [PATCH 09/15] Resolved out of bounds error --- src/simulation/m_start_up.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 6cf36ca9ad..96e96bdb4f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1204,9 +1204,9 @@ contains deallocate (patch_ib) if (num_dims == 3) then - num_aware_ibs = num_local_ibs_max*27 + num_aware_ibs = min(num_local_ibs_max*27, num_ib_patches_max) else - num_aware_ibs = num_local_ibs_max*9 + num_aware_ibs = min(num_local_ibs_max*9, num_ib_patches_max) end if allocate (patch_ib(num_aware_ibs)) From 1c1801c3346d256c357ab4893ee6e70792e29556 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Fri, 24 Apr 2026 09:43:04 -0400 Subject: [PATCH 10/15] added send test algorithm for alternative MPI communication --- src/simulation/m_ibm.fpp | 197 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 14b409d6d5..1da48e17f1 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1358,6 +1358,203 @@ contains end subroutine s_communicate_ib_forces + !> Alternative force reduction using two non-blocking all-to-neighbor broadcasts. Phase 1: every rank sends its full force array + !! to all 26 neighborhood neighbors simultaneously. After MPI_WAITALL, each rank sums contributions from neighbors for its owned + !! particles. Phase 2: each rank sends its finalized owned-particle forces (by gbl_patch_id) back to all neighbors + !! simultaneously. After MPI_WAITALL, each rank overwrites ghost-particle forces with the authoritative values from the owning + !! rank. Not currently called - available for benchmarking against s_communicate_ib_forces. 
+ subroutine s_communicate_ib_forces_scatter(forces, torques) + + real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + +#ifdef MFC_MPI + integer, parameter :: max_nbrs = 26 + integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos + integer :: buf_size, entry_bytes, ierr, recv_count, pid + integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag + integer, dimension(3) :: nbr_coords + logical :: is_owned + real(wp), dimension(3) :: fval, tval + real(wp), dimension(num_ibs, 3) :: forces_total, torques_total + integer, dimension(max_nbrs) :: recv_neighbor_list + integer, dimension(2*max_nbrs) :: requests + character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) + character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) + integer :: owned_buf_size + + if (num_procs == 1) return + + ! Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle + entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 + buf_size = storage_size(0)/8 + entry_bytes*num_ibs + owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max + allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & + & owned_recv_bufs(owned_buf_size, max_nbrs)) + + ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. 
dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords - [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) + if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL + recv_neighbor_list(nbr_idx) = recv_neighbor + + nreqs = nreqs + 1 + call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords + [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) + if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + nreqs = nreqs + 1 + call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! Local reduction: for each owned particle, sum contributions from all neighbors. + forces_total = forces + torques_total = torques + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + ! 
Only accumulate for particles this rank owns + do k = 1, num_local_ibs + j = local_ib_patch_ids(k) + if (patch_ib(j)%gbl_patch_id == pid) then + forces_total(j,:) = forces_total(j,:) + fval(:) + torques_total(j,:) = torques_total(j,:) + tval(:) + exit + end if + end do + end do + end do + + ! Write totals back for owned particles only + do k = 1, num_local_ibs + j = local_ib_patch_ids(k) + forces(j,:) = forces_total(j,:) + torques(j,:) = torques_total(j,:) + end do + + ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. + pack_pos = 0 + call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do k = 1, num_local_ibs + j = local_ib_patch_ids(k) + call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(j,:); tval(:) = torques(j,:) + call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords - [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) + if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL + recv_neighbor_list(nbr_idx) = recv_neighbor + + nreqs = nreqs + 1 + call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. 
dz == 0) cycle + tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords + [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) + if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + nreqs = nreqs + 1 + call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! Overwrite ghost-particle forces with authoritative values from the owning rank. + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & + & ierr) + do i = 1, recv_count + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + is_owned = .false. + do k = 1, num_local_ibs + if (local_ib_patch_ids(k) == j) then + is_owned = .true. + exit + end if + end do + if (.not. is_owned) then + forces(j,:) = fval(:) + torques(j,:) = tval(:) + end if + exit + end if + end do + end do + end do + + deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) +#endif + + end subroutine s_communicate_ib_forces_scatter + subroutine s_handoff_ib_ownership() integer :: i, j, k, output_idx, local_output_idx From f014ca9c42a98f5a7bc617cd1a6be225cc1349e1 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 12:31:41 -0400 Subject: [PATCH 11/15] Fixed early segfault due to uninitialized IB patch array --- src/simulation/m_ibm.fpp | 382 +++++++++++++++++----------------- src/simulation/m_start_up.fpp | 3 + 2 files changed, 194 insertions(+), 191 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 1da48e17f1..4bb008a2eb 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1363,197 +1363,197 @@ contains !! particles. Phase 2: each rank sends its finalized owned-particle forces (by gbl_patch_id) back to all neighbors !! simultaneously. After MPI_WAITALL, each rank overwrites ghost-particle forces with the authoritative values from the owning !! rank. Not currently called - available for benchmarking against s_communicate_ib_forces. - subroutine s_communicate_ib_forces_scatter(forces, torques) - - real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques - -#ifdef MFC_MPI - integer, parameter :: max_nbrs = 26 - integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos - integer :: buf_size, entry_bytes, ierr, recv_count, pid - integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag - integer, dimension(3) :: nbr_coords - logical :: is_owned - real(wp), dimension(3) :: fval, tval - real(wp), dimension(num_ibs, 3) :: forces_total, torques_total - integer, dimension(max_nbrs) :: recv_neighbor_list - integer, dimension(2*max_nbrs) :: requests - character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) - character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) - integer :: owned_buf_size - - if (num_procs == 1) return - - ! 
Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle - entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 - buf_size = storage_size(0)/8 + entry_bytes*num_ibs - owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max - allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & - & owned_recv_bufs(owned_buf_size, max_nbrs)) - - ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. - pack_pos = 0 - call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - do i = 1, num_ibs - call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - fval(:) = forces(i,:); tval(:) = torques(i,:) - call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - end do - - nreqs = 0 - nbr_idx = 0 - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. dz == 0) cycle - nbr_idx = nbr_idx + 1 - tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords - [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) - if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL - recv_neighbor_list(nbr_idx) = recv_neighbor - - nreqs = nreqs + 1 - call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & - & requests(nreqs), ierr) - end do - end do - end do - - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. 
dz == 0) cycle - tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords + [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) - if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - - nreqs = nreqs + 1 - call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) - end do - end do - end do - - call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - - ! Local reduction: for each owned particle, sum contributions from all neighbors. - forces_total = forces - torques_total = torques - do nbr_idx = 1, merge(26, 8, num_dims == 3) - if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle - unpack_pos = 0 - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - do i = 1, recv_count - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) - ! Only accumulate for particles this rank owns - do k = 1, num_local_ibs - j = local_ib_patch_ids(k) - if (patch_ib(j)%gbl_patch_id == pid) then - forces_total(j,:) = forces_total(j,:) + fval(:) - torques_total(j,:) = torques_total(j,:) + tval(:) - exit - end if - end do - end do - end do - - ! Write totals back for owned particles only - do k = 1, num_local_ibs - j = local_ib_patch_ids(k) - forces(j,:) = forces_total(j,:) - torques(j,:) = torques_total(j,:) - end do - - ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. 
- pack_pos = 0 - call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - do k = 1, num_local_ibs - j = local_ib_patch_ids(k) - call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - fval(:) = forces(j,:); tval(:) = torques(j,:) - call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - end do - - nreqs = 0 - nbr_idx = 0 - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. dz == 0) cycle - nbr_idx = nbr_idx + 1 - tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords - [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) - if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL - recv_neighbor_list(nbr_idx) = recv_neighbor - - nreqs = nreqs + 1 - call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & - & requests(nreqs), ierr) - end do - end do - end do - - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. dz == 0) cycle - tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords + [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) - if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - - nreqs = nreqs + 1 - call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) - end do - end do - end do - - call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - - ! Overwrite ghost-particle forces with authoritative values from the owning rank. 
- do nbr_idx = 1, merge(26, 8, num_dims == 3) - if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle - unpack_pos = 0 - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & - & ierr) - do i = 1, recv_count - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) - ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) - do j = 1, num_ibs - if (patch_ib(j)%gbl_patch_id == pid) then - is_owned = .false. - do k = 1, num_local_ibs - if (local_ib_patch_ids(k) == j) then - is_owned = .true. - exit - end if - end do - if (.not. is_owned) then - forces(j,:) = fval(:) - torques(j,:) = tval(:) - end if - exit - end if - end do - end do - end do - - deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) -#endif - - end subroutine s_communicate_ib_forces_scatter +! subroutine s_communicate_ib_forces_scatter(forces, torques) + +! real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + +! #ifdef MFC_MPI +! integer, parameter :: max_nbrs = 26 +! integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos +! integer :: buf_size, entry_bytes, ierr, recv_count, pid +! integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag +! integer, dimension(3) :: nbr_coords +! logical :: is_owned +! real(wp), dimension(3) :: fval, tval +! real(wp), dimension(num_ibs, 3) :: forces_total, torques_total +! integer, dimension(max_nbrs) :: recv_neighbor_list +! integer, dimension(2*max_nbrs) :: requests +! character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) +! character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) +! integer :: owned_buf_size + +! 
if (num_procs == 1) return + +! ! Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle +! entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 +! buf_size = storage_size(0)/8 + entry_bytes*num_ibs +! owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max +! allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & +! & owned_recv_bufs(owned_buf_size, max_nbrs)) + +! ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. +! pack_pos = 0 +! call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! do i = 1, num_ibs +! call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! fval(:) = forces(i,:); tval(:) = torques(i,:) +! call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! end do + +! nreqs = 0 +! nbr_idx = 0 +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! nbr_idx = nbr_idx + 1 +! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords - [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) +! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL +! recv_neighbor_list(nbr_idx) = recv_neighbor + +! nreqs = nreqs + 1 +! call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & +! & requests(nreqs), ierr) +! end do +! end do +! end do + +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords + [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) +! 
if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + +! nreqs = nreqs + 1 +! call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) +! end do +! end do +! end do + +! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + +! ! Local reduction: for each owned particle, sum contributions from all neighbors. +! forces_total = forces +! torques_total = torques +! do nbr_idx = 1, merge(26, 8, num_dims == 3) +! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle +! unpack_pos = 0 +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) +! do i = 1, recv_count +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! ! Only accumulate for particles this rank owns +! do k = 1, num_local_ibs +! j = local_ib_patch_ids(k) +! if (patch_ib(j)%gbl_patch_id == pid) then +! forces_total(j,:) = forces_total(j,:) + fval(:) +! torques_total(j,:) = torques_total(j,:) + tval(:) +! exit +! end if +! end do +! end do +! end do + +! ! Write totals back for owned particles only +! do k = 1, num_local_ibs +! j = local_ib_patch_ids(k) +! forces(j,:) = forces_total(j,:) +! torques(j,:) = torques_total(j,:) +! end do + +! ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. +! pack_pos = 0 +! call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! do k = 1, num_local_ibs +! j = local_ib_patch_ids(k) +! call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! fval(:) = forces(j,:); tval(:) = torques(j,:) +! 
call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! end do + +! nreqs = 0 +! nbr_idx = 0 +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! nbr_idx = nbr_idx + 1 +! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords - [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) +! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL +! recv_neighbor_list(nbr_idx) = recv_neighbor + +! nreqs = nreqs + 1 +! call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & +! & requests(nreqs), ierr) +! end do +! end do +! end do + +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords + [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) +! if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + +! nreqs = nreqs + 1 +! call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) +! end do +! end do +! end do + +! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + +! ! Overwrite ghost-particle forces with authoritative values from the owning rank. +! do nbr_idx = 1, merge(26, 8, num_dims == 3) +! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle +! unpack_pos = 0 +! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & +! & ierr) +! do i = 1, recv_count +! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) +! 
call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) +! do j = 1, num_ibs +! if (patch_ib(j)%gbl_patch_id == pid) then +! is_owned = .false. +! do k = 1, num_local_ibs +! if (local_ib_patch_ids(k) == j) then +! is_owned = .true. +! exit +! end if +! end do +! if (.not. is_owned) then +! forces(j,:) = fval(:) +! torques(j,:) = tval(:) +! end if +! exit +! end if +! end do +! end do +! end do + +! deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) +! #endif + +! end subroutine s_communicate_ib_forces_scatter subroutine s_handoff_ib_ownership() diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 96e96bdb4f..e655728cd8 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1012,6 +1012,8 @@ contains #else "on CPUs" #endif + else + allocate (patch_ib(num_ib_patches_max)) end if call s_mpi_bcast_user_inputs() @@ -1200,6 +1202,7 @@ contains logical :: is_in_neighborhood, is_local patch_ib_gbl(:) = patch_ib(:) + print *, "Starting" call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up deallocate (patch_ib) From d36fe0bbc319a684c0c29b14f7327da18020232f Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 13:09:40 -0400 Subject: [PATCH 12/15] Debugged rank ownership bug and invalid number of global IBs --- src/common/m_model.fpp | 3 +- src/simulation/m_ibm.fpp | 287 ++++++++++++---------------------- src/simulation/m_start_up.fpp | 9 +- 3 files changed, 102 insertions(+), 197 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 0dab036f9b..f02474f8d0 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -983,10 +983,11 @@ contains dx_local = minval(dx); dy_local = minval(dy) if (p /= 0) dz_local = minval(dz) - allocate (stl_bounding_boxes(num_ibs,1:3,1:3)) + allocate (stl_bounding_boxes(num_gbl_ibs,1:3,1:3)) do patch_id = 1, num_ibs if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then + print *, proc_rank, patch_id, num_ibs, patch_ib(patch_id)%geometry allocate (models(patch_id)%model) print *, " * Reading model: " // trim(patch_ib(patch_id)%model_filepath) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 4bb008a2eb..0348668835 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1363,197 +1363,102 @@ contains !! particles. Phase 2: each rank sends its finalized owned-particle forces (by gbl_patch_id) back to all neighbors !! simultaneously. After MPI_WAITALL, each rank overwrites ghost-particle forces with the authoritative values from the owning !! rank. Not currently called - available for benchmarking against s_communicate_ib_forces. -! subroutine s_communicate_ib_forces_scatter(forces, torques) - -! real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques - -! #ifdef MFC_MPI -! integer, parameter :: max_nbrs = 26 -! integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos -! integer :: buf_size, entry_bytes, ierr, recv_count, pid -! integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag -! integer, dimension(3) :: nbr_coords -! logical :: is_owned -! real(wp), dimension(3) :: fval, tval -! 
real(wp), dimension(num_ibs, 3) :: forces_total, torques_total -! integer, dimension(max_nbrs) :: recv_neighbor_list -! integer, dimension(2*max_nbrs) :: requests -! character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) -! character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) -! integer :: owned_buf_size - -! if (num_procs == 1) return - -! ! Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle -! entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 -! buf_size = storage_size(0)/8 + entry_bytes*num_ibs -! owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max -! allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & -! & owned_recv_bufs(owned_buf_size, max_nbrs)) - -! ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. -! pack_pos = 0 -! call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! do i = 1, num_ibs -! call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! fval(:) = forces(i,:); tval(:) = torques(i,:) -! call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! end do - -! nreqs = 0 -! nbr_idx = 0 -! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! nbr_idx = nbr_idx + 1 -! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords - [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL -! recv_neighbor_list(nbr_idx) = recv_neighbor - -! nreqs = nreqs + 1 -! call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & -! & requests(nreqs), ierr) -! end do -! end do -! end do - -! 
do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords + [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - -! nreqs = nreqs + 1 -! call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) -! end do -! end do -! end do - -! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - -! ! Local reduction: for each owned particle, sum contributions from all neighbors. -! forces_total = forces -! torques_total = torques -! do nbr_idx = 1, merge(26, 8, num_dims == 3) -! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle -! unpack_pos = 0 -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) -! do i = 1, recv_count -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! ! Only accumulate for particles this rank owns -! do k = 1, num_local_ibs -! j = local_ib_patch_ids(k) -! if (patch_ib(j)%gbl_patch_id == pid) then -! forces_total(j,:) = forces_total(j,:) + fval(:) -! torques_total(j,:) = torques_total(j,:) + tval(:) -! exit -! end if -! end do -! end do -! end do - -! ! Write totals back for owned particles only -! do k = 1, num_local_ibs -! j = local_ib_patch_ids(k) -! forces(j,:) = forces_total(j,:) -! torques(j,:) = torques_total(j,:) -! end do - -! ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. -! pack_pos = 0 -! 
call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! do k = 1, num_local_ibs -! j = local_ib_patch_ids(k) -! call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! fval(:) = forces(j,:); tval(:) = torques(j,:) -! call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! end do - -! nreqs = 0 -! nbr_idx = 0 -! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! nbr_idx = nbr_idx + 1 -! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords - [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL -! recv_neighbor_list(nbr_idx) = recv_neighbor - -! nreqs = nreqs + 1 -! call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & -! & requests(nreqs), ierr) -! end do -! end do -! end do - -! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords + [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - -! nreqs = nreqs + 1 -! call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) -! end do -! end do -! end do - -! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - -! ! Overwrite ghost-particle forces with authoritative values from the owning rank. -! do nbr_idx = 1, merge(26, 8, num_dims == 3) -! 
if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle -! unpack_pos = 0 -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & -! & ierr) -! do i = 1, recv_count -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) -! do j = 1, num_ibs -! if (patch_ib(j)%gbl_patch_id == pid) then -! is_owned = .false. -! do k = 1, num_local_ibs -! if (local_ib_patch_ids(k) == j) then -! is_owned = .true. -! exit -! end if -! end do -! if (.not. is_owned) then -! forces(j,:) = fval(:) -! torques(j,:) = tval(:) -! end if -! exit -! end if -! end do -! end do -! end do - -! deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) -! #endif - -! end subroutine s_communicate_ib_forces_scatter + ! subroutine s_communicate_ib_forces_scatter(forces, torques) + + ! real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + + ! #ifdef MFC_MPI integer, parameter :: max_nbrs = 26 integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos integer :: + ! buf_size, entry_bytes, ierr, recv_count, pid integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag integer, dimension(3) :: + ! nbr_coords logical :: is_owned real(wp), dimension(3) :: fval, tval real(wp), dimension(num_ibs, 3) :: forces_total, + ! torques_total integer, dimension(max_nbrs) :: recv_neighbor_list integer, dimension(2*max_nbrs) :: requests character(len=1), + ! allocatable :: send_buf(:), recv_bufs(:,:) character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) integer :: + ! owned_buf_size + + ! if (num_procs == 1) return + + ! ! 
Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle entry_bytes = storage_size(0)/8 + + ! 6*storage_size(0._wp)/8 buf_size = storage_size(0)/8 + entry_bytes*num_ibs owned_buf_size = storage_size(0)/8 + + ! entry_bytes*num_local_ibs_max allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & & + ! owned_recv_bufs(owned_buf_size, max_nbrs)) + + ! ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. pack_pos = 0 call MPI_PACK(num_ibs, 1, + ! MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) do i = 1, num_ibs call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, + ! MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) fval(:) = forces(i,:); tval(:) = torques(i,:) call + ! MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, + ! pack_pos, MPI_COMM_WORLD, ierr) end do + + ! nreqs = 0 nbr_idx = 0 do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 + ! .and. dy == 0 .and. dz == 0) cycle nbr_idx = nbr_idx + 1 tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords - [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL recv_neighbor_list(nbr_idx) = recv_neighbor + + ! nreqs = nreqs + 1 call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & & + ! requests(nreqs), ierr) end do end do end do + + ! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 .and. dy == 0 .and. dz + ! == 0) cycle tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords + [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + ! 
nreqs = nreqs + 1 call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + ! end do end do end do + + ! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! ! Local reduction: for each owned particle, sum contributions from all neighbors. forces_total = forces torques_total = + ! torques do nbr_idx = 1, merge(26, 8, num_dims == 3) if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle unpack_pos = 0 + ! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) do i = 1, + ! recv_count call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) ! Only accumulate for particles + ! this rank owns do k = 1, num_local_ibs j = local_ib_patch_ids(k) if (patch_ib(j)%gbl_patch_id == pid) then forces_total(j,:) = + ! forces_total(j,:) + fval(:) torques_total(j,:) = torques_total(j,:) + tval(:) exit end if end do end do end do + + ! ! Write totals back for owned particles only do k = 1, num_local_ibs j = local_ib_patch_ids(k) forces(j,:) = forces_total(j,:) + ! torques(j,:) = torques_total(j,:) end do + + ! ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. pack_pos = 0 call MPI_PACK(num_local_ibs, + ! 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) do k = 1, num_local_ibs j = + ! local_ib_patch_ids(k) call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, + ! MPI_COMM_WORLD, ierr) fval(:) = forces(j,:); tval(:) = torques(j,:) call MPI_PACK(fval, 3, mpi_p, owned_send_buf, + ! owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, + ! 
MPI_COMM_WORLD, ierr) end do + + ! nreqs = 0 nbr_idx = 0 do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 + ! .and. dy == 0 .and. dz == 0) cycle nbr_idx = nbr_idx + 1 tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords - [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL recv_neighbor_list(nbr_idx) = recv_neighbor + + ! nreqs = nreqs + 1 call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + ! & requests(nreqs), ierr) end do end do end do + + ! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 .and. dy == 0 .and. dz + ! == 0) cycle tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords + [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + ! nreqs = nreqs + 1 call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), + ! ierr) end do end do end do + + ! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! ! Overwrite ghost-particle forces with authoritative values from the owning rank. do nbr_idx = 1, merge(26, 8, num_dims == 3) + ! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle unpack_pos = 0 call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), + ! owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & & ierr) do i = 1, recv_count call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) ! Only overwrite + ! 
ghost particles (not owned ones - this rank's total is authoritative) do j = 1, num_ibs if (patch_ib(j)%gbl_patch_id == pid) + ! then is_owned = .false. do k = 1, num_local_ibs if (local_ib_patch_ids(k) == j) then is_owned = .true. exit end if end do if + ! (.not. is_owned) then forces(j,:) = fval(:) torques(j,:) = tval(:) end if exit end if end do end do end do + + ! deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) #endif + + ! end subroutine s_communicate_ib_forces_scatter subroutine s_handoff_ib_ownership() diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index e655728cd8..0fe1810c24 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1012,7 +1012,7 @@ contains #else "on CPUs" #endif - else + else allocate (patch_ib(num_ib_patches_max)) end if @@ -1202,7 +1202,6 @@ contains logical :: is_in_neighborhood, is_local patch_ib_gbl(:) = patch_ib(:) - print *, "Starting" call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up deallocate (patch_ib) @@ -1223,18 +1222,18 @@ contains ! determine the set of patches owned by local rank num_local_ibs = 0 num_ibs = 0 - do i = 1, num_ib_patches_max + do i = 1, num_gbl_ibs ! catch the edge case where th collision lies just outside the computational domain is_in_neighborhood = .true. is_local = .true. - #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + #:for X, ID, DIM in [('x', 1, 'm'), ('y', 2, 'n'), ('z', 3, 'p')] if (num_dims >= ${ID}$) then position = patch_ib_gbl(i)%${X}$_centroid if (neighbor_domain_${X}$%beg > position .or. position > neighbor_domain_${X}$%end) then is_in_neighborhood = .false. is_local = .false. - else if (${X}$_domain%beg > position .or. position > ${X}$_domain%end) then + else if (${X}$_cb(-1) > position .or. position > ${X}$_cb(${DIM}$)) then is_local = .false. end if end if From cc76bf39869025d7e6e16dd51bafe1438972e80c Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 14:21:44 -0400 Subject: [PATCH 13/15] Fixed global patch ID not being present on other ranks --- src/simulation/m_ib_patches.fpp | 16 +++------------- src/simulation/m_start_up.fpp | 1 + src/simulation/m_time_steppers.fpp | 7 ++++--- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 032c790dc0..222d7b6796 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -510,16 +510,12 @@ contains real(wp) :: radius real(wp), dimension(1:3) :: center - ! Variables to initialize the pressure field that corresponds to the bubble-collapse test case found in Tiwari et al. (2013) - - ! Transferring spherical patch's radius, centroid, smoothing patch identity and smoothing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) radius = patch_ib(patch_id)%radius - ! completely skip particles no in the domain + ! completely skip particles not in the domain if (center(1) - radius > x_cc(m + gp_layers + 1) .or. center(1) + radius < x_cc(-gp_layers - 1) .or. center(2) & & - radius > y_cc(n + gp_layers + 1) .or. center(2) + radius < y_cc(-gp_layers - 1) .or. center(3) - radius > z_cc(p & & + gp_layers + 1) .or. center(3) + radius < z_cc(-gp_layers - 1)) then @@ -542,18 +538,12 @@ contains ! Checking whether the sphere covers a particular cell in the domain and verifying whether the current patch has permission ! to write to that cell. If both queries check out, the primitive variables of the current patch are assigned to this cell. 
- $:GPU_PARALLEL_LOOP(private='[i, j, k, cart_y, cart_z]', copyin='[encoded_patch_id, center, radius]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[i, j, k]', copyin='[encoded_patch_id, center, radius]', collapse=3) do k = kl, kr do j = jl, jr do i = il, ir - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if ! Updating the patch identities bookkeeping variable - if (((x_cc(i) - center(1))**2 + (cart_y - center(2))**2 + (cart_z - center(3))**2 <= radius**2)) then + if (((x_cc(i) - center(1))**2 + (y_cc(j) - center(2))**2 + (z_cc(k) - center(3))**2 <= radius**2)) then ib_markers%sf(i, j, k) = encoded_patch_id end if end do diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 0fe1810c24..02b3999ebd 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1242,6 +1242,7 @@ contains if (is_in_neighborhood) then num_ibs = num_ibs + 1 patch_ib(num_ibs) = patch_ib_gbl(i) + patch_ib(num_ibs)%gbl_patch_id = i if (is_local) then num_local_ibs = num_local_ibs + 1 local_ib_patch_ids(num_local_ibs) = num_ibs diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 32e6005882..db6c43b944 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -566,9 +566,10 @@ contains ! if (ib) then - if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc - call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors - if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then + if (moving_immersed_boundary_flag) then + call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc + call s_handoff_ib_ownership() ! 
recomputes which ranks own which IBs and communicates to neighbors
+            else if (ib_state_wrt) then
                 call s_compute_ib_forces(q_prim_vf, fluid_pp)
             end if
         end if

From 13ad7d0fb154f63c9ad2ca7c55d1444fa9a79478 Mon Sep 17 00:00:00 2001
From: "Daniel J. Vickers"
Date: Tue, 28 Apr 2026 12:09:40 -0400
Subject: [PATCH 14/15] Updating restart data

---
 src/simulation/m_data_output.fpp | 36 +++++++++++---------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp
index ca8a6c0677..a5f6a17b50 100644
--- a/src/simulation/m_data_output.fpp
+++ b/src/simulation/m_data_output.fpp
@@ -922,19 +922,6 @@ contains
         end if
         call s_mpi_barrier()

-        ! Divide num_ibs across num_procs
-        nibs_per_rank = num_ibs/num_procs
-        remainder = mod(num_ibs, num_procs)
-
-        ! Ranks < remainder get one extra IB
-        if (proc_rank < remainder) then
-            ib_start = proc_rank*(nibs_per_rank + 1) + 1
-            ib_end = ib_start + nibs_per_rank ! nibs_per_rank + 1 total
-        else
-            ib_start = remainder*(nibs_per_rank + 1) + (proc_rank - remainder)*nibs_per_rank + 1
-            ib_end = ib_start + nibs_per_rank - 1
-        end if
-
         write (file_loc, '(A,I0,A)') '/restart_data/ib_state_', t_step, '.dat'
         file_loc = trim(case_dir) // trim(file_loc)

@@ -946,20 +933,21 @@ contains

         call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr)

-        do i = ib_start, ib_end
+        do i = 1, num_local_ibs
+            ib_idx = local_ib_patch_ids(i)
             ib_buf(1) = mytime
-            ib_buf(2:4) = patch_ib(i)%force(1:3)
-            ib_buf(5:7) = patch_ib(i)%torque(1:3)
-            ib_buf(8:10) = patch_ib(i)%vel(1:3)
-            ib_buf(11:13) = patch_ib(i)%angular_vel(1:3)
-            ib_buf(14:16) = patch_ib(i)%angles(1:3)
-            ib_buf(17) = patch_ib(i)%x_centroid
-            ib_buf(18) = patch_ib(i)%y_centroid
-            ib_buf(19) = patch_ib(i)%z_centroid
-            ib_buf(20) = patch_ib(i)%radius
+            ib_buf(2:4) = patch_ib(ib_idx)%force(1:3)
+            ib_buf(5:7) = patch_ib(ib_idx)%torque(1:3)
+            ib_buf(8:10) = patch_ib(ib_idx)%vel(1:3)
+            ib_buf(11:13) = patch_ib(ib_idx)%angular_vel(1:3)
+            ib_buf(14:16) = patch_ib(ib_idx)%angles(1:3)
+            ib_buf(17) = patch_ib(ib_idx)%x_centroid
+            ib_buf(18) = patch_ib(ib_idx)%y_centroid
+            ib_buf(19) = patch_ib(ib_idx)%z_centroid
+            ib_buf(20) = patch_ib(ib_idx)%radius

-            ! Global IB index (i) determines position in file
-            disp = int(i - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK
+            ! Global patch ID determines position in file
+            disp = int(patch_ib(ib_idx)%gbl_patch_id - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK

             call MPI_FILE_WRITE_AT(ifile, disp, ib_buf, NFIELDS_PER_IB, mpi_p, status, ierr)
         end do

From 35b786480501fdabd2e34c42344e35e08a0513cf Mon Sep 17 00:00:00 2001
From: "Daniel J. Vickers"
Date: Tue, 28 Apr 2026 12:11:30 -0400
Subject: [PATCH 15/15] add integer declaration

---
 src/simulation/m_data_output.fpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp
index aaa94acb91..effc020325 100644
--- a/src/simulation/m_data_output.fpp
+++ b/src/simulation/m_data_output.fpp
@@ -909,7 +909,7 @@ contains
         integer :: ifile, ierr
         integer, dimension(MPI_STATUS_SIZE) :: status
         logical :: file_exist
-        integer :: i
+        integer :: i, ib_idx
         integer, parameter :: NFIELDS_PER_IB = 20
         real(wp) :: ib_buf(NFIELDS_PER_IB)