From 4273c849e8df48dd7ff2aa4ddf2dfd58f04f5ffc Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sun, 15 Mar 2026 07:51:06 -0400 Subject: [PATCH 01/15] Remove interior point conservative variable protection for stationary boundaries --- src/simulation/m_ibm.fpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index b87f5a1b19..d5eeb527cb 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -192,7 +192,7 @@ contains type(ghost_point) :: gp type(ghost_point) :: innerp - ! set the Moving IBM interior Pressure Values + ! set the Moving IBM interior conservative variables $:GPU_PARALLEL_LOOP(private='[i,j,k,patch_id,rho]', copyin='[E_idx,momxb]', collapse=3) do l = 0, p do k = 0, n @@ -200,18 +200,16 @@ contains patch_id = ib_markers%sf(j, k, l) if (patch_id /= 0) then q_prim_vf(E_idx)%sf(j, k, l) = 1._wp - if (patch_ib(patch_id)%moving_ibm > 0) then - rho = 0._wp - do i = 1, num_fluids - rho = rho + q_prim_vf(contxb + i - 1)%sf(j, k, l) - end do + rho = 0._wp + do i = 1, num_fluids + rho = rho + q_prim_vf(contxb + i - 1)%sf(j, k, l) + end do - ! Sets the momentum - do i = 1, num_dims - q_cons_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)*rho - q_prim_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i) - end do - end if + ! Sets the momentum + do i = 1, num_dims + q_cons_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)*rho + q_prim_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i) + end do end if end do end do From f6522744efd6419b4644dab325267e419c7d3f32 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Tue, 21 Apr 2026 14:11:22 -0400 Subject: [PATCH 02/15] Initial separation for patch_ibs --- docs/documentation/case.md | 4 ++-- src/common/m_constants.fpp | 1 + src/common/m_derived_types.fpp | 3 ++- src/simulation/m_global_parameters.fpp | 6 +++++- src/simulation/m_start_up.fpp | 13 +++++++++++++ 5 files changed, 23 insertions(+), 4 deletions(-) diff --git a/docs/documentation/case.md b/docs/documentation/case.md index 278c694266..0bab125436 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -638,7 +638,7 @@ To restart the simulation from $k$-th time step, see @ref running "Restarting Ca | `alpha_wrt(i)` | Logical | Add the volume fraction of fluid $i$ to the database | | `gamma_wrt` | Logical | Add the specific heat ratio function to the database | | `heat_ratio_wrt` | Logical | Add the specific heat ratio to the database | -| `ib_state_wrt` | Logical | Write IB state and loads to a datafile at each time step | +| `ib_state_wrt` | Logical | Parameter to handle writing IB state on saves and outputing the state as a point mesh to SILO files. | | `pi_inf_wrt` | Logical | Add the liquid stiffness function to the database | | `pres_inf_wrt` | Logical | Add the liquid stiffness to the formatted database | | `c_wrt` | Logical | Add the sound speed to the database | @@ -706,7 +706,7 @@ If `file_per_process` is true, then pre_process, simulation, and post_process mu - `probe_wrt` activates the output of state variables at coordinates specified by `probe(i)%[x;y,z]`. -- `ib_state_wrt` activates the output of data specified by patch_ib(i)%force(:) (and torque, vel, angular_vel, angles, [x,y,z]_centroid) into a single binary datafile for all IBs at all timesteps. During post_processing, this file is converted into separate time histories for each IB. +- `ib_state_wrt` is used to trigger post-processing of the IB state to be written out as a point mesh in the SILO files. 
When no IBs are moving, it also triggers force and torque calculation so that those values may be written to the output state files. - `output_partial_domain` activates the output of part of the domain specified by `[x,y,z]_output%%beg` and `[x,y,z]_output%%end`. This is useful for large domains where only a portion of the domain is of interest. diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index cacc50f528..d66fb10614 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -25,6 +25,7 @@ module m_constants integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation integer, parameter :: num_patches_max = 10 !< Maximum number of IC patches integer, parameter :: num_ib_patches_max = 50000 !< Maximum number of immersed boundary patches (patch_ib) + integer, parameter :: num_local_ibs_max = 2000 !< Maximum number of immersed boundary patches (patch_ib) integer, parameter :: num_bc_patches_max = 10 !< Maximum number of boundary condition patches integer, parameter :: max_2d_fourier_modes = 10 !< Max Fourier mode index for 2D modal patch (geometry 13) integer, parameter :: max_sph_harm_degree = 5 !< Max degree L for 3D spherical harmonic patch (geometry 14) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index b7de058c93..4f7f08e64a 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -270,9 +270,10 @@ module m_derived_types end type ic_patch_parameters type ib_patch_parameters - integer :: geometry !< Type of geometry for the patch + integer :: patch_id real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch + !> Centroid locations of intermediate steps in the time_stepper module real(wp) :: step_x_centroid, step_y_centroid, step_z_centroid real(wp), dimension(1:3) :: centroid_offset !< offset of center of mass from computed cell center for odd-shaped IBs diff --git 
a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index e9b5e092a2..ee1607df13 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -338,12 +338,14 @@ module m_global_parameters !> @{ logical :: ib integer :: num_ibs + integer :: num_local_ibs integer :: collision_model real(wp) :: coefficient_of_restitution real(wp) :: collision_time real(wp) :: ib_coefficient_of_friction logical :: ib_state_wrt - type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib !< Immersed boundary patch parameters + type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters + integer, dimension(num_local_ibs_max) :: local_patch_ids !< lookup table of IBs in the local compute domain type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l integer :: Np @@ -780,7 +782,9 @@ contains relativity = .false. #:endif + allocate(patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max + patch_ib(i)%patch_id = i patch_ib(i)%geometry = dflt_int patch_ib(i)%x_centroid = 0._wp patch_ib(i)%y_centroid = 0._wp diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 9ff90fbdd7..b97dfc0c0f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1187,4 +1187,17 @@ contains end subroutine s_read_ib_restart_data + subroutine s_reduce_ib_patch_array() + + type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + + patch_ib_gbl(:) = patch_ib(:) + + deallocate(patch_ib) + allocate(patch_ib(num_local_ibs_max)) + + + + end subroutine s_reduce_ib_patch_array + end module m_start_up From 3baf894b925a2bf22ad9059efbce3d1f2450064c Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 22 Apr 2026 15:42:43 -0400 Subject: [PATCH 03/15] Added IB patch reduction at the start of the simulation so that ranks are only locally aware --- docs/documentation/case.md | 2 +- 
src/simulation/m_global_parameters.fpp | 31 +++---- src/simulation/m_start_up.fpp | 117 +++++++++++++++++++++++-- 3 files changed, 129 insertions(+), 21 deletions(-) diff --git a/docs/documentation/case.md b/docs/documentation/case.md index 0bab125436..7b142663ed 100644 --- a/docs/documentation/case.md +++ b/docs/documentation/case.md @@ -638,7 +638,7 @@ To restart the simulation from $k$-th time step, see @ref running "Restarting Ca | `alpha_wrt(i)` | Logical | Add the volume fraction of fluid $i$ to the database | | `gamma_wrt` | Logical | Add the specific heat ratio function to the database | | `heat_ratio_wrt` | Logical | Add the specific heat ratio to the database | -| `ib_state_wrt` | Logical | Parameter to handle writing IB state on saves and outputing the state as a point mesh to SILO files. | +| `ib_state_wrt` | Logical | Parameter to handle writing IB state on saves and outputting the state as a point mesh to SILO files. | | `pi_inf_wrt` | Logical | Add the liquid stiffness function to the database | | `pres_inf_wrt` | Logical | Add the liquid stiffness to the formatted database | | `c_wrt` | Logical | Add the sound speed to the database | diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index ee1607df13..cc950782c8 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -336,20 +336,21 @@ module m_global_parameters !> @name Immersed Boundaries !> @{ - logical :: ib - integer :: num_ibs - integer :: num_local_ibs - integer :: collision_model - real(wp) :: coefficient_of_restitution - real(wp) :: collision_time - real(wp) :: ib_coefficient_of_friction - logical :: ib_state_wrt - type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters - integer, dimension(num_local_ibs_max) :: local_patch_ids !< lookup table of IBs in the local compute domain - type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l - 
integer :: Np - - $:GPU_DECLARE(create='[ib, num_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l]') + logical :: ib + integer :: num_ibs !< number of IBs that the current processor is aware of + integer :: num_gbl !< number of IBs in the overall simulation + integer :: num_local_ibs !< number of IBs that lie inside the processor domain + integer :: collision_model + real(wp) :: coefficient_of_restitution + real(wp) :: collision_time + real(wp) :: ib_coefficient_of_friction + logical :: ib_state_wrt + type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters + integer, dimension(num_local_ibs_max) :: local_ib_patch_ids !< lookup table of IBs in the local compute domain + type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l + integer :: Np + + $:GPU_DECLARE(create='[ib, num_ibs, num_gbl, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') $:GPU_DECLARE(create='[ib_coefficient_of_friction]') !> @} @@ -782,7 +783,7 @@ contains relativity = .false. 
#:endif - allocate(patch_ib(num_ib_patches_max)) + allocate (patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max patch_ib(i)%patch_id = i patch_ib(i)%geometry = dflt_int diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index b97dfc0c0f..52e43ecfc1 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -39,6 +39,7 @@ module m_start_up use m_nvtx use m_ibm + use m_collisions use m_compile_specific use m_checker_common use m_checker @@ -915,6 +916,7 @@ contains if (model_eqns == 3) call s_initialize_internal_energy_equations(q_cons_ts(1)%vf) if (ib) then if (t_step_start > 0) call s_read_ib_restart_data(t_step_start) + call s_reduce_ib_patch_array() call s_ibm_setup() if (t_step_start == 0) then call s_write_ib_data_file(0) @@ -1189,15 +1191,120 @@ contains subroutine s_reduce_ib_patch_array() - type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + real(wp), dimension(3) :: collision_location + integer :: i, j + integer :: num_aware_ibs - patch_ib_gbl(:) = patch_ib(:) + patch_ib_gbl(:) = patch_ib(:) - deallocate(patch_ib) - allocate(patch_ib(num_local_ibs_max)) + deallocate (patch_ib) + if (num_dims == 3) then + num_aware_ibs = num_local_ibs_max*27 + else + num_aware_ibs = num_local_ibs_max*9 + end if + allocate (patch_ib(num_aware_ibs)) + +#ifdef MFC_MPI + ! fallback for 1-rank case + if (num_proc == 1) then + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) + else + ! 
determine the set of patches owned by local rank + num_local_ibs = 0 + do i = 1, num_ib_patches_max + collision_location = [patch_ib_gbl(i)%x_centroid, patch_ib_gbl(i)%y_centroid, 0._wp] + if (num_dims == 3) collision_location(3) = patch_ib_gbl(i)%z_centroid + if (f_local_rank_owns_collision(collision_location)) then + num_local_ibs = num_local_ibs + 1 + patch_ib(j) = patch_ib_gbl(i) + local_ib_patch_ids(j) = j + end if + end do + num_gbl_ibs = num_ibs + num_ibs = num_local_ibs - + ! collect the patches from neighboring + call s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) + end if +#else + ! reduce the size of the array for local simulation in no-MPI case + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) +#endif end subroutine s_reduce_ib_patch_array + !> Exchanges local IB patch IDs with face-neighbors in each axis direction so that each rank acquires the patch data for IBs + !! owned by its immediate neighbors. + subroutine s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) + + type(ib_patch_parameters), dimension(num_ib_patches_max), intent(in) :: patch_ib_gbl + integer, intent(in) :: num_aware_ibs + +#ifdef MFC_MPI + integer, dimension(num_aware_ibs) :: send_buf, recv_buf + integer :: i, k, recv_id, send_neighbor, recv_neighbor + integer :: ierr + logical :: found + + #:for X, ID, TAG in [('x', 1, 100), ('y', 2, 102), ('z', 3, 104)] + if (num_dims >= ${ID}$) then + ! Repack local patch IDs; sentinel -1 marks unused slots + send_buf = -1 + do i = 1, num_ibs + send_buf(i) = patch_ib(i)%patch_id + end do + + ! 
Step 1: send to +${X}$ neighbor, receive from -${X}$ neighbor + send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_buf = -1 + call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG}$, recv_buf, num_aware_ibs, & + & MPI_INTEGER, recv_neighbor, ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + do i = 1, num_aware_ibs + recv_id = recv_buf(i) + if (recv_id < 0) exit + found = .false. + do k = 1, num_ibs + if (patch_ib(k)%patch_id == recv_id) then + found = .true. + exit + end if + end do + if (.not. found) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= num_aware_ibs, 'patch_ib overflow in ${X}$+ IB communication') + patch_ib(num_ibs) = patch_ib_gbl(recv_id) + end if + end do + + ! Step 2: send to -${X}$ neighbor, receive from +${X}$ neighbor (same send_buf: original local list) + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_buf = -1 + call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG + 1}$, recv_buf, num_aware_ibs, & + & MPI_INTEGER, recv_neighbor, ${TAG + 1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + do i = 1, num_aware_ibs + recv_id = recv_buf(i) + if (recv_id < 0) exit + found = .false. + do k = 1, num_ibs + if (patch_ib(k)%patch_id == recv_id) then + found = .true. + exit + end if + end do + if (.not. 
found) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in ${X}$- IB communication') + patch_ib(num_ibs) = patch_ib_gbl(recv_id) + end if + end do + end if + #:endfor +#endif + + end subroutine s_communicate_ib_patches + end module m_start_up From 2365f2c009b9f4e04c85bc286060fe83f8232f3f Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 22 Apr 2026 17:19:40 -0400 Subject: [PATCH 04/15] intermittent commit --- src/common/m_derived_types.fpp | 2 +- src/simulation/m_global_parameters.fpp | 4 ++-- src/simulation/m_ib_patches.fpp | 4 ++-- src/simulation/m_start_up.fpp | 6 +++--- src/simulation/m_time_steppers.fpp | 1 + 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 4f7f08e64a..14a25c3607 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -271,7 +271,7 @@ module m_derived_types type ib_patch_parameters integer :: geometry !< Type of geometry for the patch - integer :: patch_id + integer :: gbl_patch_id real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch !> Centroid locations of intermediate steps in the time_stepper module diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index cc950782c8..9f8da1d69c 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -338,7 +338,7 @@ module m_global_parameters !> @{ logical :: ib integer :: num_ibs !< number of IBs that the current processor is aware of - integer :: num_gbl !< number of IBs in the overall simulation + integer :: num_gbl_ibs !< number of IBs in the overall simulation integer :: num_local_ibs !< number of IBs that lie inside the processor domain integer :: collision_model real(wp) :: coefficient_of_restitution @@ -350,7 +350,7 @@ module m_global_parameters type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l 
integer :: Np - $:GPU_DECLARE(create='[ib, num_ibs, num_gbl, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') + $:GPU_DECLARE(create='[ib, num_ibs, num_gbl_ibs, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') $:GPU_DECLARE(create='[ib_coefficient_of_friction]') !> @} diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 3735d9a8a0..d932efa863 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -1039,7 +1039,7 @@ contains temp_y_per = y_periodicity; if (y_periodicity == -1) temp_y_per = 2 temp_z_per = z_periodicity; if (z_periodicity == -1) temp_z_per = 2 - offset = (num_ibs + 1)*temp_x_per + 3*(num_ibs + 1)*temp_y_per + 9*(num_ibs + 1)*temp_z_per + offset = (num_gbl_ibs + 1)*temp_x_per + 3*(num_gbl_ibs + 1)*temp_y_per + 9*(num_gbl_ibs + 1)*temp_z_per encoded_patch_id = patch_id + offset end subroutine s_encode_patch_periodicity @@ -1053,7 +1053,7 @@ contains integer, intent(out) :: patch_id, x_periodicity, y_periodicity, z_periodicity integer :: offset, remainder, xp, yp, zp, base - base = num_ibs + 1 + base = num_gbl_ibs + 1 patch_id = mod(encoded_patch_id - 1, base) + 1 offset = (encoded_patch_id - patch_id)/base diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 52e43ecfc1..f7a3d2f230 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1253,7 +1253,7 @@ contains ! Repack local patch IDs; sentinel -1 marks unused slots send_buf = -1 do i = 1, num_ibs - send_buf(i) = patch_ib(i)%patch_id + send_buf(i) = patch_ib(i)%gbl_patch_id end do ! Step 1: send to +${X}$ neighbor, receive from -${X}$ neighbor @@ -1267,7 +1267,7 @@ contains if (recv_id < 0) exit found = .false. do k = 1, num_ibs - if (patch_ib(k)%patch_id == recv_id) then + if (patch_ib(k)%gbl_patch_id == recv_id) then found = .true. exit end if @@ -1290,7 +1290,7 @@ contains if (recv_id < 0) exit found = .false. 
do k = 1, num_ibs - if (patch_ib(k)%patch_id == recv_id) then + if (patch_ib(k)%gbl_patch_id == recv_id) then found = .true. exit end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 88d27cb975..9e701a1887 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -567,6 +567,7 @@ contains ! if (ib) then if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() + call send_updated_ib_list() if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then call s_compute_ib_forces(q_prim_vf, fluid_pp) end if From 71bae6a16741020dd699d145080f1174f33d1d34 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 22 Apr 2026 17:33:14 -0400 Subject: [PATCH 05/15] we now write the global IB index, not the local one to ib_markers, as intended --- src/simulation/m_ib_patches.fpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index d932efa863..032c790dc0 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -109,7 +109,7 @@ contains radius = patch_ib(patch_id)%radius ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -221,7 +221,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -376,7 +376,7 @@ contains end if ! 
encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -466,7 +466,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -527,7 +527,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -586,7 +586,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -656,7 +656,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -724,7 +724,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! 
encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -781,7 +781,7 @@ contains threshold = patch_ib(patch_id)%model_threshold ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -858,7 +858,7 @@ contains rotation(:,:) = patch_ib(patch_id)%rotation_matrix(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 From 07790980caea6afd82c9f55d1d13dd3bec921db4 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Thu, 23 Apr 2026 13:58:14 -0400 Subject: [PATCH 06/15] Refactored ib reduction to use neighbor bounds --- src/common/m_derived_types.fpp | 10 +- src/simulation/m_global_parameters.fpp | 3 +- src/simulation/m_ibm.fpp | 43 +++++++++ src/simulation/m_start_up.fpp | 127 +++++++++++-------------- 4 files changed, 106 insertions(+), 77 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 14a25c3607..3ff1f156dd 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -200,12 +200,10 @@ module m_derived_types type :: t_model_array ! 
Original CPU-side fields (unchanged) - type(t_model), allocatable :: model !< STL/OBJ geometry model - real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices - real(wp), allocatable, dimension(:,:) :: interpolated_boundary_v !< Interpolated boundary vertices - integer :: boundary_edge_count !< Number of boundary edges - integer :: total_vertices !< Total vertex count - integer :: interpolate !< Interpolation flag + type(t_model), allocatable :: model !< STL/OBJ geometry model + real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices + integer :: boundary_edge_count !< Number of boundary edges + integer :: total_vertices !< Total vertex count ! GPU-friendly flattened arrays integer :: ntrs !< Copy of model%ntrs diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 9f8da1d69c..581a418aba 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -229,7 +229,8 @@ module m_global_parameters $:GPU_DECLARE(create='[ib_bc_x, ib_bc_y, ib_bc_z]') #endif type(bounds_info) :: x_domain, y_domain, z_domain - $:GPU_DECLARE(create='[x_domain, y_domain, z_domain]') + type(bounds_info) :: neighbor_domain_x, neighbor_domain_y, neighbor_domain_z + $:GPU_DECLARE(create='[x_domain, y_domain, z_domain, neighbor_domain_x, neighbor_domain_y, neighbor_domain_z]') real(wp) :: x_a, y_a, z_a real(wp) :: x_b, y_b, z_b logical :: parallel_io !< Format of the data files diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 469eecdabd..5d8cbd453d 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1223,4 +1223,47 @@ contains end subroutine s_wrap_periodic_ibs + !> @brief passes ownership of IBs to neighbor processors + subroutine s_handoff_ib_ownership() + + integer, dimension(num_ibs) :: communication_directions + integer :: i, ib_idx + real(wp) :: position + + #:if defined('MFC_MPI') + if (num_procs > 1) then + 
communication_directions = 0 + + ! identify particles that have left the local domain and log the direction of communication + do i = 1, num_local_ibs + ib_idx = local_ib_patch_ids(i) + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + position = patch_ib(ib_idx)%${X}$_centroid + if (bc_${X}$%beg < 0 .and. bc_${X}$%beg /= BC_PERIODIC) then + ! if it is outside the domain in one direction, project it somewhere inside so at least one rank + ! owns it + if (position < ${X}$_domain%beg) then + position = ${X}$_domain%beg + else if (${X}$_domain%end < position) then + position = ${X}$_domain%end - 1.0e-10_wp + end if + end if + + if (position < ${X}$_domain%beg) then + communication_directions(i) = -${ID}$ + else if (${X}$_domain%end < position) then + communication_directions(i) = ${ID}$ + end if + end if + #:endfor + end do + + #:for X, DIM in [('x', '1'), ('y', '2'), ('z', '3')] + #:endfor + end if + #:endif + + end subroutine s_handoff_ib_ownership + end module m_ibm diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index f7a3d2f230..15620476e2 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1189,14 +1189,18 @@ contains end subroutine s_read_ib_restart_data + !> @brief takes the patch_ib struct array that contains all global IB patches and reduces to only contain patches that are in + !! the local computational domain. subroutine s_reduce_ib_patch_array() type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl - real(wp), dimension(3) :: collision_location + real(wp) :: position integer :: i, j integer :: num_aware_ibs + logical :: is_in_neighborhood, is_local patch_ib_gbl(:) = patch_ib(:) + call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up deallocate (patch_ib) if (num_dims == 3) then @@ -1206,6 +1210,8 @@ contains end if allocate (patch_ib(num_aware_ibs)) + num_gbl_ibs = num_ibs + #ifdef MFC_MPI ! 
fallback for 1-rank case if (num_proc == 1) then @@ -1213,20 +1219,33 @@ contains else ! determine the set of patches owned by local rank num_local_ibs = 0 + num_ibs = 0 do i = 1, num_ib_patches_max - collision_location = [patch_ib_gbl(i)%x_centroid, patch_ib_gbl(i)%y_centroid, 0._wp] - if (num_dims == 3) collision_location(3) = patch_ib_gbl(i)%z_centroid - if (f_local_rank_owns_collision(collision_location)) then - num_local_ibs = num_local_ibs + 1 - patch_ib(j) = patch_ib_gbl(i) - local_ib_patch_ids(j) = j + ! catch the edge case where th collision lies just outside the computational domain + is_in_neighborhood = .true. + is_local = .true. + + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (num_dims >= ${ID}$) then + position = patch_ib_gbl(i)%${X}$_centroid + if (neighbor_domain_${X}$%beg > position .or. position > neighbor_domain_${X}$%end) then + is_in_neighborhood = .false. + is_local = .false. + else if (${X}$_domain%beg > position .or. position > ${X}$_domain%end) then + is_local = .false. + end if + end if + #:endfor + + if (is_in_neighborhood) then + num_ibs = num_ibs + 1 + patch_ib(num_ibs) = patch_ib_gbl(i) + if (is_local) then + num_local_ibs = num_local_ibs + 1 + local_ib_patch_ids(num_local_ibs) = num_ibs + end if end if end do - num_gbl_ibs = num_ibs - num_ibs = num_local_ibs - - ! collect the patches from neighboring - call s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) end if #else ! reduce the size of the array for local simulation in no-MPI case @@ -1235,76 +1254,44 @@ contains end subroutine s_reduce_ib_patch_array - !> Exchanges local IB patch IDs with face-neighbors in each axis direction so that each rank acquires the patch data for IBs - !! owned by its immediate neighbors. - subroutine s_communicate_ib_patches(patch_ib_gbl, num_aware_ibs) + subroutine get_neighbor_bounds() - type(ib_patch_parameters), dimension(num_ib_patches_max), intent(in) :: patch_ib_gbl - integer, intent(in) :: num_aware_ibs + ! 
Default: no neighbor in any direction + neighbor_domain_x%beg = -huge(0._wp) + neighbor_domain_x%end = huge(0._wp) + neighbor_domain_y%beg = -huge(0._wp) + neighbor_domain_y%end = huge(0._wp) + neighbor_domain_z%beg = -huge(0._wp) + neighbor_domain_z%end = huge(0._wp) #ifdef MFC_MPI - integer, dimension(num_aware_ibs) :: send_buf, recv_buf - integer :: i, k, recv_id, send_neighbor, recv_neighbor - integer :: ierr - logical :: found + real(wp) :: send_val, recv_val + integer :: send_neighbor, recv_neighbor, ierr - #:for X, ID, TAG in [('x', 1, 100), ('y', 2, 102), ('z', 3, 104)] + #:for X, ID, TAG, DIM in [('x', 1, 100, 'm'), ('y', 2, 102, 'n'), ('z', 3, 104, 'p')] if (num_dims >= ${ID}$) then - ! Repack local patch IDs; sentinel -1 marks unused slots - send_buf = -1 - do i = 1, num_ibs - send_buf(i) = patch_ib(i)%gbl_patch_id - end do - - ! Step 1: send to +${X}$ neighbor, receive from -${X}$ neighbor + ! Step 1: broadcast left edge (-1 face) rightward; receive left neighbor's left edge -> neighbor_domain_${X}$%beg + send_val = ${X}$_cb(-1) send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) - recv_buf = -1 - call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG}$, recv_buf, num_aware_ibs, & - & MPI_INTEGER, recv_neighbor, ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do i = 1, num_aware_ibs - recv_id = recv_buf(i) - if (recv_id < 0) exit - found = .false. - do k = 1, num_ibs - if (patch_ib(k)%gbl_patch_id == recv_id) then - found = .true. - exit - end if - end do - if (.not. found) then - num_ibs = num_ibs + 1 - @:ASSERT(num_ibs <= num_aware_ibs, 'patch_ib overflow in ${X}$+ IB communication') - patch_ib(num_ibs) = patch_ib_gbl(recv_id) - end if - end do - - ! 
Step 2: send to -${X}$ neighbor, receive from +${X}$ neighbor (same send_buf: original local list) + recv_val = -huge(0._wp) + call MPI_SENDRECV(send_val, 1, mpi_p, send_neighbor, ${TAG}$, recv_val, 1, mpi_p, recv_neighbor, ${TAG}$, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + neighbor_domain_${X}$%beg = recv_val + + ! Step 2: broadcast right edge (${DIM}$ face) leftward; receive right neighbor's right edge -> + ! neighbor_domain_${X}$%end + send_val = ${X}$_cb(${DIM}$) send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) - recv_buf = -1 - call MPI_SENDRECV(send_buf, num_aware_ibs, MPI_INTEGER, send_neighbor, ${TAG + 1}$, recv_buf, num_aware_ibs, & - & MPI_INTEGER, recv_neighbor, ${TAG + 1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - do i = 1, num_aware_ibs - recv_id = recv_buf(i) - if (recv_id < 0) exit - found = .false. - do k = 1, num_ibs - if (patch_ib(k)%gbl_patch_id == recv_id) then - found = .true. - exit - end if - end do - if (.not. found) then - num_ibs = num_ibs + 1 - @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in ${X}$- IB communication') - patch_ib(num_ibs) = patch_ib_gbl(recv_id) - end if - end do + recv_val = huge(0._wp) + call MPI_SENDRECV(send_val, 1, mpi_p, send_neighbor, ${TAG + 1}$, recv_val, 1, mpi_p, recv_neighbor, ${TAG + 1}$, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + neighbor_domain_${X}$%end = recv_val end if #:endfor #endif - end subroutine s_communicate_ib_patches + end subroutine get_neighbor_bounds end module m_start_up From d9ac1c257d1b5c57f4f396660544bf2d80cdeaf2 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Thu, 23 Apr 2026 15:56:02 -0400 Subject: [PATCH 07/15] prototype of send-receive replacing all-to-all --- src/simulation/m_ibm.fpp | 304 ++++++++++++++++++++++++++--- src/simulation/m_time_steppers.fpp | 4 +- 2 files changed, 277 insertions(+), 31 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 5d8cbd453d..14b409d6d5 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1008,9 +1008,8 @@ contains call s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques) - ! reduce the forces across all MPI ranks - call s_mpi_allreduce_vectors_sum(forces, forces, num_ibs, 3) - call s_mpi_allreduce_vectors_sum(torques, torques, num_ibs, 3) + ! reduce the forces across local neighborhood ranks + call s_communicate_ib_forces(forces, torques) ! consider body forces after reducing to avoid double counting do i = 1, num_ibs @@ -1223,44 +1222,291 @@ contains end subroutine s_wrap_periodic_ibs - !> @brief passes ownership of IBs to neighbor processors + !> @brief reasseses ownership of IBs and passes ownership of IBs to neighbor processors + !> Reduces forces and torques across the local neighborhood without a global allreduce. Accumulation phase: 2 passes per + !! dimension receiving from the low-index (-X) neighbor. Pass 1: add received values; save what was received as recv_snap. Pass + !! 2: send current (post-pass-1) values; add received; subtract recv_snap to remove double-counting of the direct contribution + !! already added in pass 1. Back-propagation phase: 2 passes per dimension receiving from the high-index (+X) neighbor, each + !! overwriting local forces with the neighbor's accumulated total. 
+ subroutine s_communicate_ib_forces(forces, torques) + + real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + +#ifdef MFC_MPI + integer :: i, j, pack_pos, unpack_pos, buf_size, ierr + integer :: send_neighbor, recv_neighbor, recv_count, pid + real(wp), dimension(3) :: fval, tval + real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:) + character(len=1), allocatable :: send_buf(:), recv_buf(:) + + if (num_procs == 1) return + + buf_size = storage_size(0)/8 + (storage_size(0)/8 + 6*storage_size(0._wp)/8)*size(patch_ib) + allocate (send_buf(buf_size), recv_buf(buf_size), recv_forces_snap(num_ibs, 3), recv_torques_snap(num_ibs, 3)) + + ! Accumulation phase: propagate contributions toward the high-index corner. + #:for X, ID, TAG1, TAG2 in [('x', 1, 300, 302), ('y', 2, 304, 306), ('z', 3, 308, 310)] + if (num_dims >= ${ID}$) then + send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + + ! Pass 1: send current forces to +${X}$ neighbor; receive from -${X}$ neighbor and add. Save what was received as + ! recv_snap for double-count removal in pass 2. 
+ recv_forces_snap = 0._wp + recv_torques_snap = 0._wp + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG1}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + recv_forces_snap(j,:) = fval(:) + recv_torques_snap(j,:) = tval(:) + forces(j,:) = forces(j,:) + fval(:) + torques(j,:) = torques(j,:) + tval(:) + exit + end if + end do + end do + end if + + ! Pass 2: send post-pass-1 forces to +${X}$ neighbor; receive from -${X}$ neighbor. Add received values then + ! subtract recv_snap to remove the pass-1 contribution that was already counted, leaving only the 2-hop delta. 
+ pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG2}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG2}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + forces(j,:) = forces(j,:) + fval(:) - recv_forces_snap(j,:) + torques(j,:) = torques(j,:) + tval(:) - recv_torques_snap(j,:) + exit + end if + end do + end do + end if + end if + #:endfor + + ! Back-propagation phase: for each dimension, 2 passes receiving from the high-index neighbor. Each pass overwrites local + ! forces with the neighbor's accumulated total. Two passes ensure the total reaches 2 hops back, covering the full + ! neighborhood. 
+ #:for X, ID, TAG1, TAG2 in [('x', 1, 312, 314), ('y', 2, 316, 318), ('z', 3, 320, 322)] + if (num_dims >= ${ID}$) then + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + + #:for TAG in [TAG1, TAG2] + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + forces(j,:) = fval(:) + torques(j,:) = tval(:) + exit + end if + end do + end do + end if + #:endfor + end if + #:endfor + + deallocate (send_buf, recv_buf, recv_forces_snap, recv_torques_snap) +#endif + + end subroutine s_communicate_ib_forces + subroutine s_handoff_ib_ownership() - integer, dimension(num_ibs) :: communication_directions - integer :: i, ib_idx - real(wp) :: position + integer :: i, j, k, output_idx, local_output_idx + integer :: old_num_local_ibs + integer :: new_count, recv_count + integer :: pack_pos, unpack_pos, 
buf_size, patch_bytes + integer :: send_neighbor, recv_neighbor, ierr + integer :: dx, dy, dz, tag, nbr_idx, nreqs + integer, dimension(3) :: nbr_coords + real(wp) :: position + real(wp), dimension(3) :: centroid + logical :: is_new, already_known + type(ib_patch_parameters) :: tmp_patch + integer, dimension(num_local_ibs_max) :: local_ib_idx_old + ! 26 neighbors max in 3D; each gets its own recv buffer and a request handle for send + recv + integer, parameter :: max_nbrs = 26 + character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) + integer, dimension(2*max_nbrs) :: requests + integer, dimension(max_nbrs) :: recv_neighbor_list #:if defined('MFC_MPI') if (num_procs > 1) then - communication_directions = 0 - - ! identify particles that have left the local domain and log the direction of communication + ! save a copy of the local IB's global indices to cross-reference for later. + old_num_local_ibs = num_local_ibs do i = 1, num_local_ibs - ib_idx = local_ib_patch_ids(i) + local_ib_idx_old(i) = patch_ib(local_ib_patch_ids(i))%gbl_patch_id + end do + + ! delete any particles that no longer need to be tracked and coalesce the array + output_idx = 0 + local_output_idx = 0 + do i = 1, num_ibs #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] - if (num_dims >= ${ID}$) then - position = patch_ib(ib_idx)%${X}$_centroid - if (bc_${X}$%beg < 0 .and. bc_${X}$%beg /= BC_PERIODIC) then - ! if it is outside the domain in one direction, project it somewhere inside so at least one rank - ! owns it - if (position < ${X}$_domain%beg) then - position = ${X}$_domain%beg - else if (${X}$_domain%end < position) then - position = ${X}$_domain%end - 1.0e-10_wp - end if - end if + if (patch_ib(i)%${X}$_centroid < neighbor_domain_${X}$%beg .or. 
neighbor_domain_${X}$%end < patch_ib(i) & + & %${X}$_centroid) then + cycle + end if + #:endfor + + output_idx = output_idx + 1 + if (i /= output_idx) patch_ib(output_idx) = patch_ib(i) + centroid = [patch_ib(i)%x_centroid, patch_ib(i)%y_centroid, 0._wp] + if (num_dims == 3) centroid(3) = patch_ib(i)%z_centroid + if (f_local_rank_owns_collision(centroid)) then + local_output_idx = local_output_idx + 1 + local_ib_patch_ids(local_output_idx) = output_idx + end if + end do + num_ibs = output_idx + num_local_ibs = local_output_idx + + ! Broadcast newly-owned patches to all neighborhood neighbors (including corners/edges). + patch_bytes = storage_size(tmp_patch)/8 + buf_size = storage_size(0)/8 + patch_bytes*num_local_ibs_max + allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs)) + + ! Write placeholder count at position 0 + pack_pos = 0 + call MPI_PACK(0, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - if (position < ${X}$_domain%beg) then - communication_directions(i) = -${ID}$ - else if (${X}$_domain%end < position) then - communication_directions(i) = ${ID}$ + ! Single pass: pack new patches and count them + new_count = 0 + do i = 1, num_local_ibs + k = local_ib_patch_ids(i) + is_new = .true. + do j = 1, old_num_local_ibs + if (patch_ib(k)%gbl_patch_id == local_ib_idx_old(j)) then + is_new = .false. + exit + end if + end do + if (is_new) then + call MPI_PACK(patch_ib(k), patch_bytes, MPI_BYTE, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + new_count = new_count + 1 + end if + end do + + ! Overwrite the placeholder with the real count + pack_pos = 0 + call MPI_PACK(new_count, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + pack_pos = storage_size(0)/8 + new_count*patch_bytes + + ! Post all receives first, then all sends, so they are all in flight together. Tags 200..226: tag = 200 + (dx+1)*9 + + ! 
(dy+1)*3 + (dz+1) + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! Receive from the mirror direction + nbr_coords = proc_coords - [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) + if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL + recv_neighbor_list(nbr_idx) = recv_neighbor + + nreqs = nreqs + 1 + call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords + [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) + if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + nreqs = nreqs + 1 + call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), & + & ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! Unpack all received buffers + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tmp_patch, patch_bytes, MPI_BYTE, & + & MPI_COMM_WORLD, ierr) + already_known = .false. + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == tmp_patch%gbl_patch_id) then + already_known = .true. + exit end if + end do + if (.not. 
already_known) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in neighborhood handoff') + patch_ib(num_ibs) = tmp_patch end if - #:endfor + end do end do - #:for X, DIM in [('x', '1'), ('y', '2'), ('z', '3')] - #:endfor + deallocate (send_buf, recv_bufs) end if #:endif diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 9e701a1887..32e6005882 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -566,8 +566,8 @@ contains ! if (ib) then - if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() - call send_updated_ib_list() + if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc + call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then call s_compute_ib_forces(q_prim_vf, fluid_pp) end if From ee0bc0cfc10a0b4c39660c2920613d23c38efb65 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Thu, 23 Apr 2026 16:01:50 -0400 Subject: [PATCH 08/15] Compilation errors resolved --- src/simulation/m_global_parameters.fpp | 2 +- src/simulation/m_start_up.fpp | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 581a418aba..0a63c0c496 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -786,7 +786,7 @@ contains allocate (patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max - patch_ib(i)%patch_id = i + patch_ib(i)%gbl_patch_id = i patch_ib(i)%geometry = dflt_int patch_ib(i)%x_centroid = 0._wp patch_ib(i)%y_centroid = 0._wp diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 15620476e2..6cf36ca9ad 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1214,7 +1214,7 @@ contains #ifdef MFC_MPI ! fallback for 1-rank case - if (num_proc == 1) then + if (num_procs == 1) then patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) else ! determine the set of patches owned by local rank @@ -1256,7 +1256,11 @@ contains subroutine get_neighbor_bounds() + real(wp) :: send_val, recv_val + integer :: send_neighbor, recv_neighbor, ierr + ! Default: no neighbor in any direction + neighbor_domain_x%beg = -huge(0._wp) neighbor_domain_x%end = huge(0._wp) neighbor_domain_y%beg = -huge(0._wp) @@ -1265,9 +1269,6 @@ contains neighbor_domain_z%end = huge(0._wp) #ifdef MFC_MPI - real(wp) :: send_val, recv_val - integer :: send_neighbor, recv_neighbor, ierr - #:for X, ID, TAG, DIM in [('x', 1, 100, 'm'), ('y', 2, 102, 'n'), ('z', 3, 104, 'p')] if (num_dims >= ${ID}$) then ! Step 1: broadcast left edge (-1 face) rightward; receive left neighbor's left edge -> neighbor_domain_${X}$%beg From 8303cea4d8bea10ff3caae5a9fabcb4f4f18ef22 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 08:31:49 -0400 Subject: [PATCH 09/15] Resolved out of bounds error --- src/simulation/m_start_up.fpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 6cf36ca9ad..96e96bdb4f 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1204,9 +1204,9 @@ contains deallocate (patch_ib) if (num_dims == 3) then - num_aware_ibs = num_local_ibs_max*27 + num_aware_ibs = min(num_local_ibs_max*27, num_ib_patches_max) else - num_aware_ibs = num_local_ibs_max*9 + num_aware_ibs = min(num_local_ibs_max*9, num_ib_patches_max) end if allocate (patch_ib(num_aware_ibs)) From 1c1801c3346d256c357ab4893ee6e70792e29556 Mon Sep 17 00:00:00 2001 From: "Daniel J. Vickers" Date: Fri, 24 Apr 2026 09:43:04 -0400 Subject: [PATCH 10/15] added send test algorithm for alternative MPI communication --- src/simulation/m_ibm.fpp | 197 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 14b409d6d5..1da48e17f1 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1358,6 +1358,203 @@ contains end subroutine s_communicate_ib_forces + !> Alternative force reduction using two non-blocking all-to-neighbor broadcasts. Phase 1: every rank sends its full force array + !! to all 26 neighborhood neighbors simultaneously. After MPI_WAITALL, each rank sums contributions from neighbors for its owned + !! particles. Phase 2: each rank sends its finalized owned-particle forces (by gbl_patch_id) back to all neighbors + !! simultaneously. After MPI_WAITALL, each rank overwrites ghost-particle forces with the authoritative values from the owning + !! rank. Not currently called - available for benchmarking against s_communicate_ib_forces. 
+ subroutine s_communicate_ib_forces_scatter(forces, torques) + + real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + +#ifdef MFC_MPI + integer, parameter :: max_nbrs = 26 + integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos + integer :: buf_size, entry_bytes, ierr, recv_count, pid + integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag + integer, dimension(3) :: nbr_coords + logical :: is_owned + real(wp), dimension(3) :: fval, tval + real(wp), dimension(num_ibs, 3) :: forces_total, torques_total + integer, dimension(max_nbrs) :: recv_neighbor_list + integer, dimension(2*max_nbrs) :: requests + character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) + character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) + integer :: owned_buf_size + + if (num_procs == 1) return + + ! Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle + entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 + buf_size = storage_size(0)/8 + entry_bytes*num_ibs + owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max + allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & + & owned_recv_bufs(owned_buf_size, max_nbrs)) + + ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. 
dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords - [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) + if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL + recv_neighbor_list(nbr_idx) = recv_neighbor + + nreqs = nreqs + 1 + call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords + [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) + if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + nreqs = nreqs + 1 + call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! Local reduction: for each owned particle, sum contributions from all neighbors. + forces_total = forces + torques_total = torques + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + ! 
Only accumulate for particles this rank owns + do k = 1, num_local_ibs + j = local_ib_patch_ids(k) + if (patch_ib(j)%gbl_patch_id == pid) then + forces_total(j,:) = forces_total(j,:) + fval(:) + torques_total(j,:) = torques_total(j,:) + tval(:) + exit + end if + end do + end do + end do + + ! Write totals back for owned particles only + do k = 1, num_local_ibs + j = local_ib_patch_ids(k) + forces(j,:) = forces_total(j,:) + torques(j,:) = torques_total(j,:) + end do + + ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. + pack_pos = 0 + call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do k = 1, num_local_ibs + j = local_ib_patch_ids(k) + call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(j,:); tval(:) = torques(j,:) + call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords - [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) + if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL + recv_neighbor_list(nbr_idx) = recv_neighbor + + nreqs = nreqs + 1 + call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. 
dz == 0) cycle + tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords + [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) + if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + nreqs = nreqs + 1 + call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! Overwrite ghost-particle forces with authoritative values from the owning rank. + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & + & ierr) + do i = 1, recv_count + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + is_owned = .false. + do k = 1, num_local_ibs + if (local_ib_patch_ids(k) == j) then + is_owned = .true. + exit + end if + end do + if (.not. is_owned) then + forces(j,:) = fval(:) + torques(j,:) = tval(:) + end if + exit + end if + end do + end do + end do + + deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) +#endif + + end subroutine s_communicate_ib_forces_scatter + subroutine s_handoff_ib_ownership() integer :: i, j, k, output_idx, local_output_idx From f014ca9c42a98f5a7bc617cd1a6be225cc1349e1 Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 12:31:41 -0400 Subject: [PATCH 11/15] Fixed early segfault due to uninitialized IB patch array --- src/simulation/m_ibm.fpp | 382 +++++++++++++++++----------------- src/simulation/m_start_up.fpp | 3 + 2 files changed, 194 insertions(+), 191 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 1da48e17f1..4bb008a2eb 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1363,197 +1363,197 @@ contains !! particles. Phase 2: each rank sends its finalized owned-particle forces (by gbl_patch_id) back to all neighbors !! simultaneously. After MPI_WAITALL, each rank overwrites ghost-particle forces with the authoritative values from the owning !! rank. Not currently called - available for benchmarking against s_communicate_ib_forces. - subroutine s_communicate_ib_forces_scatter(forces, torques) - - real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques - -#ifdef MFC_MPI - integer, parameter :: max_nbrs = 26 - integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos - integer :: buf_size, entry_bytes, ierr, recv_count, pid - integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag - integer, dimension(3) :: nbr_coords - logical :: is_owned - real(wp), dimension(3) :: fval, tval - real(wp), dimension(num_ibs, 3) :: forces_total, torques_total - integer, dimension(max_nbrs) :: recv_neighbor_list - integer, dimension(2*max_nbrs) :: requests - character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) - character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) - integer :: owned_buf_size - - if (num_procs == 1) return - - ! 
Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle - entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 - buf_size = storage_size(0)/8 + entry_bytes*num_ibs - owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max - allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & - & owned_recv_bufs(owned_buf_size, max_nbrs)) - - ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. - pack_pos = 0 - call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - do i = 1, num_ibs - call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - fval(:) = forces(i,:); tval(:) = torques(i,:) - call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) - end do - - nreqs = 0 - nbr_idx = 0 - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. dz == 0) cycle - nbr_idx = nbr_idx + 1 - tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords - [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) - if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL - recv_neighbor_list(nbr_idx) = recv_neighbor - - nreqs = nreqs + 1 - call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & - & requests(nreqs), ierr) - end do - end do - end do - - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. 
dz == 0) cycle - tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords + [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) - if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - - nreqs = nreqs + 1 - call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) - end do - end do - end do - - call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - - ! Local reduction: for each owned particle, sum contributions from all neighbors. - forces_total = forces - torques_total = torques - do nbr_idx = 1, merge(26, 8, num_dims == 3) - if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle - unpack_pos = 0 - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - do i = 1, recv_count - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) - ! Only accumulate for particles this rank owns - do k = 1, num_local_ibs - j = local_ib_patch_ids(k) - if (patch_ib(j)%gbl_patch_id == pid) then - forces_total(j,:) = forces_total(j,:) + fval(:) - torques_total(j,:) = torques_total(j,:) + tval(:) - exit - end if - end do - end do - end do - - ! Write totals back for owned particles only - do k = 1, num_local_ibs - j = local_ib_patch_ids(k) - forces(j,:) = forces_total(j,:) - torques(j,:) = torques_total(j,:) - end do - - ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. 
- pack_pos = 0 - call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - do k = 1, num_local_ibs - j = local_ib_patch_ids(k) - call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - fval(:) = forces(j,:); tval(:) = torques(j,:) - call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) - end do - - nreqs = 0 - nbr_idx = 0 - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. dz == 0) cycle - nbr_idx = nbr_idx + 1 - tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords - [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) - if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL - recv_neighbor_list(nbr_idx) = recv_neighbor - - nreqs = nreqs + 1 - call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & - & requests(nreqs), ierr) - end do - end do - end do - - do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) - do dy = -1, 1 - do dx = -1, 1 - if (dx == 0 .and. dy == 0 .and. dz == 0) cycle - tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - - nbr_coords = proc_coords + [dx, dy, dz] - call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) - if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - - nreqs = nreqs + 1 - call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) - end do - end do - end do - - call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - - ! Overwrite ghost-particle forces with authoritative values from the owning rank. 
- do nbr_idx = 1, merge(26, 8, num_dims == 3) - if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle - unpack_pos = 0 - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & - & ierr) - do i = 1, recv_count - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) - call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) - ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) - do j = 1, num_ibs - if (patch_ib(j)%gbl_patch_id == pid) then - is_owned = .false. - do k = 1, num_local_ibs - if (local_ib_patch_ids(k) == j) then - is_owned = .true. - exit - end if - end do - if (.not. is_owned) then - forces(j,:) = fval(:) - torques(j,:) = tval(:) - end if - exit - end if - end do - end do - end do - - deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) -#endif - - end subroutine s_communicate_ib_forces_scatter +! subroutine s_communicate_ib_forces_scatter(forces, torques) + +! real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + +! #ifdef MFC_MPI +! integer, parameter :: max_nbrs = 26 +! integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos +! integer :: buf_size, entry_bytes, ierr, recv_count, pid +! integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag +! integer, dimension(3) :: nbr_coords +! logical :: is_owned +! real(wp), dimension(3) :: fval, tval +! real(wp), dimension(num_ibs, 3) :: forces_total, torques_total +! integer, dimension(max_nbrs) :: recv_neighbor_list +! integer, dimension(2*max_nbrs) :: requests +! character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) +! character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) +! integer :: owned_buf_size + +! 
if (num_procs == 1) return + +! ! Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle +! entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 +! buf_size = storage_size(0)/8 + entry_bytes*num_ibs +! owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max +! allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & +! & owned_recv_bufs(owned_buf_size, max_nbrs)) + +! ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. +! pack_pos = 0 +! call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! do i = 1, num_ibs +! call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! fval(:) = forces(i,:); tval(:) = torques(i,:) +! call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! end do + +! nreqs = 0 +! nbr_idx = 0 +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! nbr_idx = nbr_idx + 1 +! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords - [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) +! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL +! recv_neighbor_list(nbr_idx) = recv_neighbor + +! nreqs = nreqs + 1 +! call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & +! & requests(nreqs), ierr) +! end do +! end do +! end do + +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords + [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) +! 
if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + +! nreqs = nreqs + 1 +! call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) +! end do +! end do +! end do + +! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + +! ! Local reduction: for each owned particle, sum contributions from all neighbors. +! forces_total = forces +! torques_total = torques +! do nbr_idx = 1, merge(26, 8, num_dims == 3) +! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle +! unpack_pos = 0 +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) +! do i = 1, recv_count +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! ! Only accumulate for particles this rank owns +! do k = 1, num_local_ibs +! j = local_ib_patch_ids(k) +! if (patch_ib(j)%gbl_patch_id == pid) then +! forces_total(j,:) = forces_total(j,:) + fval(:) +! torques_total(j,:) = torques_total(j,:) + tval(:) +! exit +! end if +! end do +! end do +! end do + +! ! Write totals back for owned particles only +! do k = 1, num_local_ibs +! j = local_ib_patch_ids(k) +! forces(j,:) = forces_total(j,:) +! torques(j,:) = torques_total(j,:) +! end do + +! ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. +! pack_pos = 0 +! call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! do k = 1, num_local_ibs +! j = local_ib_patch_ids(k) +! call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! fval(:) = forces(j,:); tval(:) = torques(j,:) +! 
call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) +! end do + +! nreqs = 0 +! nbr_idx = 0 +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! nbr_idx = nbr_idx + 1 +! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords - [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) +! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL +! recv_neighbor_list(nbr_idx) = recv_neighbor + +! nreqs = nreqs + 1 +! call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & +! & requests(nreqs), ierr) +! end do +! end do +! end do + +! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) +! do dy = -1, 1 +! do dx = -1, 1 +! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle +! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + +! nbr_coords = proc_coords + [dx, dy, dz] +! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) +! if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + +! nreqs = nreqs + 1 +! call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) +! end do +! end do +! end do + +! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + +! ! Overwrite ghost-particle forces with authoritative values from the owning rank. +! do nbr_idx = 1, merge(26, 8, num_dims == 3) +! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle +! unpack_pos = 0 +! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & +! & ierr) +! do i = 1, recv_count +! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) +! 
call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) +! ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) +! do j = 1, num_ibs +! if (patch_ib(j)%gbl_patch_id == pid) then +! is_owned = .false. +! do k = 1, num_local_ibs +! if (local_ib_patch_ids(k) == j) then +! is_owned = .true. +! exit +! end if +! end do +! if (.not. is_owned) then +! forces(j,:) = fval(:) +! torques(j,:) = tval(:) +! end if +! exit +! end if +! end do +! end do +! end do + +! deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) +! #endif + +! end subroutine s_communicate_ib_forces_scatter subroutine s_handoff_ib_ownership() diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 96e96bdb4f..e655728cd8 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1012,6 +1012,8 @@ contains #else "on CPUs" #endif + else + allocate (patch_ib(num_ib_patches_max)) end if call s_mpi_bcast_user_inputs() @@ -1200,6 +1202,7 @@ contains logical :: is_in_neighborhood, is_local patch_ib_gbl(:) = patch_ib(:) + print *, "Starting" call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up deallocate (patch_ib) From d36fe0bbc319a684c0c29b14f7327da18020232f Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 13:09:40 -0400 Subject: [PATCH 12/15] Debugged rank ownership bug and invalid number of global IBs --- src/common/m_model.fpp | 3 +- src/simulation/m_ibm.fpp | 287 ++++++++++++---------------------- src/simulation/m_start_up.fpp | 9 +- 3 files changed, 102 insertions(+), 197 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 0dab036f9b..f02474f8d0 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -983,10 +983,11 @@ contains dx_local = minval(dx); dy_local = minval(dy) if (p /= 0) dz_local = minval(dz) - allocate (stl_bounding_boxes(num_ibs,1:3,1:3)) + allocate (stl_bounding_boxes(num_gbl_ibs,1:3,1:3)) do patch_id = 1, num_ibs if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then + print *, proc_rank, patch_id, num_ibs, patch_ib(patch_id)%geometry allocate (models(patch_id)%model) print *, " * Reading model: " // trim(patch_ib(patch_id)%model_filepath) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 4bb008a2eb..0348668835 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1363,197 +1363,102 @@ contains !! particles. Phase 2: each rank sends its finalized owned-particle forces (by gbl_patch_id) back to all neighbors !! simultaneously. After MPI_WAITALL, each rank overwrites ghost-particle forces with the authoritative values from the owning !! rank. Not currently called - available for benchmarking against s_communicate_ib_forces. -! subroutine s_communicate_ib_forces_scatter(forces, torques) - -! real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques - -! #ifdef MFC_MPI -! integer, parameter :: max_nbrs = 26 -! integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos -! integer :: buf_size, entry_bytes, ierr, recv_count, pid -! integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag -! integer, dimension(3) :: nbr_coords -! logical :: is_owned -! real(wp), dimension(3) :: fval, tval -! 
real(wp), dimension(num_ibs, 3) :: forces_total, torques_total -! integer, dimension(max_nbrs) :: recv_neighbor_list -! integer, dimension(2*max_nbrs) :: requests -! character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) -! character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) -! integer :: owned_buf_size - -! if (num_procs == 1) return - -! ! Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle -! entry_bytes = storage_size(0)/8 + 6*storage_size(0._wp)/8 -! buf_size = storage_size(0)/8 + entry_bytes*num_ibs -! owned_buf_size = storage_size(0)/8 + entry_bytes*num_local_ibs_max -! allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & -! & owned_recv_bufs(owned_buf_size, max_nbrs)) - -! ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. -! pack_pos = 0 -! call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! do i = 1, num_ibs -! call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! fval(:) = forces(i,:); tval(:) = torques(i,:) -! call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! end do - -! nreqs = 0 -! nbr_idx = 0 -! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! nbr_idx = nbr_idx + 1 -! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords - [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL -! recv_neighbor_list(nbr_idx) = recv_neighbor - -! nreqs = nreqs + 1 -! call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & -! & requests(nreqs), ierr) -! end do -! end do -! end do - -! 
do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords + [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - -! nreqs = nreqs + 1 -! call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) -! end do -! end do -! end do - -! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - -! ! Local reduction: for each owned particle, sum contributions from all neighbors. -! forces_total = forces -! torques_total = torques -! do nbr_idx = 1, merge(26, 8, num_dims == 3) -! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle -! unpack_pos = 0 -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) -! do i = 1, recv_count -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! ! Only accumulate for particles this rank owns -! do k = 1, num_local_ibs -! j = local_ib_patch_ids(k) -! if (patch_ib(j)%gbl_patch_id == pid) then -! forces_total(j,:) = forces_total(j,:) + fval(:) -! torques_total(j,:) = torques_total(j,:) + tval(:) -! exit -! end if -! end do -! end do -! end do - -! ! Write totals back for owned particles only -! do k = 1, num_local_ibs -! j = local_ib_patch_ids(k) -! forces(j,:) = forces_total(j,:) -! torques(j,:) = torques_total(j,:) -! end do - -! ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. -! pack_pos = 0 -! 
call MPI_PACK(num_local_ibs, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! do k = 1, num_local_ibs -! j = local_ib_patch_ids(k) -! call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! fval(:) = forces(j,:); tval(:) = torques(j,:) -! call MPI_PACK(fval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) -! end do - -! nreqs = 0 -! nbr_idx = 0 -! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! nbr_idx = nbr_idx + 1 -! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords - [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL -! recv_neighbor_list(nbr_idx) = recv_neighbor - -! nreqs = nreqs + 1 -! call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & -! & requests(nreqs), ierr) -! end do -! end do -! end do - -! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) -! do dy = -1, 1 -! do dx = -1, 1 -! if (dx == 0 .and. dy == 0 .and. dz == 0) cycle -! tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) - -! nbr_coords = proc_coords + [dx, dy, dz] -! call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) -! if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL - -! nreqs = nreqs + 1 -! call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) -! end do -! end do -! end do - -! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) - -! ! Overwrite ghost-particle forces with authoritative values from the owning rank. -! do nbr_idx = 1, merge(26, 8, num_dims == 3) -! 
if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle -! unpack_pos = 0 -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & -! & ierr) -! do i = 1, recv_count -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) -! ! Only overwrite ghost particles (not owned ones - this rank's total is authoritative) -! do j = 1, num_ibs -! if (patch_ib(j)%gbl_patch_id == pid) then -! is_owned = .false. -! do k = 1, num_local_ibs -! if (local_ib_patch_ids(k) == j) then -! is_owned = .true. -! exit -! end if -! end do -! if (.not. is_owned) then -! forces(j,:) = fval(:) -! torques(j,:) = tval(:) -! end if -! exit -! end if -! end do -! end do -! end do - -! deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) -! #endif - -! end subroutine s_communicate_ib_forces_scatter + ! subroutine s_communicate_ib_forces_scatter(forces, torques) + + ! real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + + ! #ifdef MFC_MPI integer, parameter :: max_nbrs = 26 integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos integer :: + ! buf_size, entry_bytes, ierr, recv_count, pid integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag integer, dimension(3) :: + ! nbr_coords logical :: is_owned real(wp), dimension(3) :: fval, tval real(wp), dimension(num_ibs, 3) :: forces_total, + ! torques_total integer, dimension(max_nbrs) :: recv_neighbor_list integer, dimension(2*max_nbrs) :: requests character(len=1), + ! allocatable :: send_buf(:), recv_bufs(:,:) character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) integer :: + ! owned_buf_size + + ! if (num_procs == 1) return + + ! ! 
Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle entry_bytes = storage_size(0)/8 + + ! 6*storage_size(0._wp)/8 buf_size = storage_size(0)/8 + entry_bytes*num_ibs owned_buf_size = storage_size(0)/8 + + ! entry_bytes*num_local_ibs_max allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & & + ! owned_recv_bufs(owned_buf_size, max_nbrs)) + + ! ! Phase 1: pack full local force array and broadcast to all neighborhood neighbors. pack_pos = 0 call MPI_PACK(num_ibs, 1, + ! MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) do i = 1, num_ibs call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, + ! MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) fval(:) = forces(i,:); tval(:) = torques(i,:) call + ! MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, + ! pack_pos, MPI_COMM_WORLD, ierr) end do + + ! nreqs = 0 nbr_idx = 0 do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 + ! .and. dy == 0 .and. dz == 0) cycle nbr_idx = nbr_idx + 1 tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords - [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL recv_neighbor_list(nbr_idx) = recv_neighbor + + ! nreqs = nreqs + 1 call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & & + ! requests(nreqs), ierr) end do end do end do + + ! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 .and. dy == 0 .and. dz + ! == 0) cycle tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords + [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + ! 
nreqs = nreqs + 1 call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + ! end do end do end do + + ! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! ! Local reduction: for each owned particle, sum contributions from all neighbors. forces_total = forces torques_total = + ! torques do nbr_idx = 1, merge(26, 8, num_dims == 3) if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle unpack_pos = 0 + ! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) do i = 1, + ! recv_count call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) ! Only accumulate for particles + ! this rank owns do k = 1, num_local_ibs j = local_ib_patch_ids(k) if (patch_ib(j)%gbl_patch_id == pid) then forces_total(j,:) = + ! forces_total(j,:) + fval(:) torques_total(j,:) = torques_total(j,:) + tval(:) exit end if end do end do end do + + ! ! Write totals back for owned particles only do k = 1, num_local_ibs j = local_ib_patch_ids(k) forces(j,:) = forces_total(j,:) + ! torques(j,:) = torques_total(j,:) end do + + ! ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. pack_pos = 0 call MPI_PACK(num_local_ibs, + ! 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) do k = 1, num_local_ibs j = + ! local_ib_patch_ids(k) call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, + ! MPI_COMM_WORLD, ierr) fval(:) = forces(j,:); tval(:) = torques(j,:) call MPI_PACK(fval, 3, mpi_p, owned_send_buf, + ! owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, + ! 
MPI_COMM_WORLD, ierr) end do + + ! nreqs = 0 nbr_idx = 0 do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 + ! .and. dy == 0 .and. dz == 0) cycle nbr_idx = nbr_idx + 1 tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords - [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL recv_neighbor_list(nbr_idx) = recv_neighbor + + ! nreqs = nreqs + 1 call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + ! & requests(nreqs), ierr) end do end do end do + + ! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 .and. dy == 0 .and. dz + ! == 0) cycle tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords + [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + ! nreqs = nreqs + 1 call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), + ! ierr) end do end do end do + + ! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! ! Overwrite ghost-particle forces with authoritative values from the owning rank. do nbr_idx = 1, merge(26, 8, num_dims == 3) + ! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle unpack_pos = 0 call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), + ! owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & & ierr) do i = 1, recv_count call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) ! Only overwrite + ! 
ghost particles (not owned ones - this rank's total is authoritative) do j = 1, num_ibs if (patch_ib(j)%gbl_patch_id == pid) + ! then is_owned = .false. do k = 1, num_local_ibs if (local_ib_patch_ids(k) == j) then is_owned = .true. exit end if end do if + ! (.not. is_owned) then forces(j,:) = fval(:) torques(j,:) = tval(:) end if exit end if end do end do end do + + ! deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) #endif + + ! end subroutine s_communicate_ib_forces_scatter subroutine s_handoff_ib_ownership() diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index e655728cd8..0fe1810c24 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1012,7 +1012,7 @@ contains #else "on CPUs" #endif - else + else allocate (patch_ib(num_ib_patches_max)) end if @@ -1202,7 +1202,6 @@ contains logical :: is_in_neighborhood, is_local patch_ib_gbl(:) = patch_ib(:) - print *, "Starting" call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up deallocate (patch_ib) @@ -1223,18 +1222,18 @@ contains ! determine the set of patches owned by local rank num_local_ibs = 0 num_ibs = 0 - do i = 1, num_ib_patches_max + do i = 1, num_gbl_ibs ! catch the edge case where th collision lies just outside the computational domain is_in_neighborhood = .true. is_local = .true. - #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + #:for X, ID, DIM in [('x', 1, 'm'), ('y', 2, 'n'), ('z', 3, 'p')] if (num_dims >= ${ID}$) then position = patch_ib_gbl(i)%${X}$_centroid if (neighbor_domain_${X}$%beg > position .or. position > neighbor_domain_${X}$%end) then is_in_neighborhood = .false. is_local = .false. - else if (${X}$_domain%beg > position .or. position > ${X}$_domain%end) then + else if (${X}$_cb(-1) > position .or. position > ${X}$_cb(${DIM}$)) then is_local = .false. end if end if From cc76bf39869025d7e6e16dd51bafe1438972e80c Mon Sep 17 00:00:00 2001 From: "Daniel J. 
Vickers" Date: Fri, 24 Apr 2026 14:21:44 -0400 Subject: [PATCH 13/15] Fixed global patch ID not being present on other ranks --- src/simulation/m_ib_patches.fpp | 16 +++------------- src/simulation/m_start_up.fpp | 1 + src/simulation/m_time_steppers.fpp | 7 ++++--- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 032c790dc0..222d7b6796 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -510,16 +510,12 @@ contains real(wp) :: radius real(wp), dimension(1:3) :: center - ! Variables to initialize the pressure field that corresponds to the bubble-collapse test case found in Tiwari et al. (2013) - - ! Transferring spherical patch's radius, centroid, smoothing patch identity and smoothing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) radius = patch_ib(patch_id)%radius - ! completely skip particles no in the domain + ! completely skip particles not in the domain if (center(1) - radius > x_cc(m + gp_layers + 1) .or. center(1) + radius < x_cc(-gp_layers - 1) .or. center(2) & & - radius > y_cc(n + gp_layers + 1) .or. center(2) + radius < y_cc(-gp_layers - 1) .or. center(3) - radius > z_cc(p & & + gp_layers + 1) .or. center(3) + radius < z_cc(-gp_layers - 1)) then @@ -542,18 +538,12 @@ contains ! Checking whether the sphere covers a particular cell in the domain and verifying whether the current patch has permission ! to write to that cell. If both queries check out, the primitive variables of the current patch are assigned to this cell. 
- $:GPU_PARALLEL_LOOP(private='[i, j, k, cart_y, cart_z]', copyin='[encoded_patch_id, center, radius]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[i, j, k]', copyin='[encoded_patch_id, center, radius]', collapse=3) do k = kl, kr do j = jl, jr do i = il, ir - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if ! Updating the patch identities bookkeeping variable - if (((x_cc(i) - center(1))**2 + (cart_y - center(2))**2 + (cart_z - center(3))**2 <= radius**2)) then + if (((x_cc(i) - center(1))**2 + (y_cc(j) - center(2))**2 + (z_cc(k) - center(3))**2 <= radius**2)) then ib_markers%sf(i, j, k) = encoded_patch_id end if end do diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 0fe1810c24..02b3999ebd 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1242,6 +1242,7 @@ contains if (is_in_neighborhood) then num_ibs = num_ibs + 1 patch_ib(num_ibs) = patch_ib_gbl(i) + patch_ib(num_ibs)%gbl_patch_id = i if (is_local) then num_local_ibs = num_local_ibs + 1 local_ib_patch_ids(num_local_ibs) = num_ibs diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 32e6005882..db6c43b944 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -566,9 +566,10 @@ contains ! if (ib) then - if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc - call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors - if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then + if (moving_immersed_boundary_flag) then + call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc + call s_handoff_ib_ownership() ! 
recomputes which ranks own which IBs and communicates to neighbors
+            else if (ib_state_wrt) then
                 call s_compute_ib_forces(q_prim_vf, fluid_pp)
             end if
         end if

From 13ad7d0fb154f63c9ad2ca7c55d1444fa9a79478 Mon Sep 17 00:00:00 2001
From: "Daniel J. Vickers"
Date: Tue, 28 Apr 2026 12:09:40 -0400
Subject: [PATCH 14/15] Updating restart data

---
 src/simulation/m_data_output.fpp | 36 +++++++++++---------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp
index ca8a6c0677..a5f6a17b50 100644
--- a/src/simulation/m_data_output.fpp
+++ b/src/simulation/m_data_output.fpp
@@ -922,19 +922,6 @@ contains
         end if
         call s_mpi_barrier()

-        ! Divide num_ibs across num_procs
-        nibs_per_rank = num_ibs/num_procs
-        remainder = mod(num_ibs, num_procs)
-
-        ! Ranks < remainder get one extra IB
-        if (proc_rank < remainder) then
-            ib_start = proc_rank*(nibs_per_rank + 1) + 1
-            ib_end = ib_start + nibs_per_rank ! nibs_per_rank + 1 total
-        else
-            ib_start = remainder*(nibs_per_rank + 1) + (proc_rank - remainder)*nibs_per_rank + 1
-            ib_end = ib_start + nibs_per_rank - 1
-        end if
-
         write (file_loc, '(A,I0,A)') '/restart_data/ib_state_', t_step, '.dat'
         file_loc = trim(case_dir) // trim(file_loc)

@@ -946,20 +933,21 @@ contains

         call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr)

-        do i = ib_start, ib_end
+        do i = 1, num_local_ibs
+            ib_idx = local_ib_patch_ids(i)
             ib_buf(1) = mytime
-            ib_buf(2:4) = patch_ib(i)%force(1:3)
-            ib_buf(5:7) = patch_ib(i)%torque(1:3)
-            ib_buf(8:10) = patch_ib(i)%vel(1:3)
-            ib_buf(11:13) = patch_ib(i)%angular_vel(1:3)
-            ib_buf(14:16) = patch_ib(i)%angles(1:3)
-            ib_buf(17) = patch_ib(i)%x_centroid
-            ib_buf(18) = patch_ib(i)%y_centroid
-            ib_buf(19) = patch_ib(i)%z_centroid
-            ib_buf(20) = patch_ib(i)%radius
+            ib_buf(2:4) = patch_ib(ib_idx)%force(1:3)
+            ib_buf(5:7) = patch_ib(ib_idx)%torque(1:3)
+            ib_buf(8:10) = patch_ib(ib_idx)%vel(1:3)
+            ib_buf(11:13) = patch_ib(ib_idx)%angular_vel(1:3)
+            ib_buf(14:16) = patch_ib(ib_idx)%angles(1:3)
+            ib_buf(17) = patch_ib(ib_idx)%x_centroid
+            ib_buf(18) = patch_ib(ib_idx)%y_centroid
+            ib_buf(19) = patch_ib(ib_idx)%z_centroid
+            ib_buf(20) = patch_ib(ib_idx)%radius

-            ! Global IB index (i) determines position in file
-            disp = int(i - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK
+            ! Global patch ID determines position in file
+            disp = int(patch_ib(ib_idx)%gbl_patch_id - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK

             call MPI_FILE_WRITE_AT(ifile, disp, ib_buf, NFIELDS_PER_IB, mpi_p, status, ierr)
         end do

From 35b786480501fdabd2e34c42344e35e08a0513cf Mon Sep 17 00:00:00 2001
From: "Daniel J. Vickers"
Date: Tue, 28 Apr 2026 12:11:30 -0400
Subject: [PATCH 15/15] add integer declaration

---
 src/simulation/m_data_output.fpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp
index aaa94acb91..effc020325 100644
--- a/src/simulation/m_data_output.fpp
+++ b/src/simulation/m_data_output.fpp
@@ -909,7 +909,7 @@ contains
         integer :: ifile, ierr
         integer, dimension(MPI_STATUS_SIZE) :: status
         logical :: file_exist
-        integer :: i
+        integer :: i, ib_idx
         integer, parameter :: NFIELDS_PER_IB = 20
         real(wp) :: ib_buf(NFIELDS_PER_IB)