diff --git a/docs/documentation/case.md b/docs/documentation/case.md
index 5acd85a7a3..dbd127f23c 100644
--- a/docs/documentation/case.md
+++ b/docs/documentation/case.md
@@ -638,7 +638,7 @@ To restart the simulation from $k$-th time step, see @ref running "Restarting Ca
 | `alpha_wrt(i)` | Logical | Add the volume fraction of fluid $i$ to the database |
 | `gamma_wrt` | Logical | Add the specific heat ratio function to the database |
 | `heat_ratio_wrt` | Logical | Add the specific heat ratio to the database |
-| `ib_state_wrt` | Logical | Write IB state and loads to a datafile at each time step |
+| `ib_state_wrt` | Logical | Write the IB state at each save and output it as a point mesh to the SILO files |
 | `pi_inf_wrt` | Logical | Add the liquid stiffness function to the database |
 | `pres_inf_wrt` | Logical | Add the liquid stiffness to the formatted database |
 | `c_wrt` | Logical | Add the sound speed to the database |
@@ -706,7 +706,7 @@ If `file_per_process` is true, then pre_process, simulation, and post_process mu
 - `probe_wrt` activates the output of state variables at coordinates specified by `probe(i)%[x;y,z]`.
-- `ib_state_wrt` activates the output of data specified by patch_ib(i)%force(:) (and torque, vel, angular_vel, angles, [x,y,z]_centroid) into a single binary datafile for all IBs at all timesteps. During post_processing, this file is converted into separate time histories for each IB.
+- `ib_state_wrt` activates writing of the IB state at each save; post-processing converts it into a point mesh in the SILO files. When no IBs are moving, it also activates the force and torque calculation so that those values can be written to the output state files.
 - `output_partial_domain` activates the output of part of the domain specified by `[x,y,z]_output%%beg` and `[x,y,z]_output%%end`. This is useful for large domains where only a portion of the domain is of interest.
diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp
index cacc50f528..d66fb10614 100644
--- a/src/common/m_constants.fpp
+++ b/src/common/m_constants.fpp
@@ -25,6 +25,7 @@ module m_constants
     integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation
     integer, parameter :: num_patches_max = 10 !< Maximum number of IC patches
     integer, parameter :: num_ib_patches_max = 50000 !< Maximum number of immersed boundary patches (patch_ib)
+    integer, parameter :: num_local_ibs_max = 2000 !< Maximum number of immersed boundary patches local to a single rank
     integer, parameter :: num_bc_patches_max = 10 !< Maximum number of boundary condition patches
     integer, parameter :: max_2d_fourier_modes = 10 !< Max Fourier mode index for 2D modal patch (geometry 13)
     integer, parameter :: max_sph_harm_degree = 5 !< Max degree L for 3D spherical harmonic patch (geometry 14)
diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp
index 6ee2e836a5..c4e5873ead 100644
--- a/src/common/m_derived_types.fpp
+++ b/src/common/m_derived_types.fpp
@@ -202,12 +202,10 @@ module m_derived_types type :: t_model_array ! 
Original CPU-side fields (unchanged) - type(t_model), allocatable :: model !< STL/OBJ geometry model - real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices - real(wp), allocatable, dimension(:,:) :: interpolated_boundary_v !< Interpolated boundary vertices - integer :: boundary_edge_count !< Number of boundary edges - integer :: total_vertices !< Total vertex count - integer :: interpolate !< Interpolation flag + type(t_model), allocatable :: model !< STL/OBJ geometry model + real(wp), allocatable, dimension(:,:,:) :: boundary_v !< Boundary vertices + integer :: boundary_edge_count !< Number of boundary edges + integer :: total_vertices !< Total vertex count ! GPU-friendly flattened arrays integer :: ntrs !< Copy of model%ntrs @@ -272,9 +270,10 @@ module m_derived_types end type ic_patch_parameters type ib_patch_parameters - integer :: geometry !< Type of geometry for the patch + integer :: gbl_patch_id real(wp) :: x_centroid, y_centroid, z_centroid !< Geometric center coordinates of the patch + !> Centroid locations of intermediate steps in the time_stepper module real(wp) :: step_x_centroid, step_y_centroid, step_z_centroid real(wp), dimension(1:3) :: centroid_offset !< offset of center of mass from computed cell center for odd-shaped IBs diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 0dab036f9b..f02474f8d0 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -983,10 +983,11 @@ contains dx_local = minval(dx); dy_local = minval(dy) if (p /= 0) dz_local = minval(dz) - allocate (stl_bounding_boxes(num_ibs,1:3,1:3)) + allocate (stl_bounding_boxes(num_gbl_ibs,1:3,1:3)) do patch_id = 1, num_ibs if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then + print *, proc_rank, patch_id, num_ibs, patch_ib(patch_id)%geometry allocate (models(patch_id)%model) print *, " * Reading model: " // trim(patch_ib(patch_id)%model_filepath) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index a5fdb3ef81..effc020325 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -909,7 +909,7 @@ contains integer :: ifile, ierr integer, dimension(MPI_STATUS_SIZE) :: status logical :: file_exist - integer :: i + integer :: i, ib_idx integer, parameter :: NFIELDS_PER_IB = 20 real(wp) :: ib_buf(NFIELDS_PER_IB) @@ -923,19 +923,6 @@ contains end if call s_mpi_barrier() - ! Divide num_ibs across num_procs - nibs_per_rank = num_ibs/num_procs - remainder = mod(num_ibs, num_procs) - - ! Ranks < remainder get one extra IB - if (proc_rank < remainder) then - ib_start = proc_rank*(nibs_per_rank + 1) + 1 - ib_end = ib_start + nibs_per_rank ! 
nibs_per_rank + 1 total - else - ib_start = remainder*(nibs_per_rank + 1) + (proc_rank - remainder)*nibs_per_rank + 1 - ib_end = ib_start + nibs_per_rank - 1 - end if - write (file_loc, '(A,I0,A)') '/restart_data/ib_state_', t_step, '.dat' file_loc = trim(case_dir) // trim(file_loc) @@ -947,20 +934,21 @@ contains call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), mpi_info_int, ifile, ierr) - do i = ib_start, ib_end + do i = 1, num_local_ibs + ib_idx = local_ib_patch_ids(i) ib_buf(1) = mytime - ib_buf(2:4) = patch_ib(i)%force(1:3) - ib_buf(5:7) = patch_ib(i)%torque(1:3) - ib_buf(8:10) = patch_ib(i)%vel(1:3) - ib_buf(11:13) = patch_ib(i)%angular_vel(1:3) - ib_buf(14:16) = patch_ib(i)%angles(1:3) - ib_buf(17) = patch_ib(i)%x_centroid - ib_buf(18) = patch_ib(i)%y_centroid - ib_buf(19) = patch_ib(i)%z_centroid - ib_buf(20) = patch_ib(i)%radius + ib_buf(2:4) = patch_ib(ib_idx)%force(1:3) + ib_buf(5:7) = patch_ib(ib_idx)%torque(1:3) + ib_buf(8:10) = patch_ib(ib_idx)%vel(1:3) + ib_buf(11:13) = patch_ib(ib_idx)%angular_vel(1:3) + ib_buf(14:16) = patch_ib(ib_idx)%angles(1:3) + ib_buf(17) = patch_ib(ib_idx)%x_centroid + ib_buf(18) = patch_ib(ib_idx)%y_centroid + ib_buf(19) = patch_ib(ib_idx)%z_centroid + ib_buf(20) = patch_ib(ib_idx)%radius ! Global IB index (i) determines position in file - disp = int(i - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK + disp = int(patch_ib(ib_idx)%gbl_patch_id - 1, MPI_OFFSET_KIND)*int(NFIELDS_PER_IB, MPI_OFFSET_KIND)*WP_MOK call MPI_FILE_WRITE_AT(ifile, disp, ib_buf, NFIELDS_PER_IB, mpi_p, status, ierr) end do diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 7fb3353e2c..f1f2317236 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -229,7 +229,8 @@ module m_global_parameters $:GPU_DECLARE(create='[ib_bc_x, ib_bc_y, ib_bc_z]') #endif type(bounds_info) :: x_domain, y_domain, z_domain - $:GPU_DECLARE(create='[x_domain, y_domain, z_domain]') + type(bounds_info) :: neighbor_domain_x, neighbor_domain_y, neighbor_domain_z + $:GPU_DECLARE(create='[x_domain, y_domain, z_domain, neighbor_domain_x, neighbor_domain_y, neighbor_domain_z]') real(wp) :: x_a, y_a, z_a real(wp) :: x_b, y_b, z_b logical :: parallel_io !< Format of the data files @@ -336,18 +337,21 @@ module m_global_parameters !> @name Immersed Boundaries !> @{ - logical :: ib - integer :: num_ibs - integer :: collision_model - real(wp) :: coefficient_of_restitution - real(wp) :: collision_time - real(wp) :: ib_coefficient_of_friction - logical :: ib_state_wrt - type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib !< Immersed boundary patch parameters - type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l - integer :: Np - - $:GPU_DECLARE(create='[ib, num_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l]') + logical :: ib + integer :: num_ibs !< number of IBs that the current processor is aware of + integer :: num_gbl_ibs !< number of IBs in the overall simulation + integer :: num_local_ibs !< number of IBs that lie inside the processor domain + integer :: collision_model + real(wp) :: coefficient_of_restitution + real(wp) :: collision_time + real(wp) :: ib_coefficient_of_friction + logical :: ib_state_wrt + type(ib_patch_parameters), allocatable, dimension(:) :: patch_ib !< Immersed boundary patch parameters + integer, dimension(num_local_ibs_max) :: local_ib_patch_ids !< lookup table of IBs in the local compute domain + 
type(vec3_dt), allocatable, dimension(:) :: airfoil_grid_u, airfoil_grid_l + integer :: Np + + $:GPU_DECLARE(create='[ib, num_ibs, num_gbl_ibs, num_local_ibs, patch_ib, Np, airfoil_grid_u, airfoil_grid_l, local_ib_patch_ids]') $:GPU_DECLARE(create='[ib_coefficient_of_friction]') !> @} @@ -787,7 +791,9 @@ contains relativity = .false. #:endif + allocate (patch_ib(num_ib_patches_max)) do i = 1, num_ib_patches_max + patch_ib(i)%gbl_patch_id = i patch_ib(i)%geometry = dflt_int patch_ib(i)%x_centroid = 0._wp patch_ib(i)%y_centroid = 0._wp diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index cf3ad9ecfa..1770e9c271 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -109,7 +109,7 @@ contains radius = patch_ib(patch_id)%radius ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -221,7 +221,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -376,7 +376,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -466,7 +466,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -510,16 +510,12 @@ contains real(wp) :: radius real(wp), dimension(1:3) :: center - ! Variables to initialize the pressure field that corresponds to the bubble-collapse test case found in Tiwari et al. (2013) - - ! Transferring spherical patch's radius, centroid, smoothing patch identity and smoothing coefficient information - center(1) = patch_ib(patch_id)%x_centroid + real(xp, wp)*(x_domain%end - x_domain%beg) center(2) = patch_ib(patch_id)%y_centroid + real(yp, wp)*(y_domain%end - y_domain%beg) center(3) = patch_ib(patch_id)%z_centroid + real(zp, wp)*(z_domain%end - z_domain%beg) radius = patch_ib(patch_id)%radius - ! completely skip particles no in the domain + ! completely skip particles not in the domain if (center(1) - radius > x_cc(m + gp_layers + 1) .or. center(1) + radius < x_cc(-gp_layers - 1) .or. center(2) & & - radius > y_cc(n + gp_layers + 1) .or. center(2) + radius < y_cc(-gp_layers - 1) .or. center(3) - radius > z_cc(p & & + gp_layers + 1) .or. center(3) + radius < z_cc(-gp_layers - 1)) then @@ -527,7 +523,7 @@ contains end if ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! 
find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -542,18 +538,12 @@ contains ! Checking whether the sphere covers a particular cell in the domain and verifying whether the current patch has permission ! to write to that cell. If both queries check out, the primitive variables of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i, j, k, cart_y, cart_z]', copyin='[encoded_patch_id, center, radius]', collapse=3) + $:GPU_PARALLEL_LOOP(private='[i, j, k]', copyin='[encoded_patch_id, center, radius]', collapse=3) do k = kl, kr do j = jl, jr do i = il, ir - if (grid_geometry == 3) then - call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) - else - cart_y = y_cc(j) - cart_z = z_cc(k) - end if ! Updating the patch identities bookkeeping variable - if (((x_cc(i) - center(1))**2 + (cart_y - center(2))**2 + (cart_z - center(3))**2 <= radius**2)) then + if (((x_cc(i) - center(1))**2 + (y_cc(j) - center(2))**2 + (z_cc(k) - center(3))**2 <= radius**2)) then ib_markers%sf(i, j, k) = encoded_patch_id end if end do @@ -586,7 +576,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -656,7 +646,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -724,7 +714,7 @@ contains inverse_rotation(:,:) = patch_ib(patch_id)%rotation_matrix_inverse(:,:) ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) ! find the indices to the left and right of the IB in i, j, k il = -gp_layers - 1 @@ -781,7 +771,7 @@ contains threshold = patch_ib(patch_id)%model_threshold ! encode the periodicity information into the patch_id - call s_encode_patch_periodicity(patch_id, xp, yp, 0, encoded_patch_id) + call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, 0, encoded_patch_id) il = -gp_layers - 1 jl = -gp_layers - 1 @@ -858,7 +848,7 @@ contains rotation(:,:) = patch_ib(patch_id)%rotation_matrix(:,:) ! 
encode the periodicity information into the patch_id
-        call s_encode_patch_periodicity(patch_id, xp, yp, zp, encoded_patch_id)
+        call s_encode_patch_periodicity(patch_ib(patch_id)%gbl_patch_id, xp, yp, zp, encoded_patch_id)
         il = -gp_layers - 1
         jl = -gp_layers - 1
@@ -1039,7 +1029,7 @@ contains
         temp_y_per = y_periodicity; if (y_periodicity == -1) temp_y_per = 2
         temp_z_per = z_periodicity; if (z_periodicity == -1) temp_z_per = 2
-        offset = (num_ibs + 1)*temp_x_per + 3*(num_ibs + 1)*temp_y_per + 9*(num_ibs + 1)*temp_z_per
+        offset = (num_gbl_ibs + 1)*temp_x_per + 3*(num_gbl_ibs + 1)*temp_y_per + 9*(num_gbl_ibs + 1)*temp_z_per
         encoded_patch_id = patch_id + offset
     end subroutine s_encode_patch_periodicity
@@ -1054,7 +1044,7 @@ contains
         integer, intent(out), optional :: x_periodicity, y_periodicity, z_periodicity
         integer :: offset, remainder, xp, yp, zp, base
-        base = num_ibs + 1
+        base = num_gbl_ibs + 1
         patch_id = mod(encoded_patch_id - 1, base) + 1
         offset = (encoded_patch_id - patch_id)/base
diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp
index 5f865cffeb..90431a6839 100644
--- a/src/simulation/m_ibm.fpp
+++ b/src/simulation/m_ibm.fpp
@@ -1010,9 +1010,8 @@ contains
         call s_apply_collision_forces(ghost_points, num_gps, ib_markers, forces, torques)
-        ! reduce the forces across all MPI ranks
-        call s_mpi_allreduce_vectors_sum(forces, forces, num_ibs, 3)
-        call s_mpi_allreduce_vectors_sum(torques, torques, num_ibs, 3)
+        ! reduce the forces across local neighborhood ranks
+        call s_communicate_ib_forces(forces, torques)
         ! consider body forces after reducing to avoid double counting
         do i = 1, num_ibs
@@ -1225,4 +1224,396 @@ contains
     end subroutine s_wrap_periodic_ibs
+    !> @brief Reduces the IB forces and torques across the local neighborhood of ranks without a global allreduce.
+    !> Accumulation phase: 2 passes per
+    !! dimension receiving from the low-index (-X) neighbor. Pass 1: add received values; save what was received as recv_snap. Pass
+    !! 2: send current (post-pass-1) values; add received; subtract recv_snap to remove double-counting of the direct contribution
+    !! already added in pass 1. Back-propagation phase: 2 passes per dimension receiving from the high-index (+X) neighbor, each
+    !! overwriting local forces with the neighbor's accumulated total.
+    subroutine s_communicate_ib_forces(forces, torques)
+
+        real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques
+
+#ifdef MFC_MPI
+        integer :: i, j, pack_pos, unpack_pos, buf_size, ierr
+        integer :: send_neighbor, recv_neighbor, recv_count, pid
+        real(wp), dimension(3) :: fval, tval
+        real(wp), allocatable :: recv_forces_snap(:,:), recv_torques_snap(:,:)
+        character(len=1), allocatable :: send_buf(:), recv_buf(:)
+
+        if (num_procs == 1) return
+
+        buf_size = storage_size(0)/8 + (storage_size(0)/8 + 6*storage_size(0._wp)/8)*size(patch_ib)
+        allocate (send_buf(buf_size), recv_buf(buf_size), recv_forces_snap(num_ibs, 3), recv_torques_snap(num_ibs, 3))
+
+        ! Accumulation phase: propagate contributions toward the high-index corner.
+        #:for X, ID, TAG1, TAG2 in [('x', 1, 300, 302), ('y', 2, 304, 306), ('z', 3, 308, 310)]
+            if (num_dims >= ${ID}$) then
+                send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0)
+                recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0)
+
+                ! Pass 1: send current forces to +${X}$ neighbor; receive from -${X}$ neighbor and add. Save what was received as
+                ! recv_snap for double-count removal in pass 2. 
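+                ! Packed message layout: num_ibs, then one (gbl_patch_id, force(1:3), torque(1:3)) record per IB this rank is aware of.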
+ recv_forces_snap = 0._wp + recv_torques_snap = 0._wp + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG1}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG1}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + recv_forces_snap(j,:) = fval(:) + recv_torques_snap(j,:) = tval(:) + forces(j,:) = forces(j,:) + fval(:) + torques(j,:) = torques(j,:) + tval(:) + exit + end if + end do + end do + end if + + ! Pass 2: send post-pass-1 forces to +${X}$ neighbor; receive from -${X}$ neighbor. Add received values then + ! subtract recv_snap to remove the pass-1 contribution that was already counted, leaving only the 2-hop delta. + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG2}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG2}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + forces(j,:) = forces(j,:) + fval(:) - recv_forces_snap(j,:) + torques(j,:) = torques(j,:) + tval(:) - recv_torques_snap(j,:) + exit + end if + end do + end do + end if + end if + #:endfor + + ! Back-propagation phase: for each dimension, 2 passes receiving from the high-index neighbor. Each pass overwrites local + ! forces with the neighbor's accumulated total. Two passes ensure the total reaches 2 hops back, covering the full + ! neighborhood. 
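+        ! These exchanges reuse the same packed (count, gbl_patch_id, force, torque) layout as the accumulation phase; tags 312-322
+        ! are distinct per direction and pass, and from the accumulation-phase tags 300-310, so the two phases cannot swap messages.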
+ #:for X, ID, TAG1, TAG2 in [('x', 1, 312, 314), ('y', 2, 316, 318), ('z', 3, 320, 322)] + if (num_dims >= ${ID}$) then + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + + #:for TAG in [TAG1, TAG2] + pack_pos = 0 + call MPI_PACK(num_ibs, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + do i = 1, num_ibs + call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + fval(:) = forces(i,:); tval(:) = torques(i,:) + call MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + end do + call MPI_SENDRECV(send_buf, pack_pos, MPI_PACKED, send_neighbor, ${TAG}$, recv_buf, buf_size, MPI_PACKED, & + & recv_neighbor, ${TAG}$, MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + if (recv_neighbor /= MPI_PROC_NULL) then + unpack_pos = 0 + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) + call MPI_UNPACK(recv_buf, buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == pid) then + forces(j,:) = fval(:) + torques(j,:) = tval(:) + exit + end if + end do + end do + end if + #:endfor + end if + #:endfor + + deallocate (send_buf, recv_buf, recv_forces_snap, recv_torques_snap) +#endif + + end subroutine s_communicate_ib_forces + + !> Alternative force reduction using two non-blocking all-to-neighbor broadcasts. Phase 1: every rank sends its full force array + !! to all 26 neighborhood neighbors simultaneously. After MPI_WAITALL, each rank sums contributions from neighbors for its owned + !! particles. Phase 2: each rank sends its finalized owned-particle forces (by gbl_patch_id) back to all neighbors + !! simultaneously. After MPI_WAITALL, each rank overwrites ghost-particle forces with the authoritative values from the owning + !! rank. Not currently called - available for benchmarking against s_communicate_ib_forces. + ! subroutine s_communicate_ib_forces_scatter(forces, torques) + + ! real(wp), dimension(num_ibs, 3), intent(inout) :: forces, torques + + ! #ifdef MFC_MPI integer, parameter :: max_nbrs = 26 integer :: i, j, k, nbr_idx, nreqs, pack_pos, unpack_pos integer :: + ! buf_size, entry_bytes, ierr, recv_count, pid integer :: send_neighbor, recv_neighbor, dx, dy, dz, tag integer, dimension(3) :: + ! nbr_coords logical :: is_owned real(wp), dimension(3) :: fval, tval real(wp), dimension(num_ibs, 3) :: forces_total, + ! torques_total integer, dimension(max_nbrs) :: recv_neighbor_list integer, dimension(2*max_nbrs) :: requests character(len=1), + ! allocatable :: send_buf(:), recv_bufs(:,:) character(len=1), allocatable :: owned_send_buf(:), owned_recv_bufs(:,:) integer :: + ! owned_buf_size + + ! if (num_procs == 1) return + + ! ! Buffer sized to hold count + (gbl_patch_id, forces, torques) per particle entry_bytes = storage_size(0)/8 + + ! 6*storage_size(0._wp)/8 buf_size = storage_size(0)/8 + entry_bytes*num_ibs owned_buf_size = storage_size(0)/8 + + ! entry_bytes*num_local_ibs_max allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs), owned_send_buf(owned_buf_size), & & + ! owned_recv_bufs(owned_buf_size, max_nbrs)) + + ! ! 
Phase 1: pack full local force array and broadcast to all neighborhood neighbors. pack_pos = 0 call MPI_PACK(num_ibs, 1, + ! MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) do i = 1, num_ibs call MPI_PACK(patch_ib(i)%gbl_patch_id, 1, + ! MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) fval(:) = forces(i,:); tval(:) = torques(i,:) call + ! MPI_PACK(fval, 3, mpi_p, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(tval, 3, mpi_p, send_buf, buf_size, + ! pack_pos, MPI_COMM_WORLD, ierr) end do + + ! nreqs = 0 nbr_idx = 0 do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 + ! .and. dy == 0 .and. dz == 0) cycle nbr_idx = nbr_idx + 1 tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords - [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL recv_neighbor_list(nbr_idx) = recv_neighbor + + ! nreqs = nreqs + 1 call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & & + ! requests(nreqs), ierr) end do end do end do + + ! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 .and. dy == 0 .and. dz + ! == 0) cycle tag = 400 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords + [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + ! nreqs = nreqs + 1 call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), ierr) + ! end do end do end do + + ! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! ! Local reduction: for each owned particle, sum contributions from all neighbors. forces_total = forces torques_total = + ! torques do nbr_idx = 1, merge(26, 8, num_dims == 3) if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle unpack_pos = 0 + ! call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) do i = 1, + ! recv_count call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) ! Only accumulate for particles + ! this rank owns do k = 1, num_local_ibs j = local_ib_patch_ids(k) if (patch_ib(j)%gbl_patch_id == pid) then forces_total(j,:) = + ! forces_total(j,:) + fval(:) torques_total(j,:) = torques_total(j,:) + tval(:) exit end if end do end do end do + + ! ! Write totals back for owned particles only do k = 1, num_local_ibs j = local_ib_patch_ids(k) forces(j,:) = forces_total(j,:) + ! torques(j,:) = torques_total(j,:) end do + + ! ! Phase 2: pack finalized owned-particle forces and back-broadcast to all neighbors. pack_pos = 0 call MPI_PACK(num_local_ibs, + ! 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) do k = 1, num_local_ibs j = + ! local_ib_patch_ids(k) call MPI_PACK(patch_ib(j)%gbl_patch_id, 1, MPI_INTEGER, owned_send_buf, owned_buf_size, pack_pos, + ! MPI_COMM_WORLD, ierr) fval(:) = forces(j,:); tval(:) = torques(j,:) call MPI_PACK(fval, 3, mpi_p, owned_send_buf, + ! owned_buf_size, pack_pos, MPI_COMM_WORLD, ierr) call MPI_PACK(tval, 3, mpi_p, owned_send_buf, owned_buf_size, pack_pos, + ! 
MPI_COMM_WORLD, ierr) end do + + ! nreqs = 0 nbr_idx = 0 do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 + ! .and. dy == 0 .and. dz == 0) cycle nbr_idx = nbr_idx + 1 tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords - [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL recv_neighbor_list(nbr_idx) = recv_neighbor + + ! nreqs = nreqs + 1 call MPI_IRECV(owned_recv_bufs(:,nbr_idx), owned_buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + ! & requests(nreqs), ierr) end do end do end do + + ! do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) do dy = -1, 1 do dx = -1, 1 if (dx == 0 .and. dy == 0 .and. dz + ! == 0) cycle tag = 427 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! nbr_coords = proc_coords + [dx, dy, dz] call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) if (ierr /= + ! MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + ! nreqs = nreqs + 1 call MPI_ISEND(owned_send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), + ! ierr) end do end do end do + + ! call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! ! Overwrite ghost-particle forces with authoritative values from the owning rank. do nbr_idx = 1, merge(26, 8, num_dims == 3) + ! if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle unpack_pos = 0 call MPI_UNPACK(owned_recv_bufs(:,nbr_idx), + ! owned_buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, & & ierr) do i = 1, recv_count call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, pid, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, fval, 3, mpi_p, MPI_COMM_WORLD, ierr) call + ! MPI_UNPACK(owned_recv_bufs(:,nbr_idx), owned_buf_size, unpack_pos, tval, 3, mpi_p, MPI_COMM_WORLD, ierr) ! Only overwrite + ! ghost particles (not owned ones - this rank's total is authoritative) do j = 1, num_ibs if (patch_ib(j)%gbl_patch_id == pid) + ! then is_owned = .false. do k = 1, num_local_ibs if (local_ib_patch_ids(k) == j) then is_owned = .true. exit end if end do if + ! (.not. is_owned) then forces(j,:) = fval(:) torques(j,:) = tval(:) end if exit end if end do end do end do + + ! deallocate (send_buf, recv_bufs, owned_send_buf, owned_recv_bufs) #endif + + ! end subroutine s_communicate_ib_forces_scatter + + subroutine s_handoff_ib_ownership() + + integer :: i, j, k, output_idx, local_output_idx + integer :: old_num_local_ibs + integer :: new_count, recv_count + integer :: pack_pos, unpack_pos, buf_size, patch_bytes + integer :: send_neighbor, recv_neighbor, ierr + integer :: dx, dy, dz, tag, nbr_idx, nreqs + integer, dimension(3) :: nbr_coords + real(wp) :: position + real(wp), dimension(3) :: centroid + logical :: is_new, already_known + type(ib_patch_parameters) :: tmp_patch + integer, dimension(num_local_ibs_max) :: local_ib_idx_old + ! 26 neighbors max in 3D; each gets its own recv buffer and a request handle for send + recv + integer, parameter :: max_nbrs = 26 + character(len=1), allocatable :: send_buf(:), recv_bufs(:,:) + integer, dimension(2*max_nbrs) :: requests + integer, dimension(max_nbrs) :: recv_neighbor_list + + #:if defined('MFC_MPI') + if (num_procs > 1) then + ! save a copy of the local IB's global indices to cross-reference for later. 
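+                ! local_ib_idx_old is compared against the recomputed ownership list below to flag patches newly owned by this
+                ! rank; only those new patches are packed and broadcast to the neighborhood.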
+ old_num_local_ibs = num_local_ibs + do i = 1, num_local_ibs + local_ib_idx_old(i) = patch_ib(local_ib_patch_ids(i))%gbl_patch_id + end do + + ! delete any particles that no longer need to be tracked and coalesce the array + output_idx = 0 + local_output_idx = 0 + do i = 1, num_ibs + #:for X, ID in [('x', 1), ('y', 2), ('z', 3)] + if (patch_ib(i)%${X}$_centroid < neighbor_domain_${X}$%beg .or. neighbor_domain_${X}$%end < patch_ib(i) & + & %${X}$_centroid) then + cycle + end if + #:endfor + + output_idx = output_idx + 1 + if (i /= output_idx) patch_ib(output_idx) = patch_ib(i) + centroid = [patch_ib(i)%x_centroid, patch_ib(i)%y_centroid, 0._wp] + if (num_dims == 3) centroid(3) = patch_ib(i)%z_centroid + if (f_local_rank_owns_collision(centroid)) then + local_output_idx = local_output_idx + 1 + local_ib_patch_ids(local_output_idx) = output_idx + end if + end do + num_ibs = output_idx + num_local_ibs = local_output_idx + + ! Broadcast newly-owned patches to all neighborhood neighbors (including corners/edges). + patch_bytes = storage_size(tmp_patch)/8 + buf_size = storage_size(0)/8 + patch_bytes*num_local_ibs_max + allocate (send_buf(buf_size), recv_bufs(buf_size, max_nbrs)) + + ! Write placeholder count at position 0 + pack_pos = 0 + call MPI_PACK(0, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + + ! Single pass: pack new patches and count them + new_count = 0 + do i = 1, num_local_ibs + k = local_ib_patch_ids(i) + is_new = .true. + do j = 1, old_num_local_ibs + if (patch_ib(k)%gbl_patch_id == local_ib_idx_old(j)) then + is_new = .false. + exit + end if + end do + if (is_new) then + call MPI_PACK(patch_ib(k), patch_bytes, MPI_BYTE, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + new_count = new_count + 1 + end if + end do + + ! Overwrite the placeholder with the real count + pack_pos = 0 + call MPI_PACK(new_count, 1, MPI_INTEGER, send_buf, buf_size, pack_pos, MPI_COMM_WORLD, ierr) + pack_pos = storage_size(0)/8 + new_count*patch_bytes + + ! Post all receives first, then all sends, so they are all in flight together. Tags 200..226: tag = 200 + (dx+1)*9 + + ! (dy+1)*3 + (dz+1) + nreqs = 0 + nbr_idx = 0 + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + nbr_idx = nbr_idx + 1 + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + ! Receive from the mirror direction + nbr_coords = proc_coords - [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, recv_neighbor, ierr) + if (ierr /= MPI_SUCCESS) recv_neighbor = MPI_PROC_NULL + recv_neighbor_list(nbr_idx) = recv_neighbor + + nreqs = nreqs + 1 + call MPI_IRECV(recv_bufs(:,nbr_idx), buf_size, MPI_PACKED, recv_neighbor, tag, MPI_COMM_WORLD, & + & requests(nreqs), ierr) + end do + end do + end do + + do dz = merge(-1, 0, num_dims == 3), merge(1, 0, num_dims == 3) + do dy = -1, 1 + do dx = -1, 1 + if (dx == 0 .and. dy == 0 .and. dz == 0) cycle + tag = 200 + (dx + 1)*9 + (dy + 1)*3 + (dz + 1) + + nbr_coords = proc_coords + [dx, dy, dz] + call MPI_CART_RANK(MPI_COMM_CART, nbr_coords, send_neighbor, ierr) + if (ierr /= MPI_SUCCESS) send_neighbor = MPI_PROC_NULL + + nreqs = nreqs + 1 + call MPI_ISEND(send_buf, pack_pos, MPI_PACKED, send_neighbor, tag, MPI_COMM_WORLD, requests(nreqs), & + & ierr) + end do + end do + end do + + call MPI_WAITALL(nreqs, requests, MPI_STATUSES_IGNORE, ierr) + + ! 
Unpack all received buffers + do nbr_idx = 1, merge(26, 8, num_dims == 3) + if (recv_neighbor_list(nbr_idx) == MPI_PROC_NULL) cycle + unpack_pos = 0 + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, recv_count, 1, MPI_INTEGER, MPI_COMM_WORLD, ierr) + do i = 1, recv_count + call MPI_UNPACK(recv_bufs(:,nbr_idx), buf_size, unpack_pos, tmp_patch, patch_bytes, MPI_BYTE, & + & MPI_COMM_WORLD, ierr) + already_known = .false. + do j = 1, num_ibs + if (patch_ib(j)%gbl_patch_id == tmp_patch%gbl_patch_id) then + already_known = .true. + exit + end if + end do + if (.not. already_known) then + num_ibs = num_ibs + 1 + @:ASSERT(num_ibs <= size(patch_ib), 'patch_ib overflow in neighborhood handoff') + patch_ib(num_ibs) = tmp_patch + end if + end do + end do + + deallocate (send_buf, recv_bufs) + end if + #:endif + + end subroutine s_handoff_ib_ownership + end module m_ibm diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index d544324c8a..1420464202 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -39,6 +39,7 @@ module m_start_up use m_nvtx use m_ibm + use m_collisions use m_compile_specific use m_checker_common use m_checker @@ -915,6 +916,7 @@ contains if (model_eqns == 3) call s_initialize_internal_energy_equations(q_cons_ts(1)%vf) if (ib) then if (t_step_start > 0) call s_read_ib_restart_data(t_step_start) + call s_reduce_ib_patch_array() call s_ibm_setup() if (t_step_start == 0) then call s_write_ib_data_file(0) @@ -1010,6 +1012,8 @@ contains #else "on CPUs" #endif + else + allocate (patch_ib(num_ib_patches_max)) end if call s_mpi_bcast_user_inputs() @@ -1192,4 +1196,111 @@ contains end subroutine s_read_ib_restart_data + !> @brief takes the patch_ib struct array that contains all global IB patches and reduces to only contain patches that are in + !! the local computational domain. + subroutine s_reduce_ib_patch_array() + + type(ib_patch_parameters), dimension(num_ib_patches_max) :: patch_ib_gbl + real(wp) :: position + integer :: i, j + integer :: num_aware_ibs + logical :: is_in_neighborhood, is_local + + patch_ib_gbl(:) = patch_ib(:) + call get_neighbor_bounds() ! make sure the bounds of the neighbors are correctly set up + + deallocate (patch_ib) + if (num_dims == 3) then + num_aware_ibs = min(num_local_ibs_max*27, num_ib_patches_max) + else + num_aware_ibs = min(num_local_ibs_max*9, num_ib_patches_max) + end if + allocate (patch_ib(num_aware_ibs)) + + num_gbl_ibs = num_ibs + +#ifdef MFC_MPI + ! fallback for 1-rank case + if (num_procs == 1) then + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) + else + ! determine the set of patches owned by local rank + num_local_ibs = 0 + num_ibs = 0 + do i = 1, num_gbl_ibs + ! catch the edge case where th collision lies just outside the computational domain + is_in_neighborhood = .true. + is_local = .true. + + #:for X, ID, DIM in [('x', 1, 'm'), ('y', 2, 'n'), ('z', 3, 'p')] + if (num_dims >= ${ID}$) then + position = patch_ib_gbl(i)%${X}$_centroid + if (neighbor_domain_${X}$%beg > position .or. position > neighbor_domain_${X}$%end) then + is_in_neighborhood = .false. + is_local = .false. + else if (${X}$_cb(-1) > position .or. position > ${X}$_cb(${DIM}$)) then + is_local = .false. + end if + end if + #:endfor + + if (is_in_neighborhood) then + num_ibs = num_ibs + 1 + patch_ib(num_ibs) = patch_ib_gbl(i) + patch_ib(num_ibs)%gbl_patch_id = i + if (is_local) then + num_local_ibs = num_local_ibs + 1 + local_ib_patch_ids(num_local_ibs) = num_ibs + end if + end if + end do + end if +#else + ! 
reduce the size of the array for local simulation in no-MPI case + patch_ib(:) = patch_ib_gbl(1:num_aware_ibs) +#endif + + end subroutine s_reduce_ib_patch_array + + subroutine get_neighbor_bounds() + + real(wp) :: send_val, recv_val + integer :: send_neighbor, recv_neighbor, ierr + + ! Default: no neighbor in any direction + + neighbor_domain_x%beg = -huge(0._wp) + neighbor_domain_x%end = huge(0._wp) + neighbor_domain_y%beg = -huge(0._wp) + neighbor_domain_y%end = huge(0._wp) + neighbor_domain_z%beg = -huge(0._wp) + neighbor_domain_z%end = huge(0._wp) + +#ifdef MFC_MPI + #:for X, ID, TAG, DIM in [('x', 1, 100, 'm'), ('y', 2, 102, 'n'), ('z', 3, 104, 'p')] + if (num_dims >= ${ID}$) then + ! Step 1: broadcast left edge (-1 face) rightward; receive left neighbor's left edge -> neighbor_domain_${X}$%beg + send_val = ${X}$_cb(-1) + send_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_val = -huge(0._wp) + call MPI_SENDRECV(send_val, 1, mpi_p, send_neighbor, ${TAG}$, recv_val, 1, mpi_p, recv_neighbor, ${TAG}$, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + neighbor_domain_${X}$%beg = recv_val + + ! Step 2: broadcast right edge (${DIM}$ face) leftward; receive right neighbor's right edge -> + ! neighbor_domain_${X}$%end + send_val = ${X}$_cb(${DIM}$) + send_neighbor = merge(bc_${X}$%beg, MPI_PROC_NULL, bc_${X}$%beg >= 0) + recv_neighbor = merge(bc_${X}$%end, MPI_PROC_NULL, bc_${X}$%end >= 0) + recv_val = huge(0._wp) + call MPI_SENDRECV(send_val, 1, mpi_p, send_neighbor, ${TAG + 1}$, recv_val, 1, mpi_p, recv_neighbor, ${TAG + 1}$, & + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + neighbor_domain_${X}$%end = recv_val + end if + #:endfor +#endif + + end subroutine get_neighbor_bounds + end module m_start_up diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index ce0ae48c4b..6b914eb6ff 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -566,8 +566,10 @@ contains ! if (ib) then - if (moving_immersed_boundary_flag) call s_wrap_periodic_ibs() - if (ib_state_wrt .and. (.not. moving_immersed_boundary_flag)) then + if (moving_immersed_boundary_flag) then + call s_wrap_periodic_ibs() ! wraps the positions of IBs to the local proc + call s_handoff_ib_ownership() ! recomputes which ranks own which IBs and communicate to neighbors + else if (ib_state_wrt) then call s_compute_ib_forces(q_prim_vf, fluid_pp) end if end if
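For reference, a standalone sketch (not part of the patch) of the mixed-radix periodicity encoding that s_encode_patch_periodicity and its decoder now base on num_gbl_ibs; the value num_gbl_ibs = 10 and the sample digits are illustrative only.

    program demo_patch_periodicity_encoding
        implicit none
        integer, parameter :: num_gbl_ibs = 10
        integer :: patch_id, xp, yp, zp, base, enc, offset, dec_id, dec_xp

        patch_id = 7; xp = 1; yp = 0; zp = -1   ! a -1 periodicity digit is stored as 2 so the offset stays non-negative

        base = num_gbl_ibs + 1
        enc = patch_id + base*(f_wrap(xp) + 3*f_wrap(yp) + 9*f_wrap(zp))   ! same form as the encoder's offset

        dec_id = mod(enc - 1, base) + 1        ! recover the global patch id, as in the decoder
        offset = (enc - dec_id)/base
        dec_xp = mod(offset, 3)                ! recover the x periodicity digit (0, 1, or 2)

        print *, enc, dec_id, dec_xp           ! prints 216, 7, 1 for the sample digits above

    contains

        integer function f_wrap(per)
            integer, intent(in) :: per
            f_wrap = per; if (per == -1) f_wrap = 2
        end function f_wrap

    end program demo_patch_periodicity_encoding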