From 08d34cabadbb473bef50d52c42b069d1199a60a6 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 10 Jun 2026 19:13:20 -0400
Subject: [PATCH 1/7] toolchain: deregister parameters removed from the derived
 types

fluid_pp%{mul0,ss,pv,gamma_v,M_v,mu_v,k_v,cp_v,D_v} and
lag_params%{T0,Thost,c0,rho0,x0} were removed from the Fortran derived
types by upstream #1085/#1093 but remained registered in the toolchain.
Setting any of them causes namelist read to abort with a misleading
'datatype mismatch' error.

Verified against src/common/m_derived_types.fpp:
- physical_parameters has: gamma, pi_inf, Re, cv, qv, qvp, G
- bubbles_lagrange_parameters has: solver_approach, cluster_type,
  pressure_corrector, smooth_type, heatTransfer_model, massTransfer_model,
  write_bubbles, write_bubbles_stats, nBubs_glb, epsilonb, charwidth, valmaxvoid

Also remove the now-dead PATTERNS entries in descriptions.py and update
the stale comments in fortran_gen.py (_emit_fluid_pp and _emit_lag_params).
Both emitters now walk the registry rather than hardcoding member lists.
---
 toolchain/mfc/params/definitions.py           | 14 +++++++----
 toolchain/mfc/params/descriptions.py          | 14 -----------
 .../mfc/params/generators/fortran_gen.py      | 24 +++++++++----------
 3 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py
index 1ba1ed965d..3edb09e758 100644
--- a/toolchain/mfc/params/definitions.py
+++ b/toolchain/mfc/params/definitions.py
@@ -842,14 +842,13 @@ def _load():
                 _r(f"{px}sph_har_coeff({ll},{mm})", REAL)
 
     # fluid_pp (10 fluids)
+    # Members present in physical_parameters: gamma, pi_inf, Re, cv, qv, qvp, G.
+    # mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v were removed from the Fortran type
+    # by upstream #1085/#1093 — they must NOT be registered (namelist read would crash).
     for f in range(1, NF + 1):
         px = f"fluid_pp({f})%"
         for a, sym in [("gamma", r"\f$\gamma_k\f$"), ("pi_inf", r"\f$\pi_{\infty,k}\f$"), ("cv", r"\f$c_{v,k}\f$"), ("qv", r"\f$q_{v,k}\f$"), ("qvp", r"\f$q'_{v,k}\f$")]:
             _r(f"{px}{a}", REAL, math=sym)
-        _r(f"{px}mul0", REAL, {"viscosity"}, math=r"\f$\mu_{l,k}\f$")
-        _r(f"{px}ss", REAL, {"surface_tension"}, math=r"\f$\sigma_k\f$")
-        for a in ["pv", "gamma_v", "M_v", "mu_v", "k_v", "cp_v", "D_v"]:
-            _r(f"{px}{a}", REAL, {"bubbles"})
         _r(f"{px}G", REAL, {"elasticity"}, math=r"\f$G_k\f$")
         _r(f"{px}Re(1)", REAL, {"viscosity"}, math=r"\f$\mathrm{Re}_k\f$ (shear)")
         _r(f"{px}Re(2)", REAL, {"viscosity"}, math=r"\f$\mathrm{Re}_k\f$ (bulk)")
@@ -1049,11 +1048,16 @@ def _load():
             _r(f"simplex_params%perturb_vel_offset({d},{j})", REAL)
 
     # lag_params (Lagrangian bubbles)
+    # Members present in bubbles_lagrange_parameters: solver_approach, cluster_type,
+    # pressure_corrector, smooth_type, heatTransfer_model, massTransfer_model,
+    # write_bubbles, write_bubbles_stats, nBubs_glb, epsilonb, charwidth, valmaxvoid.
+    # T0/Thost/c0/rho0/x0 were removed from the Fortran type by upstream #1085/#1093
+    # — they must NOT be registered (namelist read would crash).
     for a in ["heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"]:
         _r(f"lag_params%{a}", LOG, {"bubbles"})
     for a in ["solver_approach", "cluster_type", "smooth_type", "nBubs_glb"]:
         _r(f"lag_params%{a}", INT, {"bubbles"})
-    for a in ["epsilonb", "valmaxvoid", "charwidth", "c0", "rho0", "T0", "x0", "Thost"]:
+    for a in ["epsilonb", "valmaxvoid", "charwidth"]:
         _r(f"lag_params%{a}", REAL, {"bubbles"})
 
     # chem_params
diff --git a/toolchain/mfc/params/descriptions.py b/toolchain/mfc/params/descriptions.py
index 016fd8cde8..79cc4089f4 100644
--- a/toolchain/mfc/params/descriptions.py
+++ b/toolchain/mfc/params/descriptions.py
@@ -323,15 +323,6 @@
     (r"fluid_pp\((\d+)\)%qv", "Heat of formation for fluid {0}"),
     (r"fluid_pp\((\d+)\)%qvp", "Heat of formation prime for fluid {0}"),
     (r"fluid_pp\((\d+)\)%Re\((\d+)\)", "Reynolds number component {1} for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%mul0", "Reference liquid viscosity for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%ss", "Surface tension for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%pv", "Vapor pressure for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%gamma_v", "Specific heat ratio of vapor phase for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%M_v", "Molecular weight of vapor phase for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%mu_v", "Viscosity of vapor phase for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%k_v", "Thermal conductivity of vapor phase for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%cp_v", "Specific heat capacity (const. pressure) of vapor for fluid {0}"),
-    (r"fluid_pp\((\d+)\)%D_v", "Vapor mass diffusivity for fluid {0}"),
     (r"fluid_pp\((\d+)\)%non_newtonian", "Enable Herschel-Bulkley non-Newtonian viscosity for fluid {0}"),
     (r"fluid_pp\((\d+)\)%K", "HB consistency index for fluid {0}"),
     (r"fluid_pp\((\d+)\)%nn", "HB flow behavior index for fluid {0}"),
@@ -500,11 +491,6 @@
     (r"lag_params%epsilonb", "Standard deviation scaling for Gaussian smoothing"),
     (r"lag_params%charwidth", "Domain virtual depth for 2D simulations"),
     (r"lag_params%valmaxvoid", "Maximum permitted void fraction"),
-    (r"lag_params%T0", "Initial bubble temperature"),
-    (r"lag_params%Thost", "Host fluid temperature"),
-    (r"lag_params%c0", "Initial sound speed"),
-    (r"lag_params%rho0", "Initial density"),
-    (r"lag_params%x0", "Initial bubble position"),
     (r"lag_params%(\w+)", "Lagrangian tracking parameter: {0}"),
     # chem_params patterns - specific fields first
     (r"chem_params%diffusion", "Enable species diffusion for chemistry"),
diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py
index d1bd45477e..817a5fa4fe 100644
--- a/toolchain/mfc/params/generators/fortran_gen.py
+++ b/toolchain/mfc/params/generators/fortran_gen.py
@@ -318,15 +318,16 @@ def _emit_bcast_group(lines: List[str], vars_list: List[str], mpi_type: str) ->
 def _emit_fluid_pp(lines: List[str], target: str) -> None:
     """Emit the fluid_pp(i) member-loop broadcast block.
 
-    Members broadcast: the 13 REAL members (EOS core plus the Herschel-Bulkley
-    non-Newtonian set) and the LOGICAL non_newtonian flag, matching the manual
-    lists this generator replaces. Sim additionally: Re(1) with count=2.
+    Members broadcast: all REAL registry members of fluid_pp (gamma, pi_inf, G, cv, qv,
+    qvp) derived from physical_parameters.  Sim additionally: Re(1) with count=2.
+    mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v were removed from the Fortran type by
+    upstream #1085/#1093 and are no longer registered.
     """
-    fp_real_members = ["gamma", "pi_inf", "G", "cv", "qv", "qvp", "K", "nn", "tau0", "hb_m", "mu_min", "mu_max", "mu_bulk"]
+    # Walk the registry for fluid_pp REAL members (Re handled separately; exclude).
+    fp_real_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("fluid_pp(1)%") and not k.startswith("fluid_pp(1)%Re("))
     lines.append("        do i = 1, num_fluids_max")
     for mem in fp_real_members:
         lines.append(f"            call MPI_BCAST(fluid_pp(i)%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)")
-    lines.append("            call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)")
     if target == "sim":
         lines.append("            call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)")
     lines.append("        end do")
@@ -347,14 +348,13 @@ def _emit_bub_pp(lines: List[str]) -> None:
 def _emit_lag_params(lines: List[str]) -> None:
     """Emit the lag_params member broadcast block (sim-only, under bubbles_lagrange guard).
 
-    Subset of lag_params members that are actually broadcast: the fields that appear in the
-    existing simulation m_mpi_proxy.fpp lag_params block.  The registry has additional
-    members (T0, Thost, c0, rho0, x0) that are not broadcast (rank-0-only).
+    All registered lag_params members are broadcast.  T0/Thost/c0/rho0/x0 were removed
+    from the Fortran type by upstream #1085/#1093 and are no longer in the registry.
     """
-    # Hardcoded broadcast subset — matches the existing sim m_mpi_proxy exactly.
-    lag_log = ["heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"]
-    lag_int = ["cluster_type", "nBubs_glb", "smooth_type", "solver_approach"]
-    lag_real = ["charwidth", "epsilonb", "valmaxvoid"]
+    # Walk the registry for lag_params members, split by type.
+    lag_log = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("lag_params%") and REGISTRY.all_params[k].param_type == ParamType.LOG)
+    lag_int = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("lag_params%") and REGISTRY.all_params[k].param_type in (ParamType.INT, ParamType.ANALYTIC_INT))
+    lag_real = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("lag_params%") and REGISTRY.all_params[k].param_type in _REAL_TYPES)
     lines.append("        if (bubbles_lagrange) then")
     for mem in sorted(lag_log):
         lines.append(f"            call MPI_BCAST(lag_params%{mem}, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)")

From 3596cc0c6f810dbd35df8f06092c9d1633dcb239 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 10 Jun 2026 19:17:58 -0400
Subject: [PATCH 2/7] src: broadcast wall-velocity BC members in pre and post

bc_x/y/z%{vb1,vb2,vb3,ve1,ve2,ve3} are read from the namelist on rank 0
and consumed on all ranks by s_slip_wall/s_no_slip_wall in
src/common/m_boundary_common.fpp (~833-1155). These routines are compiled
into all three targets and are reached from pre_process (m_data_output,
m_perturbation) and post_process (m_start_up) code paths.

Without the broadcast, non-root ranks use uninitialised values for the
wall-velocity components, producing rank-dependent ghost cells for any
multi-rank wall-BC pre-process or post-process run. The sim residue has
always broadcast these; pre and post were missing them.

Add the matching broadcasts to both pre and post using the same nested
Fypp loop idiom as the simulation residue.
---
 src/post_process/m_mpi_proxy.fpp | 8 ++++++++
 src/pre_process/m_mpi_proxy.fpp  | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp
index d9beac5347..7649747b72 100644
--- a/src/post_process/m_mpi_proxy.fpp
+++ b/src/post_process/m_mpi_proxy.fpp
@@ -85,6 +85,14 @@ contains
             call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
         #:endfor
 
+        ! wall-velocity members consumed by s_slip_wall/s_no_slip_wall on all ranks
+        #:for DIM in ['x', 'y', 'z']
+            #:for DIR in [1, 2, 3]
+                call MPI_BCAST(bc_${DIM}$%vb${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
+                call MPI_BCAST(bc_${DIM}$%ve${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
+            #:endfor
+        #:endfor
+
         ! manual: cfl_dt (runtime-computed logical), bc_io (BC-file existence)
         call MPI_BCAST(cfl_dt, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
         call MPI_BCAST(bc_io, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp
index bf6e3d1180..d286fd87a4 100644
--- a/src/pre_process/m_mpi_proxy.fpp
+++ b/src/pre_process/m_mpi_proxy.fpp
@@ -49,6 +49,14 @@ contains
             call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
         #:endfor
 
+        ! wall-velocity members consumed by s_slip_wall/s_no_slip_wall on all ranks
+        #:for DIM in ['x', 'y', 'z']
+            #:for DIR in [1, 2, 3]
+                call MPI_BCAST(bc_${DIM}$%vb${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
+                call MPI_BCAST(bc_${DIM}$%ve${DIR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
+            #:endfor
+        #:endfor
+
         ! manual: cfl_dt (runtime-computed logical), bc_io (BC-file existence)
         call MPI_BCAST(cfl_dt, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
         call MPI_BCAST(bc_io, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)

From bdd6a7e735ba367cff1bc7af43b1e2df4b2ede74 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 10 Jun 2026 19:23:35 -0400
Subject: [PATCH 3/7] codegen: broadcast muscl_eps and walk the registry for
 struct broadcasts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

muscl_eps was excluded from broadcast generation via _BCAST_EXCLUDE on the
incorrect assumption that it is derived post-broadcast. The derivation
(in m_weno or m_muscl) only fires under f_is_default(muscl_eps), and
default values are assigned on rank 0 only. Every multi-rank MUSCL run
therefore had rank-divergent muscl_eps on non-root ranks. Remove it from
the exclusion set.

Tuple-set delta (var, mpi_type, count) vs. HEAD~:
  sim: +1 entry: (muscl_eps, mpi_p, 1)
  pre: no change
  post: no change

_emit_fluid_pp and _emit_lag_params now walk the registry instead of
maintaining hardcoded member lists. After Commit 1 deregistered the
dead members (mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v for fluid_pp;
T0/Thost/c0/rho0/x0 for lag_params), the registry now matches the
Fortran types exactly. Re(1) count=2 remains sim-only via an explicit
target check with a comment. G is walked as a regular REAL member.

Tests: 3 new tests added — muscl_eps now broadcast in sim, fluid_pp and
lag_params registry walks produce exactly the registered members minus
documented exclusions, dead members absent.
---
 .../mfc/params/generators/fortran_gen.py      | 21 +++--
 .../mfc/params_tests/test_fortran_gen.py      | 83 ++++++++++++++++++-
 2 files changed, 93 insertions(+), 11 deletions(-)

diff --git a/toolchain/mfc/params/generators/fortran_gen.py b/toolchain/mfc/params/generators/fortran_gen.py
index 817a5fa4fe..14e0f472a6 100644
--- a/toolchain/mfc/params/generators/fortran_gen.py
+++ b/toolchain/mfc/params/generators/fortran_gen.py
@@ -235,7 +235,11 @@ def generate_case_opt_decls_fpp() -> str:
 _STRUCT_ROOTS = frozenset({"bc_x", "bc_y", "bc_z", "x_domain", "y_domain", "z_domain", "x_output", "y_output", "z_output"})
 
 # Variables excluded from broadcast generation (derived post-broadcast or non-namelist).
-_BCAST_EXCLUDE = frozenset({"muscl_eps"})
+# muscl_eps was previously excluded here on the assumption that it was derived
+# post-broadcast, but the derivation only fires under f_is_default(muscl_eps),
+# and default values are assigned on rank 0 only.  Every multi-rank MUSCL run
+# therefore had rank-divergent muscl_eps.  Removed from exclusion to fix the bug.
+_BCAST_EXCLUDE: frozenset = frozenset()
 
 # Post-process scalars that are namelist-bound but consumed on rank 0 only (reading/init).
 # Broadcasting them would be harmless but changes the existing call set, which we preserve.
@@ -318,16 +322,19 @@ def _emit_bcast_group(lines: List[str], vars_list: List[str], mpi_type: str) ->
 def _emit_fluid_pp(lines: List[str], target: str) -> None:
     """Emit the fluid_pp(i) member-loop broadcast block.
 
-    Members broadcast: all REAL registry members of fluid_pp (gamma, pi_inf, G, cv, qv,
-    qvp) derived from physical_parameters.  Sim additionally: Re(1) with count=2.
+    Walks the registry for every fluid_pp member, emitting the MPI datatype that
+    matches each member's registered ParamType (the Herschel-Bulkley merge added
+    a LOGICAL member, so the datatype must come from the registry, not be assumed
+    REAL).  Sim additionally broadcasts Re(1) with count=2 (kept sim-only to
+    preserve the historical call set; pre/post never consumed Re).
     mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v were removed from the Fortran type by
     upstream #1085/#1093 and are no longer registered.
     """
-    # Walk the registry for fluid_pp REAL members (Re handled separately; exclude).
-    fp_real_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("fluid_pp(1)%") and not k.startswith("fluid_pp(1)%Re("))
+    fp_members = sorted(k.split("%", 1)[1] for k in REGISTRY.all_params if k.startswith("fluid_pp(1)%") and not k.startswith("fluid_pp(1)%Re("))
     lines.append("        do i = 1, num_fluids_max")
-    for mem in fp_real_members:
-        lines.append(f"            call MPI_BCAST(fluid_pp(i)%{mem}, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)")
+    for mem in fp_members:
+        ptype = REGISTRY.all_params[f"fluid_pp(1)%{mem}"].param_type
+        lines.append(f"            call MPI_BCAST(fluid_pp(i)%{mem}, 1, {_mpi_type_for(ptype)}, 0, MPI_COMM_WORLD, ierr)")
     if target == "sim":
         lines.append("            call MPI_BCAST(fluid_pp(i)%Re(1), 2, mpi_p, 0, MPI_COMM_WORLD, ierr)")
     lines.append("        end do")
diff --git a/toolchain/mfc/params_tests/test_fortran_gen.py b/toolchain/mfc/params_tests/test_fortran_gen.py
index e4fe751f7c..eac0bfccaf 100644
--- a/toolchain/mfc/params_tests/test_fortran_gen.py
+++ b/toolchain/mfc/params_tests/test_fortran_gen.py
@@ -387,8 +387,9 @@ def test_generate_bcast_fpp_case_opt_guard_sim():
     assert "num_fluids" in guard_body
     assert "mapped_weno" in guard_body
 
-    # muscl_eps must NOT appear anywhere (it is excluded — derived post-broadcast)
-    assert "muscl_eps" not in out
+    # muscl_eps IS inside the case-opt guard (it is sim-only, non-CASE_OPT_PARAM,
+    # so it appears in the real-scalars section, which is outside the case-opt block)
+    assert "call MPI_BCAST(muscl_eps, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in out
 
 
 def test_generate_bcast_fpp_case_opt_not_in_pre_post():
@@ -504,8 +505,6 @@ def test_generate_bcast_fpp_excludes_manual_residue():
         assert "m_glb" not in out, f"{target}: m_glb should be manual"
         assert "n_glb" not in out, f"{target}: n_glb should be manual"
         assert "p_glb" not in out, f"{target}: p_glb should be manual"
-        # muscl_eps is excluded (derived post-broadcast)
-        assert "muscl_eps" not in out, f"{target}: muscl_eps should be excluded"
 
     sim = generate_bcast_fpp("sim")
     # shear_stress, bulk_stress, bodyForces are derived (non-namelist)
@@ -530,3 +529,79 @@ def test_generate_bcast_fpp_bad_target():
 
     with pytest.raises(ValueError, match="Unknown target"):
         generate_bcast_fpp("bad")
+
+
+def test_generate_bcast_fpp_muscl_eps_now_broadcast():
+    """muscl_eps is broadcast for sim (latent-bug fix: derivation is rank-0-only).
+
+    Previously excluded via _BCAST_EXCLUDE; every multi-rank MUSCL run had
+    rank-divergent muscl_eps because f_is_default() only fires on rank 0.
+    """
+    from mfc.params.generators.fortran_gen import generate_bcast_fpp
+
+    sim = generate_bcast_fpp("sim")
+    assert "call MPI_BCAST(muscl_eps, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)" in sim
+
+    # muscl_eps is sim-only; must not appear in pre/post
+    pre = generate_bcast_fpp("pre")
+    post = generate_bcast_fpp("post")
+    assert "muscl_eps" not in pre
+    assert "muscl_eps" not in post
+
+
+def test_generate_bcast_fpp_fluid_pp_registry_walk():
+    """fluid_pp emitter walks registry; no dead members (mul0/ss/pv/gamma_v/M_v/mu_v/k_v/cp_v/D_v removed upstream).
+
+    Re(1) count=2 is sim-only; all other registered REAL members appear in all three
+    targets.
+    """
+    from mfc.params.generators.fortran_gen import generate_bcast_fpp
+
+    sim = generate_bcast_fpp("sim")
+    pre = generate_bcast_fpp("pre")
+    post = generate_bcast_fpp("post")
+
+    # All registered members appear in every target
+    for mem in ("gamma", "pi_inf", "cv", "qv", "qvp", "G"):
+        for out, t in [(sim, "sim"), (pre, "pre"), (post, "post")]:
+            assert f"fluid_pp(i)%{mem}" in out, f"{t}: fluid_pp(i)%{mem} missing"
+
+    # Re(1) count=2 is sim-only
+    assert "fluid_pp(i)%Re(1)" in sim
+    assert "fluid_pp(i)%Re(1)" not in pre
+    assert "fluid_pp(i)%Re(1)" not in post
+
+    # Dead members must not appear
+    for dead in ("mul0", "ss", "pv", "gamma_v", "M_v", "mu_v", "k_v", "cp_v", "D_v"):
+        for out, t in [(sim, "sim"), (pre, "pre"), (post, "post")]:
+            assert f"fluid_pp(i)%{dead}" not in out, f"{t}: dead member fluid_pp(i)%{dead} present"
+
+
+def test_generate_bcast_fpp_fluid_pp_member_datatypes():
+    # The HB merge added a LOGICAL fluid_pp member; datatypes must come from the
+    # registry, not be assumed REAL.
+    from mfc.params.generators.fortran_gen import generate_bcast_fpp
+
+    sim = generate_bcast_fpp("sim")
+    assert "call MPI_BCAST(fluid_pp(i)%non_newtonian, 1, MPI_LOGICAL," in sim
+    assert "call MPI_BCAST(fluid_pp(i)%tau0, 1, mpi_p," in sim
+    assert "non_newtonian, 1, mpi_p" not in sim
+
+
+def test_generate_bcast_fpp_lag_params_registry_walk():
+    """lag_params emitter walks registry; no dead members (T0/Thost/c0/rho0/x0 removed upstream)."""
+    from mfc.params.generators.fortran_gen import generate_bcast_fpp
+
+    sim = generate_bcast_fpp("sim")
+
+    # All registered members must appear
+    for mem in ("solver_approach", "cluster_type", "smooth_type", "nBubs_glb"):
+        assert f"lag_params%{mem}" in sim, f"lag_params%{mem} missing from sim"
+    for mem in ("heatTransfer_model", "massTransfer_model", "pressure_corrector", "write_bubbles", "write_bubbles_stats"):
+        assert f"lag_params%{mem}" in sim, f"lag_params%{mem} missing from sim"
+    for mem in ("epsilonb", "charwidth", "valmaxvoid"):
+        assert f"lag_params%{mem}" in sim, f"lag_params%{mem} missing from sim"
+
+    # Dead members must not appear
+    for dead in ("T0", "Thost", "c0", "rho0", "x0"):
+        assert f"lag_params%{dead}" not in sim, f"dead member lag_params%{dead} present in sim"

From c2cd9c58819d06fd73b3cba5a1535dde8ba476b7 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 10 Jun 2026 21:53:57 -0400
Subject: [PATCH 4/7] src: broadcast BC type codes with MPI_INTEGER in
 pre_process; pin residue broadcasts

pre_process sent the integer bc_x/y/z%beg/%end with mpi_p (an 8-byte real transfer over 4-byte integers - undefined behavior that happened to work by adjacency); simulation and post_process already used MPI_INTEGER. Also adds a test pinning the hand-written vb/ve and BC-code residue in pre/post so a merge conflict cannot silently drop them. Both from review.
---
 src/pre_process/m_mpi_proxy.fpp                | 10 +++++++---
 toolchain/mfc/params_tests/test_fortran_gen.py | 16 ++++++++++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp
index d286fd87a4..8e733765f3 100644
--- a/src/pre_process/m_mpi_proxy.fpp
+++ b/src/pre_process/m_mpi_proxy.fpp
@@ -39,16 +39,20 @@ contains
             call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
         #:endfor
 
-        ! manual: bc_x/y/z and domain bounds (registered as struct members, not in NAMELIST_VARS)
+        ! manual: domain bounds and wall temperatures (REAL struct members, not in NAMELIST_VARS)
         #:for VAR in [ 'x_domain%beg', 'x_domain%end', 'y_domain%beg',         &
             & 'y_domain%end', 'z_domain%beg', 'z_domain%end',                   &
-            & 'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end',                  &
-            & 'bc_z%beg', 'bc_z%end', 'bc_x%Twall_in', 'bc_x%Twall_out',       &
+            & 'bc_x%Twall_in', 'bc_x%Twall_out',                                &
             & 'bc_y%Twall_in', 'bc_y%Twall_out', 'bc_z%Twall_in',              &
             & 'bc_z%Twall_out']
             call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr)
         #:endfor
 
+        ! manual: BC type codes (INTEGER struct members)
+        #:for VAR in [ 'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end']
+            call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+        #:endfor
+
         ! wall-velocity members consumed by s_slip_wall/s_no_slip_wall on all ranks
         #:for DIM in ['x', 'y', 'z']
             #:for DIR in [1, 2, 3]
diff --git a/toolchain/mfc/params_tests/test_fortran_gen.py b/toolchain/mfc/params_tests/test_fortran_gen.py
index eac0bfccaf..0dc208608c 100644
--- a/toolchain/mfc/params_tests/test_fortran_gen.py
+++ b/toolchain/mfc/params_tests/test_fortran_gen.py
@@ -605,3 +605,19 @@ def test_generate_bcast_fpp_lag_params_registry_walk():
     # Dead members must not appear
     for dead in ("T0", "Thost", "c0", "rho0", "x0"):
         assert f"lag_params%{dead}" not in sim, f"dead member lag_params%{dead} present in sim"
+
+
+def test_mpi_proxy_residue_pins_wall_velocity_and_bc_datatypes():
+    """The vb/ve wall-velocity broadcasts and integer BC datatypes live in
+    hand-written residue (not codegen); pin them so an edit or merge conflict
+    that drops them fails loudly."""
+    import pathlib
+
+    root = pathlib.Path(__file__).resolve().parents[3]
+    for target in ("pre_process", "post_process"):
+        src = (root / "src" / target / "m_mpi_proxy.fpp").read_text()
+        assert "bc_${DIM}$%vb${DIR}$" in src, f"{target}: vb broadcasts missing"
+        assert "bc_${DIM}$%ve${DIR}$" in src, f"{target}: ve broadcasts missing"
+        assert "'bc_x%beg', 'bc_x%end', 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end']" in src
+        seg = src.split("'bc_z%beg', 'bc_z%end']", 1)[1]
+        assert "MPI_INTEGER" in seg.split("#:endfor")[0], f"{target}: BC codes not MPI_INTEGER"

From a319043096e760cd6550a311ee9c7700e90b23ae Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 10 Jun 2026 21:15:20 -0400
Subject: [PATCH 5/7] src: bundle pre_process initial-condition state into
 ic_context

---
 src/common/m_derived_types.fpp          |  15 +++
 src/pre_process/m_initial_condition.fpp | 121 +++++++++++-------------
 src/pre_process/m_start_up.fpp          |  12 +--
 3 files changed, 75 insertions(+), 73 deletions(-)

diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp
index bfed0f07e4..0c4ea95872 100644
--- a/src/common/m_derived_types.fpp
+++ b/src/common/m_derived_types.fpp
@@ -154,6 +154,21 @@ module m_derived_types
         integer               :: psi      !< Psi variable equation
     end type eqn_idx_info
 
+    !> Initial-condition state assembled by pre_process: working primitive and
+    !> conservative fields, temperature, boundary-condition types, and the
+    !> patch-identity bookkeeping array.
+    type ic_context
+        type(scalar_field), allocatable, dimension(:)    :: q_prim_vf  !< Primitive variables
+        type(scalar_field), allocatable, dimension(:)    :: q_cons_vf  !< Conservative variables
+        type(scalar_field)                               :: q_T_sf     !< Temperature field
+        type(integer_field), allocatable, dimension(:,:) :: bc_type    !< Boundary-condition type fields
+#ifdef MFC_MIXED_PRECISION
+        integer(kind=1), allocatable, dimension(:,:,:) :: patch_id_fp  !< Patch identities bookkeeping
+#else
+        integer, allocatable, dimension(:,:,:) :: patch_id_fp  !< Patch identities bookkeeping
+#endif
+    end type ic_context
+
     type bc_patch_parameters
         integer                :: geometry
         integer                :: type
diff --git a/src/pre_process/m_initial_condition.fpp b/src/pre_process/m_initial_condition.fpp
index 187507ce23..d788bb0a6e 100644
--- a/src/pre_process/m_initial_condition.fpp
+++ b/src/pre_process/m_initial_condition.fpp
@@ -18,20 +18,7 @@ module m_initial_condition
 
     implicit none
 
-    ! NOTE: Abstract interface enables dynamic dispatch without repeated model_eqns checks
-    type(scalar_field), allocatable, dimension(:)    :: q_prim_vf  !< primitive variables
-    type(scalar_field), allocatable, dimension(:)    :: q_cons_vf  !< conservative variables
-    type(scalar_field)                               :: q_T_sf     !< Temperature field
-    type(integer_field), dimension(:,:), allocatable :: bc_type    !< bc_type fields
-    !> @cond
-#ifdef MFC_MIXED_PRECISION
-    integer(kind=1), allocatable, dimension(:,:,:) :: patch_id_fp
-#else
-    !> @endcond
-    integer, allocatable, dimension(:,:,:) :: patch_id_fp
-    !> @cond
-#endif
-    !> @endcond
+    type(ic_context) :: ic  !< Initial-condition state (fields, bc types, patch ids)
 
 contains
 
@@ -40,19 +27,19 @@ contains
 
         integer :: i, j, k, l
 
-        allocate (q_prim_vf(1:sys_size))
-        allocate (q_cons_vf(1:sys_size))
+        allocate (ic%q_prim_vf(1:sys_size))
+        allocate (ic%q_cons_vf(1:sys_size))
 
         do i = 1, sys_size
-            allocate (q_prim_vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,idwbuff(2)%beg:idwbuff(2)%end,idwbuff(3)%beg:idwbuff(3)%end))
-            allocate (q_cons_vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,idwbuff(2)%beg:idwbuff(2)%end,idwbuff(3)%beg:idwbuff(3)%end))
+            allocate (ic%q_prim_vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,idwbuff(2)%beg:idwbuff(2)%end,idwbuff(3)%beg:idwbuff(3)%end))
+            allocate (ic%q_cons_vf(i)%sf(idwbuff(1)%beg:idwbuff(1)%end,idwbuff(2)%beg:idwbuff(2)%end,idwbuff(3)%beg:idwbuff(3)%end))
         end do
 
         if (chemistry) then
-            allocate (q_T_sf%sf(idwbuff(1)%beg:idwbuff(1)%end,idwbuff(2)%beg:idwbuff(2)%end,idwbuff(3)%beg:idwbuff(3)%end))
+            allocate (ic%q_T_sf%sf(idwbuff(1)%beg:idwbuff(1)%end,idwbuff(2)%beg:idwbuff(2)%end,idwbuff(3)%beg:idwbuff(3)%end))
         end if
 
-        allocate (patch_id_fp(0:m,0:n,0:p))
+        allocate (ic%patch_id_fp(0:m,0:n,0:p))
 
         if (qbmm .and. .not. polytropic) then
             allocate (pb%sf(0:m,0:n,0:p,1:nnode,1:nb))
@@ -60,41 +47,41 @@ contains
         end if
 
         do i = 1, sys_size
-            q_cons_vf(i)%sf = -1.e-6_stp  ! real(dflt_real, kind=stp) ! TODO :: remove this magic number
-            q_prim_vf(i)%sf = -1.e-6_stp  ! real(dflt_real, kind=stp)
+            ic%q_cons_vf(i)%sf = -1.e-6_stp  ! real(dflt_real, kind=stp) ! TODO :: remove this magic number
+            ic%q_prim_vf(i)%sf = -1.e-6_stp  ! real(dflt_real, kind=stp)
         end do
 
-        allocate (bc_type(1:num_dims,1:2))
+        allocate (ic%bc_type(1:num_dims,1:2))
 
-        allocate (bc_type(1, 1)%sf(0:0,0:n,0:p))
-        allocate (bc_type(1, 2)%sf(0:0,0:n,0:p))
+        allocate (ic%bc_type(1, 1)%sf(0:0,0:n,0:p))
+        allocate (ic%bc_type(1, 2)%sf(0:0,0:n,0:p))
 
         do l = 0, p
             do k = 0, n
-                bc_type(1, 1)%sf(0, k, l) = int(min(bc_x%beg, 0), kind=1)
-                bc_type(1, 2)%sf(0, k, l) = int(min(bc_x%end, 0), kind=1)
+                ic%bc_type(1, 1)%sf(0, k, l) = int(min(bc_x%beg, 0), kind=1)
+                ic%bc_type(1, 2)%sf(0, k, l) = int(min(bc_x%end, 0), kind=1)
             end do
         end do
 
         if (n > 0) then
-            allocate (bc_type(2, 1)%sf(-buff_size:m + buff_size,0:0,0:p))
-            allocate (bc_type(2, 2)%sf(-buff_size:m + buff_size,0:0,0:p))
+            allocate (ic%bc_type(2, 1)%sf(-buff_size:m + buff_size,0:0,0:p))
+            allocate (ic%bc_type(2, 2)%sf(-buff_size:m + buff_size,0:0,0:p))
 
             do l = 0, p
                 do j = -buff_size, m + buff_size
-                    bc_type(2, 1)%sf(j, 0, l) = int(min(bc_y%beg, 0), kind=1)
-                    bc_type(2, 2)%sf(j, 0, l) = int(min(bc_y%end, 0), kind=1)
+                    ic%bc_type(2, 1)%sf(j, 0, l) = int(min(bc_y%beg, 0), kind=1)
+                    ic%bc_type(2, 2)%sf(j, 0, l) = int(min(bc_y%end, 0), kind=1)
                 end do
             end do
 
             if (p > 0) then
-                allocate (bc_type(3, 1)%sf(-buff_size:m + buff_size,-buff_size:n + buff_size,0:0))
-                allocate (bc_type(3, 2)%sf(-buff_size:m + buff_size,-buff_size:n + buff_size,0:0))
+                allocate (ic%bc_type(3, 1)%sf(-buff_size:m + buff_size,-buff_size:n + buff_size,0:0))
+                allocate (ic%bc_type(3, 2)%sf(-buff_size:m + buff_size,-buff_size:n + buff_size,0:0))
 
                 do k = -buff_size, n + buff_size
                     do j = -buff_size, m + buff_size
-                        bc_type(3, 1)%sf(j, k, 0) = int(min(bc_z%beg, 0), kind=1)
-                        bc_type(3, 2)%sf(j, k, 0) = int(min(bc_z%end, 0), kind=1)
+                        ic%bc_type(3, 1)%sf(j, k, 0) = int(min(bc_z%beg, 0), kind=1)
+                        ic%bc_type(3, 2)%sf(j, k, 0) = int(min(bc_z%end, 0), kind=1)
                     end do
                 end do
             end if
@@ -102,20 +89,20 @@ contains
 
         ! Initial damage state is always zero
         if (cont_damage) then
-            q_cons_vf(eqn_idx%damage)%sf = 0._wp
-            q_prim_vf(eqn_idx%damage)%sf = 0._wp
+            ic%q_cons_vf(eqn_idx%damage)%sf = 0._wp
+            ic%q_prim_vf(eqn_idx%damage)%sf = 0._wp
         end if
 
         ! Initial hyper_cleaning state is always zero TODO more general
         if (hyper_cleaning) then
-            q_cons_vf(eqn_idx%psi)%sf = 0._wp
-            q_prim_vf(eqn_idx%psi)%sf = 0._wp
+            ic%q_cons_vf(eqn_idx%psi)%sf = 0._wp
+            ic%q_prim_vf(eqn_idx%psi)%sf = 0._wp
         end if
 
         ! Setting default values for patch identities bookkeeping variable. This is necessary to avoid any confusion in the
         ! assessment of the extent of application that the overwrite permissions give a patch when it is being applied in the
         ! domain.
-        patch_id_fp = 0
+        ic%patch_id_fp = 0
 
     end subroutine s_initialize_initial_condition_module
 
@@ -127,31 +114,31 @@ contains
         integer :: i
 
         if (old_ic) then
-            call s_convert_conservative_to_primitive_variables(q_cons_vf, q_T_sf, q_prim_vf, idwbuff)
+            call s_convert_conservative_to_primitive_variables(ic%q_cons_vf, ic%q_T_sf, ic%q_prim_vf, idwbuff)
         end if
 
-        call s_apply_icpp_patches(patch_id_fp, q_prim_vf)
+        call s_apply_icpp_patches(ic%patch_id_fp, ic%q_prim_vf)
 
-        if (num_bc_patches > 0) call s_apply_boundary_patches(q_prim_vf, bc_type)
+        if (num_bc_patches > 0) call s_apply_boundary_patches(ic%q_prim_vf, ic%bc_type)
 
-        if (perturb_flow) call s_perturb_surrounding_flow(q_prim_vf)
-        if (perturb_sph) call s_perturb_sphere(q_prim_vf)
-        if (mixlayer_perturb) call s_perturb_mixlayer(q_prim_vf)
-        if (simplex_perturb) call s_perturb_simplex(q_prim_vf)
-        if (chemistry) call s_compute_T_from_primitives(q_T_sf, q_prim_vf, idwint)
+        if (perturb_flow) call s_perturb_surrounding_flow(ic%q_prim_vf)
+        if (perturb_sph) call s_perturb_sphere(ic%q_prim_vf)
+        if (mixlayer_perturb) call s_perturb_mixlayer(ic%q_prim_vf)
+        if (simplex_perturb) call s_perturb_simplex(ic%q_prim_vf)
+        if (chemistry) call s_compute_T_from_primitives(ic%q_T_sf, ic%q_prim_vf, idwint)
 
         if (elliptic_smoothing .and. chemistry) then
-            call s_elliptic_smoothing(q_prim_vf, bc_type, q_T_sf)
-            call s_compute_T_from_primitives(q_T_sf, q_prim_vf, idwint)
+            call s_elliptic_smoothing(ic%q_prim_vf, ic%bc_type, ic%q_T_sf)
+            call s_compute_T_from_primitives(ic%q_T_sf, ic%q_prim_vf, idwint)
         else if (elliptic_smoothing) then
-            call s_elliptic_smoothing(q_prim_vf, bc_type)
+            call s_elliptic_smoothing(ic%q_prim_vf, ic%bc_type)
         end if
 
-        call s_convert_primitive_to_conservative_variables(q_prim_vf, q_cons_vf)
+        call s_convert_primitive_to_conservative_variables(ic%q_prim_vf, ic%q_cons_vf)
 
         if (qbmm .and. .not. polytropic) then
-            call s_initialize_mv(q_cons_vf, mv%sf)
-            call s_initialize_pb(q_cons_vf, mv%sf, pb%sf)
+            call s_initialize_mv(ic%q_cons_vf, mv%sf)
+            call s_initialize_pb(ic%q_cons_vf, mv%sf, pb%sf)
         end if
 
     end subroutine s_generate_initial_condition
@@ -162,33 +149,33 @@ contains
         integer :: i
 
         do i = 1, sys_size
-            deallocate (q_prim_vf(i)%sf)
-            deallocate (q_cons_vf(i)%sf)
+            deallocate (ic%q_prim_vf(i)%sf)
+            deallocate (ic%q_cons_vf(i)%sf)
         end do
 
-        deallocate (q_prim_vf)
-        deallocate (q_cons_vf)
+        deallocate (ic%q_prim_vf)
+        deallocate (ic%q_cons_vf)
 
         if (chemistry) then
-            deallocate (q_T_sf%sf)
+            deallocate (ic%q_T_sf%sf)
         end if
 
-        deallocate (patch_id_fp)
+        deallocate (ic%patch_id_fp)
 
-        deallocate (bc_type(1, 1)%sf)
-        deallocate (bc_type(1, 2)%sf)
+        deallocate (ic%bc_type(1, 1)%sf)
+        deallocate (ic%bc_type(1, 2)%sf)
 
         if (n > 0) then
-            deallocate (bc_type(2, 1)%sf)
-            deallocate (bc_type(2, 2)%sf)
+            deallocate (ic%bc_type(2, 1)%sf)
+            deallocate (ic%bc_type(2, 2)%sf)
         end if
 
         if (p > 0) then
-            deallocate (bc_type(3, 1)%sf)
-            deallocate (bc_type(3, 2)%sf)
+            deallocate (ic%bc_type(3, 1)%sf)
+            deallocate (ic%bc_type(3, 2)%sf)
         end if
 
-        deallocate (bc_type)
+        deallocate (ic%bc_type)
 
     end subroutine s_finalize_initial_condition_module
 
diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp
index 7fabdc796a..bec4585e38 100644
--- a/src/pre_process/m_start_up.fpp
+++ b/src/pre_process/m_start_up.fpp
@@ -531,7 +531,7 @@ contains
 
         call cpu_time(start)
 
-        if (old_ic) call s_read_ic_data_files(q_cons_vf)
+        if (old_ic) call s_read_ic_data_files(ic%q_cons_vf)
 
         call s_generate_initial_condition()
 
@@ -544,8 +544,8 @@ contains
                         r2 = x_cc(j)**2
                         if (n > 0) r2 = r2 + y_cc(k)**2
                         if (p > 0) r2 = r2 + z_cc(l)**2
-                        q_cons_vf(eqn_idx%psi)%sf(j, k, l) = 1.0e-2_wp*exp(-r2/(2.0_wp*0.05_wp**2))
-                        q_prim_vf(eqn_idx%psi)%sf(j, k, l) = q_cons_vf(eqn_idx%psi)%sf(j, k, l)
+                        ic%q_cons_vf(eqn_idx%psi)%sf(j, k, l) = 1.0e-2_wp*exp(-r2/(2.0_wp*0.05_wp**2))
+                        ic%q_prim_vf(eqn_idx%psi)%sf(j, k, l) = ic%q_cons_vf(eqn_idx%psi)%sf(j, k, l)
                     end do
                 end do
             end do
@@ -556,13 +556,13 @@ contains
                 print *, 'initial condition might have been altered due to enforcement of pTg-equilibrium (relax = "T" activated)'
             end if
 
-            call s_infinite_relaxation_k(q_cons_vf)
+            call s_infinite_relaxation_k(ic%q_cons_vf)
         end if
 
         if (chemistry) then
-            call s_write_data_files(q_cons_vf, q_prim_vf, bc_type, q_T_sf)
+            call s_write_data_files(ic%q_cons_vf, ic%q_prim_vf, ic%bc_type, ic%q_T_sf)
         else
-            call s_write_data_files(q_cons_vf, q_prim_vf, bc_type)
+            call s_write_data_files(ic%q_cons_vf, ic%q_prim_vf, ic%bc_type)
         end if
 
         call cpu_time(finish)

From f2a8bf7be6d5ace4d609a13c98fb3db58cad0326 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 10 Jun 2026 21:19:44 -0400
Subject: [PATCH 6/7] src: unify the boundary-condition dispatcher across
 directions

---
 src/common/m_boundary_common.fpp | 257 +++++++++----------------------
 1 file changed, 76 insertions(+), 181 deletions(-)

diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp
index 6eb5383f75..f008659fa7 100644
--- a/src/common/m_boundary_common.fpp
+++ b/src/common/m_boundary_common.fpp
@@ -79,137 +79,18 @@ contains
         type(scalar_field), dimension(sys_size), intent(inout)                                               :: q_prim_vf
         real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
         type(integer_field), dimension(1:num_dims,1:2), intent(in)                                           :: bc_type
-        integer                                                                                              :: k, l
         type(scalar_field), optional, intent(inout)                                                          :: q_T_sf
 
-        ! BC type codes defined in m_constants.fpp; non-negative values are MPI boundaries
-
-        if (bc_x%beg >= 0) then
-            call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, -1, sys_size, pb_in, mv_in, q_T_sf)
-        else
-            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-            do l = 0, p
-                do k = 0, n
-                    select case (int(bc_type(1, 1)%sf(0, k, l)))
-                    case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP)
-                        call s_ghost_cell_extrapolation(q_prim_vf, 1, -1, k, l, q_T_sf)
-                    case (BC_REFLECTIVE)
-                        call s_symmetry(q_prim_vf, 1, -1, k, l, pb_in, mv_in, q_T_sf)
-                    case (BC_PERIODIC)
-                        call s_periodic(q_prim_vf, 1, -1, k, l, pb_in, mv_in, q_T_sf)
-                    case (BC_SLIP_WALL)
-                        call s_slip_wall(q_prim_vf, 1, -1, k, l, q_T_sf)
-                    case (BC_NO_SLIP_WALL)
-                        call s_no_slip_wall(q_prim_vf, 1, -1, k, l, q_T_sf)
-                    case (BC_DIRICHLET)
-                        call s_dirichlet(q_prim_vf, 1, -1, k, l, q_T_sf)
-                    end select
-
-                    if (qbmm .and. (.not. polytropic) .and. present(pb_in) .and. present(mv_in) .and. (bc_type(1, 1)%sf(0, k, &
-                        & l) <= BC_GHOST_EXTRAP)) then
-                        call s_qbmm_extrapolation(1, -1, k, l, pb_in, mv_in)
-                    end if
-                end do
-            end do
-            $:END_GPU_PARALLEL_LOOP()
-        end if
-
-        if (bc_x%end >= 0) then
-            call s_mpi_sendrecv_variables_buffers(q_prim_vf, 1, 1, sys_size, pb_in, mv_in, q_T_sf)
-        else
-            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-            do l = 0, p
-                do k = 0, n
-                    select case (int(bc_type(1, 2)%sf(0, k, l)))
-                    case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP)  ! Ghost-cell extrap. BC at end
-                        call s_ghost_cell_extrapolation(q_prim_vf, 1, 1, k, l, q_T_sf)
-                    case (BC_REFLECTIVE)
-                        call s_symmetry(q_prim_vf, 1, 1, k, l, pb_in, mv_in, q_T_sf)
-                    case (BC_PERIODIC)
-                        call s_periodic(q_prim_vf, 1, 1, k, l, pb_in, mv_in, q_T_sf)
-                    case (BC_SLIP_WALL)
-                        call s_slip_wall(q_prim_vf, 1, 1, k, l, q_T_sf)
-                    case (BC_NO_SLIP_WALL)
-                        call s_no_slip_wall(q_prim_vf, 1, 1, k, l, q_T_sf)
-                    case (BC_DIRICHLET)
-                        call s_dirichlet(q_prim_vf, 1, 1, k, l, q_T_sf)
-                    end select
-
-                    if (qbmm .and. (.not. polytropic) .and. present(pb_in) .and. present(mv_in) .and. (bc_type(1, 2)%sf(0, k, &
-                        & l) <= BC_GHOST_EXTRAP)) then
-                        call s_qbmm_extrapolation(1, 1, k, l, pb_in, mv_in)
-                    end if
-                end do
-            end do
-            $:END_GPU_PARALLEL_LOOP()
-        end if
+        call s_populate_bc_direction(1, -1, bc_x, bc_type(1, 1), q_prim_vf, pb_in, mv_in, q_T_sf)
+        call s_populate_bc_direction(1, 1, bc_x, bc_type(1, 2), q_prim_vf, pb_in, mv_in, q_T_sf)
 
         ! Population of Buffers in y-direction
 
         if (n == 0) return
 
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
-            if (bc_y%beg >= 0) then
-                call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in, q_T_sf)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = 0, p
-                    do k = -buff_size, m + buff_size
-                        select case (int(bc_type(2, 1)%sf(k, 0, l)))
-                        case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP)
-                            call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l, q_T_sf)
-                        case (BC_AXIS)
-                            call s_axis(q_prim_vf, pb_in, mv_in, k, l)
-                        case (BC_REFLECTIVE)
-                            call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_PERIODIC)
-                            call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_SLIP_WALL)
-                            call s_slip_wall(q_prim_vf, 2, -1, k, l, q_T_sf)
-                        case (BC_NO_SLIP_WALL)
-                            call s_no_slip_wall(q_prim_vf, 2, -1, k, l, q_T_sf)
-                        case (BC_DIRICHLET)
-                            call s_dirichlet(q_prim_vf, 2, -1, k, l, q_T_sf)
-                        end select
-
-                        if (qbmm .and. (.not. polytropic) .and. present(pb_in) .and. present(mv_in) .and. (bc_type(2, 1)%sf(k, 0, &
-                            & l) <= BC_GHOST_EXTRAP) .and. (bc_type(2, 1)%sf(k, 0, l) /= BC_AXIS)) then
-                            call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in)
-                        end if
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-
-            if (bc_y%end >= 0) then
-                call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in, q_T_sf)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = 0, p
-                    do k = -buff_size, m + buff_size
-                        select case (int(bc_type(2, 2)%sf(k, 0, l)))
-                        case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP)
-                            call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l, q_T_sf)
-                        case (BC_REFLECTIVE)
-                            call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_PERIODIC)
-                            call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_SLIP_WALL)
-                            call s_slip_wall(q_prim_vf, 2, 1, k, l, q_T_sf)
-                        case (BC_NO_SLIP_WALL)
-                            call s_no_slip_wall(q_prim_vf, 2, 1, k, l, q_T_sf)
-                        case (BC_DIRICHLET)
-                            call s_dirichlet(q_prim_vf, 2, 1, k, l, q_T_sf)
-                        end select
-
-                        if (qbmm .and. (.not. polytropic) .and. present(pb_in) .and. present(mv_in) .and. (bc_type(2, 2)%sf(k, 0, &
-                            & l) <= BC_GHOST_EXTRAP)) then
-                            call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in)
-                        end if
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
+            call s_populate_bc_direction(2, -1, bc_y, bc_type(2, 1), q_prim_vf, pb_in, mv_in, q_T_sf)
+            call s_populate_bc_direction(2, 1, bc_y, bc_type(2, 2), q_prim_vf, pb_in, mv_in, q_T_sf)
         #:endif
 
         ! Population of Buffers in z-direction
@@ -217,68 +98,82 @@ contains
         if (p == 0) return
 
         #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
-            if (bc_z%beg >= 0) then
-                call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, -1, sys_size, pb_in, mv_in, q_T_sf)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = -buff_size, n + buff_size
-                    do k = -buff_size, m + buff_size
-                        select case (int(bc_type(3, 1)%sf(k, l, 0)))
-                        case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP)
-                            call s_ghost_cell_extrapolation(q_prim_vf, 3, -1, k, l, q_T_sf)
-                        case (BC_REFLECTIVE)
-                            call s_symmetry(q_prim_vf, 3, -1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_PERIODIC)
-                            call s_periodic(q_prim_vf, 3, -1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_SLIP_WALL)
-                            call s_slip_wall(q_prim_vf, 3, -1, k, l, q_T_sf)
-                        case (BC_NO_SLIP_WALL)
-                            call s_no_slip_wall(q_prim_vf, 3, -1, k, l, q_T_sf)
-                        case (BC_DIRICHLET)
-                            call s_dirichlet(q_prim_vf, 3, -1, k, l, q_T_sf)
-                        end select
+            call s_populate_bc_direction(3, -1, bc_z, bc_type(3, 1), q_prim_vf, pb_in, mv_in, q_T_sf)
+            call s_populate_bc_direction(3, 1, bc_z, bc_type(3, 2), q_prim_vf, pb_in, mv_in, q_T_sf)
+        #:endif
 
-                        if (qbmm .and. (.not. polytropic) .and. present(pb_in) .and. present(mv_in) .and. (bc_type(3, 1)%sf(k, l, &
-                            & 0) <= BC_GHOST_EXTRAP)) then
-                            call s_qbmm_extrapolation(3, -1, k, l, pb_in, mv_in)
-                        end if
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
+    end subroutine s_populate_variables_buffers
 
-            if (bc_z%end >= 0) then
-                call s_mpi_sendrecv_variables_buffers(q_prim_vf, 3, 1, sys_size, pb_in, mv_in, q_T_sf)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = -buff_size, n + buff_size
-                    do k = -buff_size, m + buff_size
-                        select case (int(bc_type(3, 2)%sf(k, l, 0)))
-                        case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP)
-                            call s_ghost_cell_extrapolation(q_prim_vf, 3, 1, k, l, q_T_sf)
-                        case (BC_REFLECTIVE)
-                            call s_symmetry(q_prim_vf, 3, 1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_PERIODIC)
-                            call s_periodic(q_prim_vf, 3, 1, k, l, pb_in, mv_in, q_T_sf)
-                        case (BC_SlIP_WALL)
-                            call s_slip_wall(q_prim_vf, 3, 1, k, l, q_T_sf)
-                        case (BC_NO_SLIP_WALL)
-                            call s_no_slip_wall(q_prim_vf, 3, 1, k, l, q_T_sf)
-                        case (BC_DIRICHLET)
-                            call s_dirichlet(q_prim_vf, 3, 1, k, l, q_T_sf)
-                        end select
+    !> Populate the variable buffers along one direction and location, via MPI exchange for processor boundaries or by dispatching
+    !! the per-cell BC routines over the boundary face.
+    impure subroutine s_populate_bc_direction(bc_dir, bc_loc, bc_bounds, bc_type_edge, q_prim_vf, pb_in, mv_in, q_T_sf)
 
-                        if (qbmm .and. (.not. polytropic) .and. present(pb_in) .and. present(mv_in) .and. (bc_type(3, 2)%sf(k, l, &
-                            & 0) <= BC_GHOST_EXTRAP)) then
-                            call s_qbmm_extrapolation(3, 1, k, l, pb_in, mv_in)
-                        end if
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-        #:endif
+        integer, intent(in) :: bc_dir, bc_loc
+        type(int_bounds_info), intent(in) :: bc_bounds
+        type(integer_field), intent(in) :: bc_type_edge
+        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
+        real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
+        type(scalar_field), optional, intent(inout) :: q_T_sf
+        integer :: bc_edge, k_beg, k_end, l_beg, l_end
+        integer :: bc_code, k, l
 
-    end subroutine s_populate_variables_buffers
+        if (bc_loc == -1) then
+            bc_edge = bc_bounds%beg
+        else
+            bc_edge = bc_bounds%end
+        end if
+
+        ! BC type codes defined in m_constants.fpp; non-negative values are MPI boundaries
+        if (bc_edge >= 0) then
+            call s_mpi_sendrecv_variables_buffers(q_prim_vf, bc_dir, bc_loc, sys_size, pb_in, mv_in, q_T_sf)
+            return
+        end if
+
+        if (bc_dir == 1) then
+            k_beg = 0; k_end = n; l_beg = 0; l_end = p
+        else if (bc_dir == 2) then
+            k_beg = -buff_size; k_end = m + buff_size; l_beg = 0; l_end = p
+        else
+            k_beg = -buff_size; k_end = m + buff_size; l_beg = -buff_size; l_end = n + buff_size
+        end if
+
+        $:GPU_PARALLEL_LOOP(private='[l, k, bc_code]', collapse=2)
+        do l = l_beg, l_end
+            do k = k_beg, k_end
+                if (bc_dir == 1) then
+                    bc_code = int(bc_type_edge%sf(0, k, l))
+                else if (bc_dir == 2) then
+                    bc_code = int(bc_type_edge%sf(k, 0, l))
+                else
+                    bc_code = int(bc_type_edge%sf(k, l, 0))
+                end if
+
+                select case (bc_code)
+                case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP)
+                    call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+                case (BC_AXIS)
+                    if (bc_dir == 2 .and. bc_loc == -1) call s_axis(q_prim_vf, pb_in, mv_in, k, l)
+                case (BC_REFLECTIVE)
+                    call s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in, q_T_sf)
+                case (BC_PERIODIC)
+                    call s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in, q_T_sf)
+                case (BC_SLIP_WALL)
+                    call s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+                case (BC_NO_SLIP_WALL)
+                    call s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+                case (BC_DIRICHLET)
+                    call s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+                end select
+
+                if (qbmm .and. (.not. polytropic) .and. present(pb_in) .and. present(mv_in) .and. (bc_code <= BC_GHOST_EXTRAP) &
+                    & .and. .not. (bc_dir == 2 .and. bc_loc == -1 .and. bc_code == BC_AXIS)) then
+                    call s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb_in, mv_in)
+                end if
+            end do
+        end do
+        $:END_GPU_PARALLEL_LOOP()
+
+    end subroutine s_populate_bc_direction
 
     !> Fill ghost cells by copying the nearest boundary cell value along the specified direction.
     subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)

From 2eff838d81110e5c150a95202bfeee787d321ff2 Mon Sep 17 00:00:00 2001
From: Spencer Bryngelson <sbryngelson@gmail.com>
Date: Wed, 10 Jun 2026 21:50:44 -0400
Subject: [PATCH 7/7] src: split m_boundary_common into dispatcher, primitives,
 and io modules

---
 docs/module_categories.json          |    2 +
 src/common/m_boundary_common.fpp     | 2060 +-------------------------
 src/common/m_boundary_io.fpp         | 1035 +++++++++++++
 src/common/m_boundary_primitives.fpp | 1063 +++++++++++++
 4 files changed, 2102 insertions(+), 2058 deletions(-)
 create mode 100644 src/common/m_boundary_io.fpp
 create mode 100644 src/common/m_boundary_primitives.fpp

diff --git a/docs/module_categories.json b/docs/module_categories.json
index f8ba379f2d..6734de2c53 100644
--- a/docs/module_categories.json
+++ b/docs/module_categories.json
@@ -38,6 +38,8 @@
             "m_cbc",
             "m_compute_cbc",
             "m_boundary_common",
+            "m_boundary_primitives",
+            "m_boundary_io",
             "m_ibm",
             "m_particle_cloud",
             "m_igr",
diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp
index f008659fa7..575e2fa1fb 100644
--- a/src/common/m_boundary_common.fpp
+++ b/src/common/m_boundary_common.fpp
@@ -12,19 +12,11 @@ module m_boundary_common
     use m_global_parameters
     use m_mpi_proxy
     use m_constants
-    use m_delay_file_access
-    use m_compile_specific
+    use m_boundary_primitives
+    use m_boundary_io
 
     implicit none
 
-    type(scalar_field), dimension(:,:), allocatable :: bc_buffers
-    $:GPU_DECLARE(create='[bc_buffers]')
-
-#ifdef MFC_MPI
-    integer, dimension(1:3,1:2) :: MPI_BC_TYPE_TYPE
-    integer, dimension(1:3,1:2) :: MPI_BC_BUFFER_TYPE
-#endif
-
     private; public :: s_initialize_boundary_common_module, s_populate_variables_buffers, s_create_mpi_types, &
         & s_populate_capillary_buffers, s_populate_F_igr_buffers, s_write_serial_boundary_condition_files, &
         & s_write_parallel_boundary_condition_files, s_read_serial_boundary_condition_files, &
@@ -175,2054 +167,6 @@ contains
 
     end subroutine s_populate_bc_direction
 
-    !> Fill ghost cells by copying the nearest boundary cell value along the specified direction.
-    subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
-
-        $:GPU_ROUTINE(function_name='s_ghost_cell_extrapolation', parallelism='[seq]', cray_inline=True)
-        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
-        integer, intent(in)                                    :: bc_dir, bc_loc
-        integer, intent(in)                                    :: k, l
-        integer                                                :: j, i
-        type(scalar_field), optional, intent(inout)            :: q_T_sf
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  ! bc_x%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(0, k, l)
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(-j, k, l) = q_T_sf%sf(0, k, l)
-                    end do
-                end if
-            else  !< bc_x%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m, k, l)
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m, k, l)
-                    end do
-                end if
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, 0, l)
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, 0, l)
-                    end do
-                end if
-            else  !< bc_y%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n, l)
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n, l)
-                    end do
-                end if
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, 0)
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, 0)
-                    end do
-                end if
-            else  !< bc_z%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p)
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p)
-                    end do
-                end if
-            end if
-        end if
-
-    end subroutine s_ghost_cell_extrapolation
-
-    !> Apply reflective (symmetry) boundary conditions by mirroring primitive variables and flipping the normal velocity component.
-    subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in, q_T_sf)
-
-        $:GPU_ROUTINE(parallelism='[seq]')
-        type(scalar_field), dimension(sys_size), intent(inout)                                               :: q_prim_vf
-        real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
-        integer, intent(in)                                                                                  :: bc_dir, bc_loc
-        integer, intent(in)                                                                                  :: k, l
-        integer                                                                                              :: j, q, i
-        type(scalar_field), optional, intent(inout)                                                          :: q_T_sf
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  !< bc_x%beg
-                do j = 1, buff_size
-                    do i = 1, eqn_idx%cont%end
-                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(j - 1, k, l)
-                    end do
-
-                    q_prim_vf(eqn_idx%mom%beg)%sf(-j, k, l) = -q_prim_vf(eqn_idx%mom%beg)%sf(j - 1, k, l)
-
-                    do i = eqn_idx%mom%beg + 1, sys_size
-                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(j - 1, k, l)
-                    end do
-
-                    if (chemistry .and. present(q_T_sf)) then
-                        q_T_sf%sf(-j, k, l) = q_T_sf%sf(j - 1, k, l)
-                    end if
-
-                    if (elasticity) then
-                        do i = 1, shear_BC_flip_num
-                            q_prim_vf(shear_BC_flip_indices(1, i))%sf(-j, k, l) = -q_prim_vf(shear_BC_flip_indices(1, &
-                                      & i))%sf(j - 1, k, l)
-                        end do
-                    end if
-
-                    if (hyperelasticity) then
-                        q_prim_vf(eqn_idx%xi%beg)%sf(-j, k, l) = -q_prim_vf(eqn_idx%xi%beg)%sf(j - 1, k, l)
-                    end if
-                end do
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(-j, k, l, q, i) = pb_in(j - 1, k, l, q, i)
-                                mv_in(-j, k, l, q, i) = mv_in(j - 1, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            else  !< bc_x%end
-                do j = 1, buff_size
-                    do i = 1, eqn_idx%cont%end
-                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l)
-                    end do
-
-                    q_prim_vf(eqn_idx%mom%beg)%sf(m + j, k, l) = -q_prim_vf(eqn_idx%mom%beg)%sf(m - (j - 1), k, l)
-
-                    do i = eqn_idx%mom%beg + 1, sys_size
-                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l)
-                    end do
-
-                    if (chemistry .and. present(q_T_sf)) then
-                        q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m - (j - 1), k, l)
-                    end if
-
-                    if (elasticity) then
-                        do i = 1, shear_BC_flip_num
-                            q_prim_vf(shear_BC_flip_indices(1, i))%sf(m + j, k, l) = -q_prim_vf(shear_BC_flip_indices(1, &
-                                      & i))%sf(m - (j - 1), k, l)
-                        end do
-                    end if
-
-                    if (hyperelasticity) then
-                        q_prim_vf(eqn_idx%xi%beg)%sf(m + j, k, l) = -q_prim_vf(eqn_idx%xi%beg)%sf(m - (j - 1), k, l)
-                    end if
-                end do
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(m + j, k, l, q, i) = pb_in(m - (j - 1), k, l, q, i)
-                                mv_in(m + j, k, l, q, i) = mv_in(m - (j - 1), k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do j = 1, buff_size
-                    do i = 1, eqn_idx%mom%beg
-                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l)
-                    end do
-
-                    q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, j - 1, l)
-
-                    do i = eqn_idx%mom%beg + 2, sys_size
-                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l)
-                    end do
-
-                    if (chemistry .and. present(q_T_sf)) then
-                        q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, j - 1, l)
-                    end if
-
-                    if (elasticity) then
-                        do i = 1, shear_BC_flip_num
-                            q_prim_vf(shear_BC_flip_indices(2, i))%sf(k, -j, l) = -q_prim_vf(shear_BC_flip_indices(2, i))%sf(k, &
-                                      & j - 1, l)
-                        end do
-                    end if
-
-                    if (hyperelasticity) then
-                        q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, j - 1, l)
-                    end if
-                end do
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, -j, l, q, i) = pb_in(k, j - 1, l, q, i)
-                                mv_in(k, -j, l, q, i) = mv_in(k, j - 1, l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            else  !< bc_y%end
-                do j = 1, buff_size
-                    do i = 1, eqn_idx%mom%beg
-                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l)
-                    end do
-
-                    q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, n + j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, n - (j - 1), l)
-
-                    do i = eqn_idx%mom%beg + 2, sys_size
-                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l)
-                    end do
-
-                    if (chemistry .and. present(q_T_sf)) then
-                        q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n - (j - 1), l)
-                    end if
-
-                    if (elasticity) then
-                        do i = 1, shear_BC_flip_num
-                            q_prim_vf(shear_BC_flip_indices(2, i))%sf(k, n + j, l) = -q_prim_vf(shear_BC_flip_indices(2, &
-                                      & i))%sf(k, n - (j - 1), l)
-                        end do
-                    end if
-
-                    if (hyperelasticity) then
-                        q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, n + j, l) = -q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, n - (j - 1), l)
-                    end if
-                end do
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, n + j, l, q, i) = pb_in(k, n - (j - 1), l, q, i)
-                                mv_in(k, n + j, l, q, i) = mv_in(k, n - (j - 1), l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do j = 1, buff_size
-                    do i = 1, eqn_idx%mom%beg + 1
-                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, j - 1)
-                    end do
-
-                    q_prim_vf(eqn_idx%mom%end)%sf(k, l, -j) = -q_prim_vf(eqn_idx%mom%end)%sf(k, l, j - 1)
-
-                    do i = eqn_idx%E, sys_size
-                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, j - 1)
-                    end do
-
-                    if (chemistry .and. present(q_T_sf)) then
-                        q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, j - 1)
-                    end if
-
-                    if (elasticity) then
-                        do i = 1, shear_BC_flip_num
-                            q_prim_vf(shear_BC_flip_indices(3, i))%sf(k, l, -j) = -q_prim_vf(shear_BC_flip_indices(3, i))%sf(k, &
-                                      & l, j - 1)
-                        end do
-                    end if
-
-                    if (hyperelasticity) then
-                        q_prim_vf(eqn_idx%xi%end)%sf(k, l, -j) = -q_prim_vf(eqn_idx%xi%end)%sf(k, l, j - 1)
-                    end if
-                end do
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, l, -j, q, i) = pb_in(k, l, j - 1, q, i)
-                                mv_in(k, l, -j, q, i) = mv_in(k, l, j - 1, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            else  !< bc_z%end
-                do j = 1, buff_size
-                    do i = 1, eqn_idx%mom%beg + 1
-                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p - (j - 1))
-                    end do
-
-                    q_prim_vf(eqn_idx%mom%end)%sf(k, l, p + j) = -q_prim_vf(eqn_idx%mom%end)%sf(k, l, p - (j - 1))
-
-                    do i = eqn_idx%E, sys_size
-                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p - (j - 1))
-                    end do
-
-                    if (chemistry .and. present(q_T_sf)) then
-                        q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p - (j - 1))
-                    end if
-
-                    if (elasticity) then
-                        do i = 1, shear_BC_flip_num
-                            q_prim_vf(shear_BC_flip_indices(3, i))%sf(k, l, p + j) = -q_prim_vf(shear_BC_flip_indices(3, &
-                                      & i))%sf(k, l, p - (j - 1))
-                        end do
-                    end if
-
-                    if (hyperelasticity) then
-                        q_prim_vf(eqn_idx%xi%end)%sf(k, l, p + j) = -q_prim_vf(eqn_idx%xi%end)%sf(k, l, p - (j - 1))
-                    end if
-                end do
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, l, p + j, q, i) = pb_in(k, l, p - (j - 1), q, i)
-                                mv_in(k, l, p + j, q, i) = mv_in(k, l, p - (j - 1), q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            end if
-        end if
-
-    end subroutine s_symmetry
-
-    !> Apply periodic boundary conditions by copying values from the opposite domain boundary.
-    subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in, q_T_sf)
-
-        $:GPU_ROUTINE(parallelism='[seq]')
-        type(scalar_field), dimension(sys_size), intent(inout)                                               :: q_prim_vf
-        real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
-        integer, intent(in)                                                                                  :: bc_dir, bc_loc
-        integer, intent(in)                                                                                  :: k, l
-        integer                                                                                              :: j, q, i
-        type(scalar_field), optional, intent(inout)                                                          :: q_T_sf
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  !< bc_x%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l)
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(-j, k, l) = q_T_sf%sf(m - (j - 1), k, l)
-                    end do
-                end if
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(-j, k, l, q, i) = pb_in(m - (j - 1), k, l, q, i)
-                                mv_in(-j, k, l, q, i) = mv_in(m - (j - 1), k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            else  !< bc_x%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(j - 1, k, l)
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(m + j, k, l) = q_T_sf%sf(j - 1, k, l)
-                    end do
-                end if
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(m + j, k, l, q, i) = pb_in(j - 1, k, l, q, i)
-                                mv_in(m + j, k, l, q, i) = mv_in(j - 1, k, l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l)
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, n - (j - 1), l)
-                    end do
-                end if
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, -j, l, q, i) = pb_in(k, n - (j - 1), l, q, i)
-                                mv_in(k, -j, l, q, i) = mv_in(k, n - (j - 1), l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            else  !< bc_y%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, j - 1, l)
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, j - 1, l)
-                    end do
-                end if
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, n + j, l, q, i) = pb_in(k, (j - 1), l, q, i)
-                                mv_in(k, n + j, l, q, i) = mv_in(k, (j - 1), l, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, p - (j - 1))
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, p - (j - 1))
-                    end do
-                end if
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, l, -j, q, i) = pb_in(k, l, p - (j - 1), q, i)
-                                mv_in(k, l, -j, q, i) = mv_in(k, l, p - (j - 1), q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            else  !< bc_z%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, j - 1)
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, j - 1)
-                    end do
-                end if
-
-                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-                    do i = 1, nb
-                        do q = 1, nnode
-                            do j = 1, buff_size
-                                pb_in(k, l, p + j, q, i) = pb_in(k, l, j - 1, q, i)
-                                mv_in(k, l, p + j, q, i) = mv_in(k, l, j - 1, q, i)
-                            end do
-                        end do
-                    end do
-                end if
-            end if
-        end if
-
-    end subroutine s_periodic
-
-    !> Apply axis boundary conditions for cylindrical coordinates by reflecting values across the axis with azimuthal phase shift.
-    subroutine s_axis(q_prim_vf, pb_in, mv_in, k, l)
-
-        $:GPU_ROUTINE(parallelism='[seq]')
-        type(scalar_field), dimension(sys_size), intent(inout)                                               :: q_prim_vf
-        real(stp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), optional, intent(inout) :: pb_in, mv_in
-        integer, intent(in)                                                                                  :: k, l
-        integer                                                                                              :: j, q, i
-
-        do j = 1, buff_size
-            if (z_cc(l) < pi) then
-                do i = 1, eqn_idx%mom%beg
-                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l + ((p + 1)/2))
-                end do
-
-                q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, j - 1, l + ((p + 1)/2))
-
-                q_prim_vf(eqn_idx%mom%end)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%end)%sf(k, j - 1, l + ((p + 1)/2))
-
-                do i = eqn_idx%E, sys_size
-                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l + ((p + 1)/2))
-                end do
-            else
-                do i = 1, eqn_idx%mom%beg
-                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l - ((p + 1)/2))
-                end do
-
-                q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, j - 1, l - ((p + 1)/2))
-
-                q_prim_vf(eqn_idx%mom%end)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%end)%sf(k, j - 1, l - ((p + 1)/2))
-
-                do i = eqn_idx%E, sys_size
-                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l - ((p + 1)/2))
-                end do
-            end if
-        end do
-
-        if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
-            do i = 1, nb
-                do q = 1, nnode
-                    do j = 1, buff_size
-                        if (z_cc(l) < pi) then
-                            pb_in(k, -j, l, q, i) = pb_in(k, j - 1, l + ((p + 1)/2), q, i)
-                            mv_in(k, -j, l, q, i) = mv_in(k, j - 1, l + ((p + 1)/2), q, i)
-                        else
-                            pb_in(k, -j, l, q, i) = pb_in(k, j - 1, l - ((p + 1)/2), q, i)
-                            mv_in(k, -j, l, q, i) = mv_in(k, j - 1, l - ((p + 1)/2), q, i)
-                        end if
-                    end do
-                end do
-            end do
-        end if
-
-    end subroutine s_axis
-
-    !> Apply slip wall boundary conditions by extrapolating scalars and reflecting the wall-normal velocity component.
-    subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
-
-        $:GPU_ROUTINE(function_name='s_slip_wall',parallelism='[seq]', cray_inline=True)
-        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
-        integer, intent(in)                                    :: bc_dir, bc_loc
-        integer, intent(in)                                    :: k, l
-        integer                                                :: j, i
-        type(scalar_field), optional, intent(inout)            :: q_T_sf
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  !< bc_x%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb1
-                        else
-                            q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(0, k, l)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_x%isothermal_in) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(-j, k, l) = 2._wp*bc_x%Twall_in - q_T_sf%sf(j - 1, k, l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(-j, k, l) = q_T_sf%sf(0, k, l)
-                        end do
-                    end if
-                end if
-            else  !< bc_x%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve1
-                        else
-                            q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m, k, l)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_x%isothermal_out) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(m + j, k, l) = 2._wp*bc_x%Twall_out - q_T_sf%sf(m - (j - 1), k, l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m, k, l)
-                        end do
-                    end if
-                end if
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg + 1) then
-                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb2
-                        else
-                            q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, 0, l)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_y%isothermal_in) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, -j, l) = 2._wp*bc_y%Twall_in - q_T_sf%sf(k, j - 1, l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, 0, l)
-                        end do
-                    end if
-                end if
-            else  !< bc_y%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg + 1) then
-                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve2
-                        else
-                            q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n, l)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_y%isothermal_out) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, n + j, l) = 2._wp*bc_y%Twall_out - q_T_sf%sf(k, n - (j - 1), l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n, l)
-                        end do
-                    end if
-                end if
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%end) then
-                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb3
-                        else
-                            q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, 0)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_z%isothermal_in) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, -j) = 2._wp*bc_z%Twall_in - q_T_sf%sf(k, l, j - 1)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, 0)
-                        end do
-                    end if
-                end if
-            else  !< bc_z%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%end) then
-                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve3
-                        else
-                            q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_z%isothermal_out) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, p + j) = 2._wp*bc_z%Twall_out - q_T_sf%sf(k, l, p - (j - 1))
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p)
-                        end do
-                    end if
-                end if
-            end if
-        end if
-
-    end subroutine s_slip_wall
-
-    !> Apply no-slip wall boundary conditions by reflecting and negating all velocity components at the wall.
-    subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
-
-        $:GPU_ROUTINE(function_name='s_no_slip_wall',parallelism='[seq]', cray_inline=True)
-
-        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
-        integer, intent(in)                                    :: bc_dir, bc_loc
-        integer, intent(in)                                    :: k, l
-        integer                                                :: j, i
-        type(scalar_field), optional, intent(inout)            :: q_T_sf
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  !< bc_x%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb1
-                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
-                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb2
-                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
-                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb3
-                        else
-                            q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(0, k, l)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_x%isothermal_in) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(-j, k, l) = 2._wp*bc_x%Twall_in - q_T_sf%sf(j - 1, k, l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(-j, k, l) = q_T_sf%sf(0, k, l)
-                        end do
-                    end if
-                end if
-            else  !< bc_x%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve1
-                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
-                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve2
-                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
-                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve3
-                        else
-                            q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m, k, l)
-                        end if
-                    end do
-                end do
-
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_x%isothermal_out) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(m + j, k, l) = 2._wp*bc_x%Twall_out - q_T_sf%sf(m - (j - 1), k, l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m, k, l)
-                        end do
-                    end if
-                end if
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb1
-                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
-                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb2
-                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
-                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb3
-                        else
-                            q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, 0, l)
-                        end if
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_y%isothermal_in) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, -j, l) = 2._wp*bc_y%Twall_in - q_T_sf%sf(k, j - 1, l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, 0, l)
-                        end do
-                    end if
-                end if
-            else  !< bc_y%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve1
-                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
-                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve2
-                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
-                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve3
-                        else
-                            q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n, l)
-                        end if
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_y%isothermal_out) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, n + j, l) = 2._wp*bc_y%Twall_out - q_T_sf%sf(k, n - (j - 1), l)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n, l)
-                        end do
-                    end if
-                end if
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb1
-                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
-                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb2
-                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
-                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb3
-                        else
-                            q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, 0)
-                        end if
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_z%isothermal_in) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, -j) = 2._wp*bc_z%Twall_in - q_T_sf%sf(k, l, j - 1)
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, 0)
-                        end do
-                    end if
-                end if
-            else  !< bc_z%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        if (i == eqn_idx%mom%beg) then
-                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve1
-                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
-                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve2
-                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
-                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve3
-                        else
-                            q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p)
-                        end if
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    if (bc_z%isothermal_out) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, p + j) = 2._wp*bc_z%Twall_out - q_T_sf%sf(k, l, p - (j - 1))
-                        end do
-                    else
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p)
-                        end do
-                    end if
-                end if
-            end if
-        end if
-
-    end subroutine s_no_slip_wall
-
-    !> Apply Dirichlet boundary conditions by prescribing ghost cell values from stored boundary buffers.
-    subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
-
-        $:GPU_ROUTINE(function_name='s_dirichlet',parallelism='[seq]', cray_inline=True)
-        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
-        integer, intent(in)                                    :: bc_dir, bc_loc
-        integer, intent(in)                                    :: k, l
-        integer                                                :: j, i
-        type(scalar_field), optional, intent(inout)            :: q_T_sf
-
-#ifdef MFC_SIMULATION
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  ! bc_x%beg
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(-j, k, l) = bc_buffers(1, 1)%sf(i, k, l)
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(-j, k, l) = bc_buffers(1, 1)%sf(sys_size + 1, k, l)
-                    end do
-                end if
-            else  !< bc_x%end
-                do i = 1, sys_size
-                    do j = 1, buff_size
-                        q_prim_vf(i)%sf(m + j, k, l) = bc_buffers(1, 2)%sf(i, k, l)
-                    end do
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    do j = 1, buff_size
-                        q_T_sf%sf(m + j, k, l) = bc_buffers(1, 2)%sf(sys_size + 1, k, l)
-                    end do
-                end if
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
-                if (bc_loc == -1) then  !< bc_y%beg
-                    do i = 1, sys_size
-                        do j = 1, buff_size
-                            q_prim_vf(i)%sf(k, -j, l) = bc_buffers(2, 1)%sf(k, i, l)
-                        end do
-                    end do
-                    if (chemistry .and. present(q_T_sf)) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, -j, l) = bc_buffers(2, 1)%sf(k, sys_size + 1, l)
-                        end do
-                    end if
-                else  !< bc_y%end
-                    do i = 1, sys_size
-                        do j = 1, buff_size
-                            q_prim_vf(i)%sf(k, n + j, l) = bc_buffers(2, 2)%sf(k, i, l)
-                        end do
-                    end do
-                    if (chemistry .and. present(q_T_sf)) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, n + j, l) = bc_buffers(2, 2)%sf(k, sys_size + 1, l)
-                        end do
-                    end if
-                end if
-            #:endif
-        else if (bc_dir == 3) then  !< z-direction
-            #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
-                if (bc_loc == -1) then  !< bc_z%beg
-                    do i = 1, sys_size
-                        do j = 1, buff_size
-                            q_prim_vf(i)%sf(k, l, -j) = bc_buffers(3, 1)%sf(k, l, i)
-                        end do
-                    end do
-                    if (chemistry .and. present(q_T_sf)) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, -j) = bc_buffers(3, 1)%sf(k, l, sys_size + 1)
-                        end do
-                    end if
-                else  !< bc_z%end
-                    do i = 1, sys_size
-                        do j = 1, buff_size
-                            q_prim_vf(i)%sf(k, l, p + j) = bc_buffers(3, 2)%sf(k, l, i)
-                        end do
-                    end do
-                    if (chemistry .and. present(q_T_sf)) then
-                        do j = 1, buff_size
-                            q_T_sf%sf(k, l, p + j) = bc_buffers(3, 2)%sf(k, l, sys_size + 1)
-                        end do
-                    end if
-                end if
-            #:endif
-        end if
-#else
-        call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
-#endif
-
-    end subroutine s_dirichlet
-
-    !> Extrapolate QBMM bubble pressure and mass-vapor variables into ghost cells by copying boundary values.
-    subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb_in, mv_in)
-
-        $:GPU_ROUTINE(parallelism='[seq]')
-        real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
-        integer, intent(in)                                                                                  :: bc_dir, bc_loc
-        integer, intent(in)                                                                                  :: k, l
-        integer                                                                                              :: j, q, i
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  ! bc_x%beg
-                do i = 1, nb
-                    do q = 1, nnode
-                        do j = 1, buff_size
-                            pb_in(-j, k, l, q, i) = pb_in(0, k, l, q, i)
-                            mv_in(-j, k, l, q, i) = mv_in(0, k, l, q, i)
-                        end do
-                    end do
-                end do
-            else  !< bc_x%end
-                do i = 1, nb
-                    do q = 1, nnode
-                        do j = 1, buff_size
-                            pb_in(m + j, k, l, q, i) = pb_in(m, k, l, q, i)
-                            mv_in(m + j, k, l, q, i) = mv_in(m, k, l, q, i)
-                        end do
-                    end do
-                end do
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, nb
-                    do q = 1, nnode
-                        do j = 1, buff_size
-                            pb_in(k, -j, l, q, i) = pb_in(k, 0, l, q, i)
-                            mv_in(k, -j, l, q, i) = mv_in(k, 0, l, q, i)
-                        end do
-                    end do
-                end do
-            else  !< bc_y%end
-                do i = 1, nb
-                    do q = 1, nnode
-                        do j = 1, buff_size
-                            pb_in(k, n + j, l, q, i) = pb_in(k, n, l, q, i)
-                            mv_in(k, n + j, l, q, i) = mv_in(k, n, l, q, i)
-                        end do
-                    end do
-                end do
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, nb
-                    do q = 1, nnode
-                        do j = 1, buff_size
-                            pb_in(k, l, -j, q, i) = pb_in(k, l, 0, q, i)
-                            mv_in(k, l, -j, q, i) = mv_in(k, l, 0, q, i)
-                        end do
-                    end do
-                end do
-            else  !< bc_z%end
-                do i = 1, nb
-                    do q = 1, nnode
-                        do j = 1, buff_size
-                            pb_in(k, l, p + j, q, i) = pb_in(k, l, p, q, i)
-                            mv_in(k, l, p + j, q, i) = mv_in(k, l, p, q, i)
-                        end do
-                    end do
-                end do
-            end if
-        end if
-
-    end subroutine s_qbmm_extrapolation
-
-    !> Populate ghost cell buffers for the color function and its divergence used in capillary surface tension.
-    impure subroutine s_populate_capillary_buffers(c_divs, bc_type, bc)
-
-        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
-        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
-        type(bc_xyz_info), intent(in)                              :: bc
-        integer                                                    :: k, l
-
-        !> x-direction
-
-        if (bc%x%beg >= 0) then
-            call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1)
-        else
-            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-            do l = 0, p
-                do k = 0, n
-                    select case (bc_type(1, 1)%sf(0, k, l))
-                    case (BC_PERIODIC)
-                        call s_color_function_periodic(c_divs, 1, -1, k, l)
-                    case (BC_REFLECTIVE)
-                        call s_color_function_reflective(c_divs, 1, -1, k, l)
-                    case default
-                        call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l)
-                    end select
-                end do
-            end do
-            $:END_GPU_PARALLEL_LOOP()
-        end if
-
-        if (bc%x%end >= 0) then
-            call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1)
-        else
-            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-            do l = 0, p
-                do k = 0, n
-                    select case (bc_type(1, 2)%sf(0, k, l))
-                    case (BC_PERIODIC)
-                        call s_color_function_periodic(c_divs, 1, 1, k, l)
-                    case (BC_REFLECTIVE)
-                        call s_color_function_reflective(c_divs, 1, 1, k, l)
-                    case default
-                        call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l)
-                    end select
-                end do
-            end do
-            $:END_GPU_PARALLEL_LOOP()
-        end if
-
-        if (n == 0) return
-
-        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
-            !> y-direction
-            if (bc%y%beg >= 0) then
-                call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = 0, p
-                    do k = -buff_size, m + buff_size
-                        select case (bc_type(2, 1)%sf(k, 0, l))
-                        case (BC_PERIODIC)
-                            call s_color_function_periodic(c_divs, 2, -1, k, l)
-                        case (BC_REFLECTIVE)
-                            call s_color_function_reflective(c_divs, 2, -1, k, l)
-                        case default
-                            call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l)
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-
-            if (bc%y%end >= 0) then
-                call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = 0, p
-                    do k = -buff_size, m + buff_size
-                        select case (bc_type(2, 2)%sf(k, 0, l))
-                        case (BC_PERIODIC)
-                            call s_color_function_periodic(c_divs, 2, 1, k, l)
-                        case (BC_REFLECTIVE)
-                            call s_color_function_reflective(c_divs, 2, 1, k, l)
-                        case default
-                            call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l)
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-        #:endif
-
-        if (p == 0) return
-
-        #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
-            !> z-direction
-            if (bc%z%beg >= 0) then
-                call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = -buff_size, n + buff_size
-                    do k = -buff_size, m + buff_size
-                        select case (bc_type(3, 1)%sf(k, l, 0))
-                        case (BC_PERIODIC)
-                            call s_color_function_periodic(c_divs, 3, -1, k, l)
-                        case (BC_REFLECTIVE)
-                            call s_color_function_reflective(c_divs, 3, -1, k, l)
-                        case default
-                            call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l)
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-
-            if (bc%z%end >= 0) then
-                call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = -buff_size, n + buff_size
-                    do k = -buff_size, m + buff_size
-                        select case (bc_type(3, 2)%sf(k, l, 0))
-                        case (BC_PERIODIC)
-                            call s_color_function_periodic(c_divs, 3, 1, k, l)
-                        case (BC_REFLECTIVE)
-                            call s_color_function_reflective(c_divs, 3, 1, k, l)
-                        case default
-                            call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l)
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-        #:endif
-
-    end subroutine s_populate_capillary_buffers
-
-    !> Apply periodic boundary conditions to the color function and its divergence fields.
-    subroutine s_color_function_periodic(c_divs, bc_dir, bc_loc, k, l)
-
-        $:GPU_ROUTINE(function_name='s_color_function_periodic', parallelism='[seq]', cray_inline=True)
-        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
-        integer, intent(in)                                        :: bc_dir, bc_loc
-        integer, intent(in)                                        :: k, l
-        integer                                                    :: j, i
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  ! bc_x%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(m - (j - 1), k, l)
-                    end do
-                end do
-            else  !< bc_x%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(j - 1, k, l)
-                    end do
-                end do
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, n - (j - 1), l)
-                    end do
-                end do
-            else  !< bc_y%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, j - 1, l)
-                    end do
-                end do
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, p - (j - 1))
-                    end do
-                end do
-            else  !< bc_z%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, j - 1)
-                    end do
-                end do
-            end if
-        end if
-
-    end subroutine s_color_function_periodic
-
-    !> Apply reflective boundary conditions to the color function and its divergence fields.
-    subroutine s_color_function_reflective(c_divs, bc_dir, bc_loc, k, l)
-
-        $:GPU_ROUTINE(function_name='s_color_function_reflective', parallelism='[seq]', cray_inline=True)
-        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
-        integer, intent(in)                                        :: bc_dir, bc_loc
-        integer, intent(in)                                        :: k, l
-        integer                                                    :: j, i
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  ! bc_x%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        if (i == bc_dir) then
-                            c_divs(i)%sf(-j, k, l) = -c_divs(i)%sf(j - 1, k, l)
-                        else
-                            c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(j - 1, k, l)
-                        end if
-                    end do
-                end do
-            else  !< bc_x%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        if (i == bc_dir) then
-                            c_divs(i)%sf(m + j, k, l) = -c_divs(i)%sf(m - (j - 1), k, l)
-                        else
-                            c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(m - (j - 1), k, l)
-                        end if
-                    end do
-                end do
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        if (i == bc_dir) then
-                            c_divs(i)%sf(k, -j, l) = -c_divs(i)%sf(k, j - 1, l)
-                        else
-                            c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, j - 1, l)
-                        end if
-                    end do
-                end do
-            else  !< bc_y%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        if (i == bc_dir) then
-                            c_divs(i)%sf(k, n + j, l) = -c_divs(i)%sf(k, n - (j - 1), l)
-                        else
-                            c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, n - (j - 1), l)
-                        end if
-                    end do
-                end do
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        if (i == bc_dir) then
-                            c_divs(i)%sf(k, l, -j) = -c_divs(i)%sf(k, l, j - 1)
-                        else
-                            c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, j - 1)
-                        end if
-                    end do
-                end do
-            else  !< bc_z%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        if (i == bc_dir) then
-                            c_divs(i)%sf(k, l, p + j) = -c_divs(i)%sf(k, l, p - (j - 1))
-                        else
-                            c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, p - (j - 1))
-                        end if
-                    end do
-                end do
-            end if
-        end if
-
-    end subroutine s_color_function_reflective
-
-    !> Extrapolate the color function and its divergence into ghost cells by copying boundary values.
-    subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l)
-
-        $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', parallelism='[seq]', cray_inline=True)
-        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
-        integer, intent(in)                                        :: bc_dir, bc_loc
-        integer, intent(in)                                        :: k, l
-        integer                                                    :: j, i
-
-        if (bc_dir == 1) then  !< x-direction
-            if (bc_loc == -1) then  ! bc_x%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(0, k, l)
-                    end do
-                end do
-            else  !< bc_x%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(m, k, l)
-                    end do
-                end do
-            end if
-        else if (bc_dir == 2) then  !< y-direction
-            if (bc_loc == -1) then  !< bc_y%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, 0, l)
-                    end do
-                end do
-            else  !< bc_y%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, n, l)
-                    end do
-                end do
-            end if
-        else if (bc_dir == 3) then  !< z-direction
-            if (bc_loc == -1) then  !< bc_z%beg
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, 0)
-                    end do
-                end do
-            else  !< bc_z%end
-                do i = 1, num_dims + 1
-                    do j = 1, buff_size
-                        c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, p)
-                    end do
-                end do
-            end if
-        end if
-
-    end subroutine s_color_function_ghost_cell_extrapolation
-
-    !> Populate ghost cell buffers for the Jacobian scalar field used in the IGR elliptic solver.
-    impure subroutine s_populate_F_igr_buffers(bc_type, jac_sf)
-
-        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
-        type(scalar_field), dimension(1:), intent(inout)           :: jac_sf
-        integer                                                    :: j, k, l
-
-        if (bc_x%beg >= 0) then
-            call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1)
-        else
-            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-            do l = 0, p
-                do k = 0, n
-                    select case (bc_type(1, 1)%sf(0, k, l))
-                    case (BC_PERIODIC)
-                        do j = 1, buff_size
-                            jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l)
-                        end do
-                    case (BC_REFLECTIVE)
-                        do j = 1, buff_size
-                            jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l)
-                        end do
-                    case default
-                        do j = 1, buff_size
-                            jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l)
-                        end do
-                    end select
-                end do
-            end do
-            $:END_GPU_PARALLEL_LOOP()
-        end if
-
-        if (bc_x%end >= 0) then
-            call s_mpi_sendrecv_variables_buffers(jac_sf, 1, 1, 1)
-        else
-            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-            do l = 0, p
-                do k = 0, n
-                    select case (bc_type(1, 2)%sf(0, k, l))
-                    case (BC_PERIODIC)
-                        do j = 1, buff_size
-                            jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l)
-                        end do
-                    case (BC_REFLECTIVE)
-                        do j = 1, buff_size
-                            jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l)
-                        end do
-                    case default
-                        do j = 1, buff_size
-                            jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l)
-                        end do
-                    end select
-                end do
-            end do
-            $:END_GPU_PARALLEL_LOOP()
-        end if
-
-        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
-            if (n == 0) then
-                return
-            else if (bc_y%beg >= 0) then
-                call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = 0, p
-                    do k = idwbuff(1)%beg, idwbuff(1)%end
-                        select case (bc_type(2, 1)%sf(k, 0, l))
-                        case (BC_PERIODIC)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l)
-                            end do
-                        case (BC_REFLECTIVE)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l)
-                            end do
-                        case default
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l)
-                            end do
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-
-            if (bc_y%end >= 0) then
-                call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = 0, p
-                    do k = idwbuff(1)%beg, idwbuff(1)%end
-                        select case (bc_type(2, 2)%sf(k, 0, l))
-                        case (BC_PERIODIC)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l)
-                            end do
-                        case (BC_REFLECTIVE)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l)
-                            end do
-                        case default
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l)
-                            end do
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-        #:endif
-
-        #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
-            if (p == 0) then
-                return
-            else if (bc_z%beg >= 0) then
-                call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = idwbuff(2)%beg, idwbuff(2)%end
-                    do k = idwbuff(1)%beg, idwbuff(1)%end
-                        select case (bc_type(3, 1)%sf(k, l, 0))
-                        case (BC_PERIODIC)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1)
-                            end do
-                        case (BC_REFLECTIVE)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1)
-                            end do
-                        case default
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0)
-                            end do
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-
-            if (bc_z%end >= 0) then
-                call s_mpi_sendrecv_variables_buffers(jac_sf, 3, 1, 1)
-            else
-                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
-                do l = idwbuff(2)%beg, idwbuff(2)%end
-                    do k = idwbuff(1)%beg, idwbuff(1)%end
-                        select case (bc_type(3, 2)%sf(k, l, 0))
-                        case (BC_PERIODIC)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1)
-                            end do
-                        case (BC_REFLECTIVE)
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1))
-                            end do
-                        case default
-                            do j = 1, buff_size
-                                jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p)
-                            end do
-                        end select
-                    end do
-                end do
-                $:END_GPU_PARALLEL_LOOP()
-            end if
-        #:endif
-
-    end subroutine s_populate_F_igr_buffers
-
-    !> Create MPI derived datatypes for boundary condition type arrays and buffer arrays used in parallel I/O.
-    impure subroutine s_create_mpi_types(bc_type)
-
-        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
-
-#ifdef MFC_MPI
-        integer               :: dir, loc
-        integer, dimension(3) :: sf_start_idx, sf_extents_loc
-        integer               :: ierr
-
-        do dir = 1, num_dims
-            do loc = 1, 2
-                sf_start_idx = (/0, 0, 0/)
-                sf_extents_loc = shape(bc_type(dir, loc)%sf)
-
-                call MPI_TYPE_CREATE_SUBARRAY(num_dims, sf_extents_loc, sf_extents_loc, sf_start_idx, MPI_ORDER_FORTRAN, &
-                                              & MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), ierr)
-                call MPI_TYPE_COMMIT(MPI_BC_TYPE_TYPE(dir, loc), ierr)
-            end do
-        end do
-
-        do dir = 1, num_dims
-            do loc = 1, 2
-                sf_start_idx = (/0, 0, 0/)
-                sf_extents_loc = shape(bc_buffers(dir, loc)%sf)
-
-                call MPI_TYPE_CREATE_SUBARRAY(num_dims, sf_extents_loc*mpi_io_type, sf_extents_loc*mpi_io_type, sf_start_idx, &
-                                              & MPI_ORDER_FORTRAN, mpi_io_p, MPI_BC_BUFFER_TYPE(dir, loc), ierr)
-                call MPI_TYPE_COMMIT(MPI_BC_BUFFER_TYPE(dir, loc), ierr)
-            end do
-        end do
-#endif
-
-    end subroutine s_create_mpi_types
-
-    !> Write boundary condition type and buffer data to serial (unformatted) restart files.
-    subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath, old_grid_in, q_T_sf)
-
-        type(scalar_field), dimension(sys_size), intent(in)        :: q_prim_vf
-        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
-        logical, intent(in)                                        :: old_grid_in
-        character(LEN=*), intent(in)                               :: step_dirpath
-        integer                                                    :: dir, loc
-        character(len=path_len)                                    :: file_path
-        character(len=10)                                          :: status
-        type(scalar_field), optional, intent(in)                   :: q_T_sf
-
-        if (old_grid_in) then
-            status = 'old'
-        else
-            status = 'new'
-        end if
-
-        call s_pack_boundary_condition_buffers(q_prim_vf, q_T_sf)
-
-        file_path = trim(step_dirpath) // '/bc_type.dat'
-        open (1, FILE=trim(file_path), form='unformatted', STATUS=status)
-        do dir = 1, num_dims
-            do loc = 1, 2
-                write (1) bc_type(dir, loc)%sf
-            end do
-        end do
-        close (1)
-
-        file_path = trim(step_dirpath) // '/bc_buffers.dat'
-        open (1, FILE=trim(file_path), form='unformatted', STATUS=status)
-        do dir = 1, num_dims
-            do loc = 1, 2
-                write (1) bc_buffers(dir, loc)%sf
-            end do
-        end do
-        close (1)
-
-    end subroutine s_write_serial_boundary_condition_files
-
-    !> Write boundary condition type and buffer data to per-rank parallel files using MPI I/O.
-    subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type, q_T_sf)
-
-        type(scalar_field), dimension(sys_size), intent(in)        :: q_prim_vf
-        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
-        integer                                                    :: dir, loc
-        character(len=path_len)                                    :: file_loc, file_path
-        type(scalar_field), intent(in), optional                   :: q_T_sf
-
-#ifdef MFC_MPI
-        integer          :: ierr
-        integer          :: file_id
-        character(len=7) :: proc_rank_str
-        logical          :: dir_check
-        integer          :: nelements
-
-        call s_pack_boundary_condition_buffers(q_prim_vf, q_T_sf)
-
-        file_loc = trim(case_dir) // '/restart_data/boundary_conditions'
-        if (proc_rank == 0) then
-            call my_inquire(file_loc, dir_check)
-            if (dir_check .neqv. .true.) then
-                call s_create_directory(trim(file_loc))
-            end if
-        end if
-
-        call s_create_mpi_types(bc_type)
-
-        call s_mpi_barrier()
-
-        call DelayFileAccess(proc_rank)
-
-        write (proc_rank_str, '(I7.7)') proc_rank
-        file_path = trim(file_loc) // '/bc_' // trim(proc_rank_str) // '.dat'
-        call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr)
-
-        ! Write bc_types
-        do dir = 1, num_dims
-            do loc = 1, 2
-#ifdef MFC_MIXED_PRECISION
-                nelements = sizeof(bc_type(dir, loc)%sf)
-                call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_BYTE, MPI_STATUS_IGNORE, ierr)
-#else
-                nelements = sizeof(bc_type(dir, loc)%sf)/4
-                call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
-#endif
-            end do
-        end do
-
-        ! Write bc_buffers
-        do dir = 1, num_dims
-            do loc = 1, 2
-                nelements = sizeof(bc_buffers(dir, loc)%sf)*mpi_io_type/stp
-                call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, nelements, mpi_io_p, MPI_STATUS_IGNORE, ierr)
-            end do
-        end do
-
-        call MPI_File_close(file_id, ierr)
-#endif
-
-    end subroutine s_write_parallel_boundary_condition_files
-
-    !> Read boundary condition type and buffer data from serial (unformatted) restart files.
-    subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type)
-
-        character(LEN=*), intent(in)                                  :: step_dirpath
-        type(integer_field), dimension(1:num_dims,1:2), intent(inout) :: bc_type
-        integer                                                       :: dir, loc
-        logical                                                       :: file_exist
-        character(len=path_len)                                       :: file_path
-
-        ! Read bc_types
-
-        file_path = trim(step_dirpath) // '/bc_type.dat'
-        inquire (FILE=trim(file_path), EXIST=file_exist)
-        if (.not. file_exist) then
-            call s_mpi_abort(trim(file_path) // ' is missing. Exiting.')
-        end if
-
-        open (1, FILE=trim(file_path), form='unformatted', STATUS='unknown')
-        do dir = 1, num_dims
-            do loc = 1, 2
-                read (1) bc_type(dir, loc)%sf
-                $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]')
-            end do
-        end do
-        close (1)
-
-        ! Read bc_buffers
-        file_path = trim(step_dirpath) // '/bc_buffers.dat'
-        inquire (FILE=trim(file_path), EXIST=file_exist)
-        if (.not. file_exist) then
-            call s_mpi_abort(trim(file_path) // ' is missing. Exiting.')
-        end if
-
-        open (1, FILE=trim(file_path), form='unformatted', STATUS='unknown')
-        do dir = 1, num_dims
-            do loc = 1, 2
-                read (1) bc_buffers(dir, loc)%sf
-                $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]')
-            end do
-        end do
-        close (1)
-
-    end subroutine s_read_serial_boundary_condition_files
-
-    !> Read boundary condition type and buffer data from per-rank parallel files using MPI I/O.
-    subroutine s_read_parallel_boundary_condition_files(bc_type)
-
-        type(integer_field), dimension(1:num_dims,1:2), intent(inout) :: bc_type
-        integer                                                       :: dir, loc
-        character(len=path_len)                                       :: file_loc, file_path
-
-#ifdef MFC_MPI
-        integer          :: ierr
-        integer          :: file_id
-        character(len=7) :: proc_rank_str
-        logical          :: dir_check
-        integer          :: nelements
-
-        file_loc = trim(case_dir) // '/restart_data/boundary_conditions'
-
-        if (proc_rank == 0) then
-            call my_inquire(file_loc, dir_check)
-            if (dir_check .neqv. .true.) then
-                call s_mpi_abort(trim(file_loc) // ' is missing. Exiting.')
-            end if
-        end if
-
-        call s_create_mpi_types(bc_type)
-
-        call s_mpi_barrier()
-
-        call DelayFileAccess(proc_rank)
-
-        write (proc_rank_str, '(I7.7)') proc_rank
-        file_path = trim(file_loc) // '/bc_' // trim(proc_rank_str) // '.dat'
-        call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr)
-
-        ! Read bc_types
-        do dir = 1, num_dims
-            do loc = 1, 2
-#ifdef MFC_MIXED_PRECISION
-                nelements = sizeof(bc_type(dir, loc)%sf)
-                call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_BYTE, MPI_STATUS_IGNORE, ierr)
-#else
-                nelements = sizeof(bc_type(dir, loc)%sf)/4
-                call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
-#endif
-                $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]')
-            end do
-        end do
-
-        ! Read bc_buffers
-        do dir = 1, num_dims
-            do loc = 1, 2
-                nelements = sizeof(bc_buffers(dir, loc)%sf)*mpi_io_type/stp
-                call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, nelements, mpi_io_p, MPI_STATUS_IGNORE, ierr)
-                $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]')
-            end do
-        end do
-
-        call MPI_File_close(file_id, ierr)
-#endif
-
-    end subroutine s_read_parallel_boundary_condition_files
-
-    !> Pack primitive variable boundary slices into bc_buffers arrays for serialization.
-    subroutine s_pack_boundary_condition_buffers(q_prim_vf, q_T_sf)
-
-        type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf
-        integer                                             :: i, j, k
-        type(scalar_field), intent(in), optional            :: q_T_sf
-
-        do k = 0, p
-            do j = 0, n
-                do i = 1, sys_size
-                    bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k)
-                    bc_buffers(1, 2)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k)
-                end do
-                if (chemistry .and. present(q_T_sf)) then
-                    bc_buffers(1, 1)%sf(sys_size + 1, j, k) = q_T_sf%sf(0, j, k)
-                    bc_buffers(1, 2)%sf(sys_size + 1, j, k) = q_T_sf%sf(m, j, k)
-                end if
-            end do
-        end do
-
-        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
-            if (n > 0) then
-                do k = 0, p
-                    do j = 1, sys_size
-                        do i = 0, m
-                            bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k)
-                            bc_buffers(2, 2)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k)
-                        end do
-                    end do
-                    if (chemistry .and. present(q_T_sf)) then
-                        do i = 0, m
-                            bc_buffers(2, 1)%sf(i, sys_size + 1, k) = q_T_sf%sf(i, 0, k)
-                            bc_buffers(2, 2)%sf(i, sys_size + 1, k) = q_T_sf%sf(i, n, k)
-                        end do
-                    end if
-                end do
-
-                #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
-                    if (p > 0) then
-                        do k = 1, sys_size
-                            do j = 0, n
-                                do i = 0, m
-                                    bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0)
-                                    bc_buffers(3, 2)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p)
-                                end do
-                            end do
-                        end do
-                        if (chemistry .and. present(q_T_sf)) then
-                            do j = 0, n
-                                do i = 0, m
-                                    bc_buffers(3, 1)%sf(i, j, sys_size + 1) = q_T_sf%sf(i, j, 0)
-                                    bc_buffers(3, 2)%sf(i, j, sys_size + 1) = q_T_sf%sf(i, j, p)
-                                end do
-                            end do
-                        end if
-                    end if
-                #:endif
-            end if
-        #:endif
-
-    end subroutine s_pack_boundary_condition_buffers
-
-    !> Initialize the per-cell boundary condition type arrays with the global default BC values.
-    subroutine s_assign_default_bc_type(bc_type)
-
-        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
-
-        bc_type(1, 1)%sf(:,:,:) = int(min(bc_x%beg, 0), kind=1)
-        bc_type(1, 2)%sf(:,:,:) = int(min(bc_x%end, 0), kind=1)
-        $:GPU_UPDATE(device='[bc_type(1, 1)%sf, bc_type(1, 2)%sf]')
-
-        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
-            if (n > 0) then
-                bc_type(2, 1)%sf(:,:,:) = int(min(bc_y%beg, 0), kind=1)
-                bc_type(2, 2)%sf(:,:,:) = int(min(bc_y%end, 0), kind=1)
-                $:GPU_UPDATE(device='[bc_type(2, 1)%sf, bc_type(2, 2)%sf]')
-                #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
-                    if (p > 0) then
-                        bc_type(3, 1)%sf(:,:,:) = int(min(bc_z%beg, 0), kind=1)
-                        bc_type(3, 2)%sf(:,:,:) = int(min(bc_z%end, 0), kind=1)
-                        $:GPU_UPDATE(device='[bc_type(3, 1)%sf, bc_type(3, 2)%sf]')
-                    end if
-                #:endif
-            end if
-        #:endif
-
-    end subroutine s_assign_default_bc_type
-
-    !> Populate the buffers of the grid variables, which are constituted of the cell-boundary locations and cell-width
-    !! distributions, based on the boundary conditions.
-    subroutine s_populate_grid_variables_buffers
-
-        integer :: i
-
-#ifdef MFC_SIMULATION
-        ! Required for compatibility between codes
-        type(int_bounds_info) :: offset_x, offset_y, offset_z
-
-        offset_x%beg = buff_size; offset_x%end = buff_size
-        offset_y%beg = buff_size; offset_y%end = buff_size
-        offset_z%beg = buff_size; offset_z%end = buff_size
-#endif
-
-#ifndef MFC_PRE_PROCESS
-        ! Population of Buffers in x-direction
-
-        ! Populating cell-width distribution buffer at bc_x%beg
-        if (bc_x%beg >= 0) then
-            call s_mpi_sendrecv_grid_variables_buffers(1, -1)
-        else if (bc_x%beg <= BC_GHOST_EXTRAP) then
-            do i = 1, buff_size
-                dx(-i) = dx(0)
-            end do
-        else if (bc_x%beg == BC_REFLECTIVE) then
-            do i = 1, buff_size
-                dx(-i) = dx(i - 1)
-            end do
-        else if (bc_x%beg == BC_PERIODIC) then
-            do i = 1, buff_size
-                dx(-i) = dx(m - (i - 1))
-            end do
-        end if
-
-        ! Computing the cell-boundary and center locations buffer at bc_x%beg
-        do i = 1, offset_x%beg
-            x_cb(-1 - i) = x_cb(-i) - dx(-i)
-        end do
-
-        do i = 1, buff_size
-            x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp
-        end do
-
-        ! Populating the cell-width distribution buffer at bc_x%end
-        if (bc_x%end >= 0) then
-            call s_mpi_sendrecv_grid_variables_buffers(1, 1)
-        else if (bc_x%end <= BC_GHOST_EXTRAP) then
-            do i = 1, buff_size
-                dx(m + i) = dx(m)
-            end do
-        else if (bc_x%end == BC_REFLECTIVE) then
-            do i = 1, buff_size
-                dx(m + i) = dx(m - (i - 1))
-            end do
-        else if (bc_x%end == BC_PERIODIC) then
-            do i = 1, buff_size
-                dx(m + i) = dx(i - 1)
-            end do
-        end if
-
-        ! Populating the cell-boundary and center locations buffer at bc_x%end
-        do i = 1, offset_x%end
-            x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i)
-        end do
-
-        do i = 1, buff_size
-            x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp
-        end do
-
-        ! Population of Buffers in y-direction
-
-        ! Populating cell-width distribution buffer at bc_y%beg
-        if (n == 0) then
-            return
-        else if (bc_y%beg >= 0) then
-            call s_mpi_sendrecv_grid_variables_buffers(2, -1)
-        else if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then
-            do i = 1, buff_size
-                dy(-i) = dy(0)
-            end do
-        else if (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then
-            do i = 1, buff_size
-                dy(-i) = dy(i - 1)
-            end do
-        else if (bc_y%beg == BC_PERIODIC) then
-            do i = 1, buff_size
-                dy(-i) = dy(n - (i - 1))
-            end do
-        end if
-
-        ! Computing the cell-boundary and center locations buffer at bc_y%beg
-        do i = 1, offset_y%beg
-            y_cb(-1 - i) = y_cb(-i) - dy(-i)
-        end do
-
-        do i = 1, buff_size
-            y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp
-        end do
-
-        ! Populating the cell-width distribution buffer at bc_y%end
-        if (bc_y%end >= 0) then
-            call s_mpi_sendrecv_grid_variables_buffers(2, 1)
-        else if (bc_y%end <= BC_GHOST_EXTRAP) then
-            do i = 1, buff_size
-                dy(n + i) = dy(n)
-            end do
-        else if (bc_y%end == BC_REFLECTIVE) then
-            do i = 1, buff_size
-                dy(n + i) = dy(n - (i - 1))
-            end do
-        else if (bc_y%end == BC_PERIODIC) then
-            do i = 1, buff_size
-                dy(n + i) = dy(i - 1)
-            end do
-        end if
-
-        ! Populating the cell-boundary and center locations buffer at bc_y%end
-        do i = 1, offset_y%end
-            y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i)
-        end do
-
-        do i = 1, buff_size
-            y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp
-        end do
-
-        ! Population of Buffers in z-direction
-
-        ! Populating cell-width distribution buffer at bc_z%beg
-        if (p == 0) then
-            return
-        else if (Bc_z%beg >= 0) then
-            call s_mpi_sendrecv_grid_variables_buffers(3, -1)
-        else if (bc_z%beg <= BC_GHOST_EXTRAP) then
-            do i = 1, buff_size
-                dz(-i) = dz(0)
-            end do
-        else if (bc_z%beg == BC_REFLECTIVE) then
-            do i = 1, buff_size
-                dz(-i) = dz(i - 1)
-            end do
-        else if (bc_z%beg == BC_PERIODIC) then
-            do i = 1, buff_size
-                dz(-i) = dz(p - (i - 1))
-            end do
-        end if
-
-        ! Computing the cell-boundary and center locations buffer at bc_z%beg
-        do i = 1, offset_z%beg
-            z_cb(-1 - i) = z_cb(-i) - dz(-i)
-        end do
-
-        do i = 1, buff_size
-            z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp
-        end do
-
-        ! Populating the cell-width distribution buffer at bc_z%end
-        if (bc_z%end >= 0) then
-            call s_mpi_sendrecv_grid_variables_buffers(3, 1)
-        else if (bc_z%end <= BC_GHOST_EXTRAP) then
-            do i = 1, buff_size
-                dz(p + i) = dz(p)
-            end do
-        else if (bc_z%end == BC_REFLECTIVE) then
-            do i = 1, buff_size
-                dz(p + i) = dz(p - (i - 1))
-            end do
-        else if (bc_z%end == BC_PERIODIC) then
-            do i = 1, buff_size
-                dz(p + i) = dz(i - 1)
-            end do
-        end if
-
-        ! Populating the cell-boundary and center locations buffer at bc_z%end
-        do i = 1, offset_z%end
-            z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i)
-        end do
-
-        do i = 1, buff_size
-            z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp
-        end do
-#endif
-
-    end subroutine s_populate_grid_variables_buffers
-
     !> Deallocate boundary condition buffer arrays allocated during module initialization.
     subroutine s_finalize_boundary_common_module()
 
diff --git a/src/common/m_boundary_io.fpp b/src/common/m_boundary_io.fpp
new file mode 100644
index 0000000000..c7d0f9ffef
--- /dev/null
+++ b/src/common/m_boundary_io.fpp
@@ -0,0 +1,1035 @@
+!>
+!! @file
+!! @brief Contains module m_boundary_io
+
+!> @brief Boundary condition restart I/O, capillary/IGR buffer population, and grid-variable buffers
+#:include 'case.fpp'
+#:include 'macros.fpp'
+
+module m_boundary_io
+
+    use m_derived_types
+    use m_global_parameters
+    use m_mpi_proxy
+    use m_constants
+    use m_delay_file_access
+    use m_compile_specific
+    use m_boundary_primitives
+
+    implicit none
+
+#ifdef MFC_MPI
+    integer, dimension(1:3,1:2) :: MPI_BC_TYPE_TYPE
+    integer, dimension(1:3,1:2) :: MPI_BC_BUFFER_TYPE
+#endif
+
+contains
+
+    !> Populate ghost cell buffers for the color function and its divergence used in capillary surface tension.
+    impure subroutine s_populate_capillary_buffers(c_divs, bc_type, bc)
+
+        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
+        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
+        type(bc_xyz_info), intent(in)                              :: bc
+        integer                                                    :: k, l
+
+        !> x-direction
+
+        if (bc%x%beg >= 0) then
+            call s_mpi_sendrecv_variables_buffers(c_divs, 1, -1, num_dims + 1)
+        else
+            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+            do l = 0, p
+                do k = 0, n
+                    select case (bc_type(1, 1)%sf(0, k, l))
+                    case (BC_PERIODIC)
+                        call s_color_function_periodic(c_divs, 1, -1, k, l)
+                    case (BC_REFLECTIVE)
+                        call s_color_function_reflective(c_divs, 1, -1, k, l)
+                    case default
+                        call s_color_function_ghost_cell_extrapolation(c_divs, 1, -1, k, l)
+                    end select
+                end do
+            end do
+            $:END_GPU_PARALLEL_LOOP()
+        end if
+
+        if (bc%x%end >= 0) then
+            call s_mpi_sendrecv_variables_buffers(c_divs, 1, 1, num_dims + 1)
+        else
+            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+            do l = 0, p
+                do k = 0, n
+                    select case (bc_type(1, 2)%sf(0, k, l))
+                    case (BC_PERIODIC)
+                        call s_color_function_periodic(c_divs, 1, 1, k, l)
+                    case (BC_REFLECTIVE)
+                        call s_color_function_reflective(c_divs, 1, 1, k, l)
+                    case default
+                        call s_color_function_ghost_cell_extrapolation(c_divs, 1, 1, k, l)
+                    end select
+                end do
+            end do
+            $:END_GPU_PARALLEL_LOOP()
+        end if
+
+        if (n == 0) return
+
+        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
+            !> y-direction
+            if (bc%y%beg >= 0) then
+                call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = 0, p
+                    do k = -buff_size, m + buff_size
+                        select case (bc_type(2, 1)%sf(k, 0, l))
+                        case (BC_PERIODIC)
+                            call s_color_function_periodic(c_divs, 2, -1, k, l)
+                        case (BC_REFLECTIVE)
+                            call s_color_function_reflective(c_divs, 2, -1, k, l)
+                        case default
+                            call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l)
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+
+            if (bc%y%end >= 0) then
+                call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = 0, p
+                    do k = -buff_size, m + buff_size
+                        select case (bc_type(2, 2)%sf(k, 0, l))
+                        case (BC_PERIODIC)
+                            call s_color_function_periodic(c_divs, 2, 1, k, l)
+                        case (BC_REFLECTIVE)
+                            call s_color_function_reflective(c_divs, 2, 1, k, l)
+                        case default
+                            call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l)
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+        #:endif
+
+        if (p == 0) return
+
+        #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
+            !> z-direction
+            if (bc%z%beg >= 0) then
+                call s_mpi_sendrecv_variables_buffers(c_divs, 3, -1, num_dims + 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = -buff_size, n + buff_size
+                    do k = -buff_size, m + buff_size
+                        select case (bc_type(3, 1)%sf(k, l, 0))
+                        case (BC_PERIODIC)
+                            call s_color_function_periodic(c_divs, 3, -1, k, l)
+                        case (BC_REFLECTIVE)
+                            call s_color_function_reflective(c_divs, 3, -1, k, l)
+                        case default
+                            call s_color_function_ghost_cell_extrapolation(c_divs, 3, -1, k, l)
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+
+            if (bc%z%end >= 0) then
+                call s_mpi_sendrecv_variables_buffers(c_divs, 3, 1, num_dims + 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = -buff_size, n + buff_size
+                    do k = -buff_size, m + buff_size
+                        select case (bc_type(3, 2)%sf(k, l, 0))
+                        case (BC_PERIODIC)
+                            call s_color_function_periodic(c_divs, 3, 1, k, l)
+                        case (BC_REFLECTIVE)
+                            call s_color_function_reflective(c_divs, 3, 1, k, l)
+                        case default
+                            call s_color_function_ghost_cell_extrapolation(c_divs, 3, 1, k, l)
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+        #:endif
+
+    end subroutine s_populate_capillary_buffers
+
+    !> Apply periodic boundary conditions to the color function and its divergence fields.
+    subroutine s_color_function_periodic(c_divs, bc_dir, bc_loc, k, l)
+
+        $:GPU_ROUTINE(function_name='s_color_function_periodic', parallelism='[seq]', cray_inline=True)
+        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
+        integer, intent(in)                                        :: bc_dir, bc_loc
+        integer, intent(in)                                        :: k, l
+        integer                                                    :: j, i
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  ! bc_x%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(m - (j - 1), k, l)
+                    end do
+                end do
+            else  !< bc_x%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(j - 1, k, l)
+                    end do
+                end do
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, n - (j - 1), l)
+                    end do
+                end do
+            else  !< bc_y%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, j - 1, l)
+                    end do
+                end do
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, p - (j - 1))
+                    end do
+                end do
+            else  !< bc_z%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, j - 1)
+                    end do
+                end do
+            end if
+        end if
+
+    end subroutine s_color_function_periodic
+
+    !> Apply reflective boundary conditions to the color function and its divergence fields.
+    subroutine s_color_function_reflective(c_divs, bc_dir, bc_loc, k, l)
+
+        $:GPU_ROUTINE(function_name='s_color_function_reflective', parallelism='[seq]', cray_inline=True)
+        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
+        integer, intent(in)                                        :: bc_dir, bc_loc
+        integer, intent(in)                                        :: k, l
+        integer                                                    :: j, i
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  ! bc_x%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        if (i == bc_dir) then
+                            c_divs(i)%sf(-j, k, l) = -c_divs(i)%sf(j - 1, k, l)
+                        else
+                            c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(j - 1, k, l)
+                        end if
+                    end do
+                end do
+            else  !< bc_x%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        if (i == bc_dir) then
+                            c_divs(i)%sf(m + j, k, l) = -c_divs(i)%sf(m - (j - 1), k, l)
+                        else
+                            c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(m - (j - 1), k, l)
+                        end if
+                    end do
+                end do
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        if (i == bc_dir) then
+                            c_divs(i)%sf(k, -j, l) = -c_divs(i)%sf(k, j - 1, l)
+                        else
+                            c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, j - 1, l)
+                        end if
+                    end do
+                end do
+            else  !< bc_y%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        if (i == bc_dir) then
+                            c_divs(i)%sf(k, n + j, l) = -c_divs(i)%sf(k, n - (j - 1), l)
+                        else
+                            c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, n - (j - 1), l)
+                        end if
+                    end do
+                end do
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        if (i == bc_dir) then
+                            c_divs(i)%sf(k, l, -j) = -c_divs(i)%sf(k, l, j - 1)
+                        else
+                            c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, j - 1)
+                        end if
+                    end do
+                end do
+            else  !< bc_z%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        if (i == bc_dir) then
+                            c_divs(i)%sf(k, l, p + j) = -c_divs(i)%sf(k, l, p - (j - 1))
+                        else
+                            c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, p - (j - 1))
+                        end if
+                    end do
+                end do
+            end if
+        end if
+
+    end subroutine s_color_function_reflective
+
+    !> Extrapolate the color function and its divergence into ghost cells by copying boundary values.
+    subroutine s_color_function_ghost_cell_extrapolation(c_divs, bc_dir, bc_loc, k, l)
+
+        $:GPU_ROUTINE(function_name='s_color_function_ghost_cell_extrapolation', parallelism='[seq]', cray_inline=True)
+        type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs
+        integer, intent(in)                                        :: bc_dir, bc_loc
+        integer, intent(in)                                        :: k, l
+        integer                                                    :: j, i
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  ! bc_x%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(-j, k, l) = c_divs(i)%sf(0, k, l)
+                    end do
+                end do
+            else  !< bc_x%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(m + j, k, l) = c_divs(i)%sf(m, k, l)
+                    end do
+                end do
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, -j, l) = c_divs(i)%sf(k, 0, l)
+                    end do
+                end do
+            else  !< bc_y%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, n + j, l) = c_divs(i)%sf(k, n, l)
+                    end do
+                end do
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, l, -j) = c_divs(i)%sf(k, l, 0)
+                    end do
+                end do
+            else  !< bc_z%end
+                do i = 1, num_dims + 1
+                    do j = 1, buff_size
+                        c_divs(i)%sf(k, l, p + j) = c_divs(i)%sf(k, l, p)
+                    end do
+                end do
+            end if
+        end if
+
+    end subroutine s_color_function_ghost_cell_extrapolation
+
+    !> Populate ghost cell buffers for the Jacobian scalar field used in the IGR elliptic solver.
+    impure subroutine s_populate_F_igr_buffers(bc_type, jac_sf)
+
+        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
+        type(scalar_field), dimension(1:), intent(inout)           :: jac_sf
+        integer                                                    :: j, k, l
+
+        if (bc_x%beg >= 0) then
+            call s_mpi_sendrecv_variables_buffers(jac_sf, 1, -1, 1)
+        else
+            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+            do l = 0, p
+                do k = 0, n
+                    select case (bc_type(1, 1)%sf(0, k, l))
+                    case (BC_PERIODIC)
+                        do j = 1, buff_size
+                            jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(m - j + 1, k, l)
+                        end do
+                    case (BC_REFLECTIVE)
+                        do j = 1, buff_size
+                            jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(j - 1, k, l)
+                        end do
+                    case default
+                        do j = 1, buff_size
+                            jac_sf(1)%sf(-j, k, l) = jac_sf(1)%sf(0, k, l)
+                        end do
+                    end select
+                end do
+            end do
+            $:END_GPU_PARALLEL_LOOP()
+        end if
+
+        if (bc_x%end >= 0) then
+            call s_mpi_sendrecv_variables_buffers(jac_sf, 1, 1, 1)
+        else
+            $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+            do l = 0, p
+                do k = 0, n
+                    select case (bc_type(1, 2)%sf(0, k, l))
+                    case (BC_PERIODIC)
+                        do j = 1, buff_size
+                            jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(j - 1, k, l)
+                        end do
+                    case (BC_REFLECTIVE)
+                        do j = 1, buff_size
+                            jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m - (j - 1), k, l)
+                        end do
+                    case default
+                        do j = 1, buff_size
+                            jac_sf(1)%sf(m + j, k, l) = jac_sf(1)%sf(m, k, l)
+                        end do
+                    end select
+                end do
+            end do
+            $:END_GPU_PARALLEL_LOOP()
+        end if
+
+        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
+            if (n == 0) then
+                return
+            else if (bc_y%beg >= 0) then
+                call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = 0, p
+                    do k = idwbuff(1)%beg, idwbuff(1)%end
+                        select case (bc_type(2, 1)%sf(k, 0, l))
+                        case (BC_PERIODIC)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l)
+                            end do
+                        case (BC_REFLECTIVE)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l)
+                            end do
+                        case default
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l)
+                            end do
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+
+            if (bc_y%end >= 0) then
+                call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = 0, p
+                    do k = idwbuff(1)%beg, idwbuff(1)%end
+                        select case (bc_type(2, 2)%sf(k, 0, l))
+                        case (BC_PERIODIC)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l)
+                            end do
+                        case (BC_REFLECTIVE)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l)
+                            end do
+                        case default
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l)
+                            end do
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+        #:endif
+
+        #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
+            if (p == 0) then
+                return
+            else if (bc_z%beg >= 0) then
+                call s_mpi_sendrecv_variables_buffers(jac_sf, 3, -1, 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = idwbuff(2)%beg, idwbuff(2)%end
+                    do k = idwbuff(1)%beg, idwbuff(1)%end
+                        select case (bc_type(3, 1)%sf(k, l, 0))
+                        case (BC_PERIODIC)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, p - j + 1)
+                            end do
+                        case (BC_REFLECTIVE)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, j - 1)
+                            end do
+                        case default
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, l, -j) = jac_sf(1)%sf(k, l, 0)
+                            end do
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+
+            if (bc_z%end >= 0) then
+                call s_mpi_sendrecv_variables_buffers(jac_sf, 3, 1, 1)
+            else
+                $:GPU_PARALLEL_LOOP(private='[l, k]', collapse=2)
+                do l = idwbuff(2)%beg, idwbuff(2)%end
+                    do k = idwbuff(1)%beg, idwbuff(1)%end
+                        select case (bc_type(3, 2)%sf(k, l, 0))
+                        case (BC_PERIODIC)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, j - 1)
+                            end do
+                        case (BC_REFLECTIVE)
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p - (j - 1))
+                            end do
+                        case default
+                            do j = 1, buff_size
+                                jac_sf(1)%sf(k, l, p + j) = jac_sf(1)%sf(k, l, p)
+                            end do
+                        end select
+                    end do
+                end do
+                $:END_GPU_PARALLEL_LOOP()
+            end if
+        #:endif
+
+    end subroutine s_populate_F_igr_buffers
+
+    !> Create MPI derived datatypes for boundary condition type arrays and buffer arrays used in parallel I/O.
+    impure subroutine s_create_mpi_types(bc_type)
+
+        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
+
+#ifdef MFC_MPI
+        integer               :: dir, loc
+        integer, dimension(3) :: sf_start_idx, sf_extents_loc
+        integer               :: ierr
+
+        do dir = 1, num_dims
+            do loc = 1, 2
+                sf_start_idx = (/0, 0, 0/)
+                sf_extents_loc = shape(bc_type(dir, loc)%sf)
+
+                call MPI_TYPE_CREATE_SUBARRAY(num_dims, sf_extents_loc, sf_extents_loc, sf_start_idx, MPI_ORDER_FORTRAN, &
+                                              & MPI_INTEGER, MPI_BC_TYPE_TYPE(dir, loc), ierr)
+                call MPI_TYPE_COMMIT(MPI_BC_TYPE_TYPE(dir, loc), ierr)
+            end do
+        end do
+
+        do dir = 1, num_dims
+            do loc = 1, 2
+                sf_start_idx = (/0, 0, 0/)
+                sf_extents_loc = shape(bc_buffers(dir, loc)%sf)
+
+                call MPI_TYPE_CREATE_SUBARRAY(num_dims, sf_extents_loc*mpi_io_type, sf_extents_loc*mpi_io_type, sf_start_idx, &
+                                              & MPI_ORDER_FORTRAN, mpi_io_p, MPI_BC_BUFFER_TYPE(dir, loc), ierr)
+                call MPI_TYPE_COMMIT(MPI_BC_BUFFER_TYPE(dir, loc), ierr)
+            end do
+        end do
+#endif
+
+    end subroutine s_create_mpi_types
+
+    !> Write boundary condition type and buffer data to serial (unformatted) restart files.
+    subroutine s_write_serial_boundary_condition_files(q_prim_vf, bc_type, step_dirpath, old_grid_in, q_T_sf)
+
+        type(scalar_field), dimension(sys_size), intent(in)        :: q_prim_vf
+        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
+        logical, intent(in)                                        :: old_grid_in
+        character(LEN=*), intent(in)                               :: step_dirpath
+        integer                                                    :: dir, loc
+        character(len=path_len)                                    :: file_path
+        character(len=10)                                          :: status
+        type(scalar_field), optional, intent(in)                   :: q_T_sf
+
+        if (old_grid_in) then
+            status = 'old'
+        else
+            status = 'new'
+        end if
+
+        call s_pack_boundary_condition_buffers(q_prim_vf, q_T_sf)
+
+        file_path = trim(step_dirpath) // '/bc_type.dat'
+        open (1, FILE=trim(file_path), form='unformatted', STATUS=status)
+        do dir = 1, num_dims
+            do loc = 1, 2
+                write (1) bc_type(dir, loc)%sf
+            end do
+        end do
+        close (1)
+
+        file_path = trim(step_dirpath) // '/bc_buffers.dat'
+        open (1, FILE=trim(file_path), form='unformatted', STATUS=status)
+        do dir = 1, num_dims
+            do loc = 1, 2
+                write (1) bc_buffers(dir, loc)%sf
+            end do
+        end do
+        close (1)
+
+    end subroutine s_write_serial_boundary_condition_files
+
+    !> Write boundary condition type and buffer data to per-rank parallel files using MPI I/O.
+    subroutine s_write_parallel_boundary_condition_files(q_prim_vf, bc_type, q_T_sf)
+
+        type(scalar_field), dimension(sys_size), intent(in)        :: q_prim_vf
+        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
+        integer                                                    :: dir, loc
+        character(len=path_len)                                    :: file_loc, file_path
+        type(scalar_field), intent(in), optional                   :: q_T_sf
+
+#ifdef MFC_MPI
+        integer          :: ierr
+        integer          :: file_id
+        character(len=7) :: proc_rank_str
+        logical          :: dir_check
+        integer          :: nelements
+
+        call s_pack_boundary_condition_buffers(q_prim_vf, q_T_sf)
+
+        file_loc = trim(case_dir) // '/restart_data/boundary_conditions'
+        if (proc_rank == 0) then
+            call my_inquire(file_loc, dir_check)
+            if (dir_check .neqv. .true.) then
+                call s_create_directory(trim(file_loc))
+            end if
+        end if
+
+        call s_create_mpi_types(bc_type)
+
+        call s_mpi_barrier()
+
+        call DelayFileAccess(proc_rank)
+
+        write (proc_rank_str, '(I7.7)') proc_rank
+        file_path = trim(file_loc) // '/bc_' // trim(proc_rank_str) // '.dat'
+        call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_CREATE + MPI_MODE_WRONLY, MPI_INFO_NULL, file_id, ierr)
+
+        ! Write bc_types
+        do dir = 1, num_dims
+            do loc = 1, 2
+#ifdef MFC_MIXED_PRECISION
+                nelements = sizeof(bc_type(dir, loc)%sf)
+                call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_BYTE, MPI_STATUS_IGNORE, ierr)
+#else
+                nelements = sizeof(bc_type(dir, loc)%sf)/4
+                call MPI_File_write_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
+#endif
+            end do
+        end do
+
+        ! Write bc_buffers
+        do dir = 1, num_dims
+            do loc = 1, 2
+                nelements = sizeof(bc_buffers(dir, loc)%sf)*mpi_io_type/stp
+                call MPI_File_write_all(file_id, bc_buffers(dir, loc)%sf, nelements, mpi_io_p, MPI_STATUS_IGNORE, ierr)
+            end do
+        end do
+
+        call MPI_File_close(file_id, ierr)
+#endif
+
+    end subroutine s_write_parallel_boundary_condition_files
+
+    !> Read boundary condition type and buffer data from serial (unformatted) restart files.
+    subroutine s_read_serial_boundary_condition_files(step_dirpath, bc_type)
+
+        character(LEN=*), intent(in)                                  :: step_dirpath
+        type(integer_field), dimension(1:num_dims,1:2), intent(inout) :: bc_type
+        integer                                                       :: dir, loc
+        logical                                                       :: file_exist
+        character(len=path_len)                                       :: file_path
+
+        ! Read bc_types
+
+        file_path = trim(step_dirpath) // '/bc_type.dat'
+        inquire (FILE=trim(file_path), EXIST=file_exist)
+        if (.not. file_exist) then
+            call s_mpi_abort(trim(file_path) // ' is missing. Exiting.')
+        end if
+
+        open (1, FILE=trim(file_path), form='unformatted', STATUS='unknown')
+        do dir = 1, num_dims
+            do loc = 1, 2
+                read (1) bc_type(dir, loc)%sf
+                $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]')
+            end do
+        end do
+        close (1)
+
+        ! Read bc_buffers
+        file_path = trim(step_dirpath) // '/bc_buffers.dat'
+        inquire (FILE=trim(file_path), EXIST=file_exist)
+        if (.not. file_exist) then
+            call s_mpi_abort(trim(file_path) // ' is missing. Exiting.')
+        end if
+
+        open (1, FILE=trim(file_path), form='unformatted', STATUS='unknown')
+        do dir = 1, num_dims
+            do loc = 1, 2
+                read (1) bc_buffers(dir, loc)%sf
+                $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]')
+            end do
+        end do
+        close (1)
+
+    end subroutine s_read_serial_boundary_condition_files
+
+    !> Read boundary condition type and buffer data from per-rank parallel files using MPI I/O.
+    subroutine s_read_parallel_boundary_condition_files(bc_type)
+
+        type(integer_field), dimension(1:num_dims,1:2), intent(inout) :: bc_type
+        integer                                                       :: dir, loc
+        character(len=path_len)                                       :: file_loc, file_path
+
+#ifdef MFC_MPI
+        integer          :: ierr
+        integer          :: file_id
+        character(len=7) :: proc_rank_str
+        logical          :: dir_check
+        integer          :: nelements
+
+        file_loc = trim(case_dir) // '/restart_data/boundary_conditions'
+
+        if (proc_rank == 0) then
+            call my_inquire(file_loc, dir_check)
+            if (dir_check .neqv. .true.) then
+                call s_mpi_abort(trim(file_loc) // ' is missing. Exiting.')
+            end if
+        end if
+
+        call s_create_mpi_types(bc_type)
+
+        call s_mpi_barrier()
+
+        call DelayFileAccess(proc_rank)
+
+        write (proc_rank_str, '(I7.7)') proc_rank
+        file_path = trim(file_loc) // '/bc_' // trim(proc_rank_str) // '.dat'
+        call MPI_File_open(MPI_COMM_SELF, trim(file_path), MPI_MODE_RDONLY, MPI_INFO_NULL, file_id, ierr)
+
+        ! Read bc_types
+        do dir = 1, num_dims
+            do loc = 1, 2
+#ifdef MFC_MIXED_PRECISION
+                nelements = sizeof(bc_type(dir, loc)%sf)
+                call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_BYTE, MPI_STATUS_IGNORE, ierr)
+#else
+                nelements = sizeof(bc_type(dir, loc)%sf)/4
+                call MPI_File_read_all(file_id, bc_type(dir, loc)%sf, nelements, MPI_INTEGER, MPI_STATUS_IGNORE, ierr)
+#endif
+                $:GPU_UPDATE(device='[bc_type(dir, loc)%sf]')
+            end do
+        end do
+
+        ! Read bc_buffers
+        do dir = 1, num_dims
+            do loc = 1, 2
+                nelements = sizeof(bc_buffers(dir, loc)%sf)*mpi_io_type/stp
+                call MPI_File_read_all(file_id, bc_buffers(dir, loc)%sf, nelements, mpi_io_p, MPI_STATUS_IGNORE, ierr)
+                $:GPU_UPDATE(device='[bc_buffers(dir, loc)%sf]')
+            end do
+        end do
+
+        call MPI_File_close(file_id, ierr)
+#endif
+
+    end subroutine s_read_parallel_boundary_condition_files
+
+    !> Pack primitive variable boundary slices into bc_buffers arrays for serialization.
+    subroutine s_pack_boundary_condition_buffers(q_prim_vf, q_T_sf)
+
+        type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf
+        integer                                             :: i, j, k
+        type(scalar_field), intent(in), optional            :: q_T_sf
+
+        do k = 0, p
+            do j = 0, n
+                do i = 1, sys_size
+                    bc_buffers(1, 1)%sf(i, j, k) = q_prim_vf(i)%sf(0, j, k)
+                    bc_buffers(1, 2)%sf(i, j, k) = q_prim_vf(i)%sf(m, j, k)
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    bc_buffers(1, 1)%sf(sys_size + 1, j, k) = q_T_sf%sf(0, j, k)
+                    bc_buffers(1, 2)%sf(sys_size + 1, j, k) = q_T_sf%sf(m, j, k)
+                end if
+            end do
+        end do
+
+        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
+            if (n > 0) then
+                do k = 0, p
+                    do j = 1, sys_size
+                        do i = 0, m
+                            bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k)
+                            bc_buffers(2, 2)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k)
+                        end do
+                    end do
+                    if (chemistry .and. present(q_T_sf)) then
+                        do i = 0, m
+                            bc_buffers(2, 1)%sf(i, sys_size + 1, k) = q_T_sf%sf(i, 0, k)
+                            bc_buffers(2, 2)%sf(i, sys_size + 1, k) = q_T_sf%sf(i, n, k)
+                        end do
+                    end if
+                end do
+
+                #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
+                    if (p > 0) then
+                        do k = 1, sys_size
+                            do j = 0, n
+                                do i = 0, m
+                                    bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0)
+                                    bc_buffers(3, 2)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p)
+                                end do
+                            end do
+                        end do
+                        if (chemistry .and. present(q_T_sf)) then
+                            do j = 0, n
+                                do i = 0, m
+                                    bc_buffers(3, 1)%sf(i, j, sys_size + 1) = q_T_sf%sf(i, j, 0)
+                                    bc_buffers(3, 2)%sf(i, j, sys_size + 1) = q_T_sf%sf(i, j, p)
+                                end do
+                            end do
+                        end if
+                    end if
+                #:endif
+            end if
+        #:endif
+
+    end subroutine s_pack_boundary_condition_buffers
+
+    !> Initialize the per-cell boundary condition type arrays with the global default BC values.
+    subroutine s_assign_default_bc_type(bc_type)
+
+        type(integer_field), dimension(1:num_dims,1:2), intent(in) :: bc_type
+
+        bc_type(1, 1)%sf(:,:,:) = int(min(bc_x%beg, 0), kind=1)
+        bc_type(1, 2)%sf(:,:,:) = int(min(bc_x%end, 0), kind=1)
+        $:GPU_UPDATE(device='[bc_type(1, 1)%sf, bc_type(1, 2)%sf]')
+
+        #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
+            if (n > 0) then
+                bc_type(2, 1)%sf(:,:,:) = int(min(bc_y%beg, 0), kind=1)
+                bc_type(2, 2)%sf(:,:,:) = int(min(bc_y%end, 0), kind=1)
+                $:GPU_UPDATE(device='[bc_type(2, 1)%sf, bc_type(2, 2)%sf]')
+                #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
+                    if (p > 0) then
+                        bc_type(3, 1)%sf(:,:,:) = int(min(bc_z%beg, 0), kind=1)
+                        bc_type(3, 2)%sf(:,:,:) = int(min(bc_z%end, 0), kind=1)
+                        $:GPU_UPDATE(device='[bc_type(3, 1)%sf, bc_type(3, 2)%sf]')
+                    end if
+                #:endif
+            end if
+        #:endif
+
+    end subroutine s_assign_default_bc_type
+
+    !> Populate the buffers of the grid variables, which are constituted of the cell-boundary locations and cell-width
+    !! distributions, based on the boundary conditions.
+    subroutine s_populate_grid_variables_buffers
+
+        integer :: i
+
+#ifdef MFC_SIMULATION
+        ! Required for compatibility between codes
+        type(int_bounds_info) :: offset_x, offset_y, offset_z
+
+        offset_x%beg = buff_size; offset_x%end = buff_size
+        offset_y%beg = buff_size; offset_y%end = buff_size
+        offset_z%beg = buff_size; offset_z%end = buff_size
+#endif
+
+#ifndef MFC_PRE_PROCESS
+        ! Population of Buffers in x-direction
+
+        ! Populating cell-width distribution buffer at bc_x%beg
+        if (bc_x%beg >= 0) then
+            call s_mpi_sendrecv_grid_variables_buffers(1, -1)
+        else if (bc_x%beg <= BC_GHOST_EXTRAP) then
+            do i = 1, buff_size
+                dx(-i) = dx(0)
+            end do
+        else if (bc_x%beg == BC_REFLECTIVE) then
+            do i = 1, buff_size
+                dx(-i) = dx(i - 1)
+            end do
+        else if (bc_x%beg == BC_PERIODIC) then
+            do i = 1, buff_size
+                dx(-i) = dx(m - (i - 1))
+            end do
+        end if
+
+        ! Computing the cell-boundary and center locations buffer at bc_x%beg
+        do i = 1, offset_x%beg
+            x_cb(-1 - i) = x_cb(-i) - dx(-i)
+        end do
+
+        do i = 1, buff_size
+            x_cc(-i) = x_cc(1 - i) - (dx(1 - i) + dx(-i))/2._wp
+        end do
+
+        ! Populating the cell-width distribution buffer at bc_x%end
+        if (bc_x%end >= 0) then
+            call s_mpi_sendrecv_grid_variables_buffers(1, 1)
+        else if (bc_x%end <= BC_GHOST_EXTRAP) then
+            do i = 1, buff_size
+                dx(m + i) = dx(m)
+            end do
+        else if (bc_x%end == BC_REFLECTIVE) then
+            do i = 1, buff_size
+                dx(m + i) = dx(m - (i - 1))
+            end do
+        else if (bc_x%end == BC_PERIODIC) then
+            do i = 1, buff_size
+                dx(m + i) = dx(i - 1)
+            end do
+        end if
+
+        ! Populating the cell-boundary and center locations buffer at bc_x%end
+        do i = 1, offset_x%end
+            x_cb(m + i) = x_cb(m + (i - 1)) + dx(m + i)
+        end do
+
+        do i = 1, buff_size
+            x_cc(m + i) = x_cc(m + (i - 1)) + (dx(m + (i - 1)) + dx(m + i))/2._wp
+        end do
+
+        ! Population of Buffers in y-direction
+
+        ! Populating cell-width distribution buffer at bc_y%beg
+        if (n == 0) then
+            return
+        else if (bc_y%beg >= 0) then
+            call s_mpi_sendrecv_grid_variables_buffers(2, -1)
+        else if (bc_y%beg <= BC_GHOST_EXTRAP .and. bc_y%beg /= BC_AXIS) then
+            do i = 1, buff_size
+                dy(-i) = dy(0)
+            end do
+        else if (bc_y%beg == BC_REFLECTIVE .or. bc_y%beg == BC_AXIS) then
+            do i = 1, buff_size
+                dy(-i) = dy(i - 1)
+            end do
+        else if (bc_y%beg == BC_PERIODIC) then
+            do i = 1, buff_size
+                dy(-i) = dy(n - (i - 1))
+            end do
+        end if
+
+        ! Computing the cell-boundary and center locations buffer at bc_y%beg
+        do i = 1, offset_y%beg
+            y_cb(-1 - i) = y_cb(-i) - dy(-i)
+        end do
+
+        do i = 1, buff_size
+            y_cc(-i) = y_cc(1 - i) - (dy(1 - i) + dy(-i))/2._wp
+        end do
+
+        ! Populating the cell-width distribution buffer at bc_y%end
+        if (bc_y%end >= 0) then
+            call s_mpi_sendrecv_grid_variables_buffers(2, 1)
+        else if (bc_y%end <= BC_GHOST_EXTRAP) then
+            do i = 1, buff_size
+                dy(n + i) = dy(n)
+            end do
+        else if (bc_y%end == BC_REFLECTIVE) then
+            do i = 1, buff_size
+                dy(n + i) = dy(n - (i - 1))
+            end do
+        else if (bc_y%end == BC_PERIODIC) then
+            do i = 1, buff_size
+                dy(n + i) = dy(i - 1)
+            end do
+        end if
+
+        ! Populating the cell-boundary and center locations buffer at bc_y%end
+        do i = 1, offset_y%end
+            y_cb(n + i) = y_cb(n + (i - 1)) + dy(n + i)
+        end do
+
+        do i = 1, buff_size
+            y_cc(n + i) = y_cc(n + (i - 1)) + (dy(n + (i - 1)) + dy(n + i))/2._wp
+        end do
+
+        ! Population of Buffers in z-direction
+
+        ! Populating cell-width distribution buffer at bc_z%beg
+        if (p == 0) then
+            return
+        else if (Bc_z%beg >= 0) then
+            call s_mpi_sendrecv_grid_variables_buffers(3, -1)
+        else if (bc_z%beg <= BC_GHOST_EXTRAP) then
+            do i = 1, buff_size
+                dz(-i) = dz(0)
+            end do
+        else if (bc_z%beg == BC_REFLECTIVE) then
+            do i = 1, buff_size
+                dz(-i) = dz(i - 1)
+            end do
+        else if (bc_z%beg == BC_PERIODIC) then
+            do i = 1, buff_size
+                dz(-i) = dz(p - (i - 1))
+            end do
+        end if
+
+        ! Computing the cell-boundary and center locations buffer at bc_z%beg
+        do i = 1, offset_z%beg
+            z_cb(-1 - i) = z_cb(-i) - dz(-i)
+        end do
+
+        do i = 1, buff_size
+            z_cc(-i) = z_cc(1 - i) - (dz(1 - i) + dz(-i))/2._wp
+        end do
+
+        ! Populating the cell-width distribution buffer at bc_z%end
+        if (bc_z%end >= 0) then
+            call s_mpi_sendrecv_grid_variables_buffers(3, 1)
+        else if (bc_z%end <= BC_GHOST_EXTRAP) then
+            do i = 1, buff_size
+                dz(p + i) = dz(p)
+            end do
+        else if (bc_z%end == BC_REFLECTIVE) then
+            do i = 1, buff_size
+                dz(p + i) = dz(p - (i - 1))
+            end do
+        else if (bc_z%end == BC_PERIODIC) then
+            do i = 1, buff_size
+                dz(p + i) = dz(i - 1)
+            end do
+        end if
+
+        ! Populating the cell-boundary and center locations buffer at bc_z%end
+        do i = 1, offset_z%end
+            z_cb(p + i) = z_cb(p + (i - 1)) + dz(p + i)
+        end do
+
+        do i = 1, buff_size
+            z_cc(p + i) = z_cc(p + (i - 1)) + (dz(p + (i - 1)) + dz(p + i))/2._wp
+        end do
+#endif
+
+    end subroutine s_populate_grid_variables_buffers
+
+end module m_boundary_io
diff --git a/src/common/m_boundary_primitives.fpp b/src/common/m_boundary_primitives.fpp
new file mode 100644
index 0000000000..41916ac115
--- /dev/null
+++ b/src/common/m_boundary_primitives.fpp
@@ -0,0 +1,1063 @@
+!>
+!! @file
+!! @brief Contains module m_boundary_primitives
+
+!> @brief Per-cell noncharacteristic boundary condition primitives applied in the ghost cells
+#:include 'case.fpp'
+#:include 'macros.fpp'
+
+module m_boundary_primitives
+
+    use m_derived_types
+    use m_global_parameters
+    use m_constants
+
+    implicit none
+
+    type(scalar_field), dimension(:,:), allocatable :: bc_buffers
+    $:GPU_DECLARE(create='[bc_buffers]')
+
+contains
+
+    !> Fill ghost cells by copying the nearest boundary cell value along the specified direction.
+    subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+
+        $:GPU_ROUTINE(function_name='s_ghost_cell_extrapolation', parallelism='[seq]', cray_inline=True)
+        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
+        integer, intent(in)                                    :: bc_dir, bc_loc
+        integer, intent(in)                                    :: k, l
+        integer                                                :: j, i
+        type(scalar_field), optional, intent(inout)            :: q_T_sf
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  ! bc_x%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(0, k, l)
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(-j, k, l) = q_T_sf%sf(0, k, l)
+                    end do
+                end if
+            else  !< bc_x%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m, k, l)
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m, k, l)
+                    end do
+                end if
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, 0, l)
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, 0, l)
+                    end do
+                end if
+            else  !< bc_y%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n, l)
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n, l)
+                    end do
+                end if
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, 0)
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, 0)
+                    end do
+                end if
+            else  !< bc_z%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p)
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p)
+                    end do
+                end if
+            end if
+        end if
+
+    end subroutine s_ghost_cell_extrapolation
+
+    !> Apply reflective (symmetry) boundary conditions by mirroring primitive variables and flipping the normal velocity component.
+    subroutine s_symmetry(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in, q_T_sf)
+
+        $:GPU_ROUTINE(parallelism='[seq]')
+        type(scalar_field), dimension(sys_size), intent(inout)                                               :: q_prim_vf
+        real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
+        integer, intent(in)                                                                                  :: bc_dir, bc_loc
+        integer, intent(in)                                                                                  :: k, l
+        integer                                                                                              :: j, q, i
+        type(scalar_field), optional, intent(inout)                                                          :: q_T_sf
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  !< bc_x%beg
+                do j = 1, buff_size
+                    do i = 1, eqn_idx%cont%end
+                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(j - 1, k, l)
+                    end do
+
+                    q_prim_vf(eqn_idx%mom%beg)%sf(-j, k, l) = -q_prim_vf(eqn_idx%mom%beg)%sf(j - 1, k, l)
+
+                    do i = eqn_idx%mom%beg + 1, sys_size
+                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(j - 1, k, l)
+                    end do
+
+                    if (chemistry .and. present(q_T_sf)) then
+                        q_T_sf%sf(-j, k, l) = q_T_sf%sf(j - 1, k, l)
+                    end if
+
+                    if (elasticity) then
+                        do i = 1, shear_BC_flip_num
+                            q_prim_vf(shear_BC_flip_indices(1, i))%sf(-j, k, l) = -q_prim_vf(shear_BC_flip_indices(1, &
+                                      & i))%sf(j - 1, k, l)
+                        end do
+                    end if
+
+                    if (hyperelasticity) then
+                        q_prim_vf(eqn_idx%xi%beg)%sf(-j, k, l) = -q_prim_vf(eqn_idx%xi%beg)%sf(j - 1, k, l)
+                    end if
+                end do
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(-j, k, l, q, i) = pb_in(j - 1, k, l, q, i)
+                                mv_in(-j, k, l, q, i) = mv_in(j - 1, k, l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            else  !< bc_x%end
+                do j = 1, buff_size
+                    do i = 1, eqn_idx%cont%end
+                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l)
+                    end do
+
+                    q_prim_vf(eqn_idx%mom%beg)%sf(m + j, k, l) = -q_prim_vf(eqn_idx%mom%beg)%sf(m - (j - 1), k, l)
+
+                    do i = eqn_idx%mom%beg + 1, sys_size
+                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l)
+                    end do
+
+                    if (chemistry .and. present(q_T_sf)) then
+                        q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m - (j - 1), k, l)
+                    end if
+
+                    if (elasticity) then
+                        do i = 1, shear_BC_flip_num
+                            q_prim_vf(shear_BC_flip_indices(1, i))%sf(m + j, k, l) = -q_prim_vf(shear_BC_flip_indices(1, &
+                                      & i))%sf(m - (j - 1), k, l)
+                        end do
+                    end if
+
+                    if (hyperelasticity) then
+                        q_prim_vf(eqn_idx%xi%beg)%sf(m + j, k, l) = -q_prim_vf(eqn_idx%xi%beg)%sf(m - (j - 1), k, l)
+                    end if
+                end do
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(m + j, k, l, q, i) = pb_in(m - (j - 1), k, l, q, i)
+                                mv_in(m + j, k, l, q, i) = mv_in(m - (j - 1), k, l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do j = 1, buff_size
+                    do i = 1, eqn_idx%mom%beg
+                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l)
+                    end do
+
+                    q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, j - 1, l)
+
+                    do i = eqn_idx%mom%beg + 2, sys_size
+                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l)
+                    end do
+
+                    if (chemistry .and. present(q_T_sf)) then
+                        q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, j - 1, l)
+                    end if
+
+                    if (elasticity) then
+                        do i = 1, shear_BC_flip_num
+                            q_prim_vf(shear_BC_flip_indices(2, i))%sf(k, -j, l) = -q_prim_vf(shear_BC_flip_indices(2, i))%sf(k, &
+                                      & j - 1, l)
+                        end do
+                    end if
+
+                    if (hyperelasticity) then
+                        q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, j - 1, l)
+                    end if
+                end do
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, -j, l, q, i) = pb_in(k, j - 1, l, q, i)
+                                mv_in(k, -j, l, q, i) = mv_in(k, j - 1, l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            else  !< bc_y%end
+                do j = 1, buff_size
+                    do i = 1, eqn_idx%mom%beg
+                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l)
+                    end do
+
+                    q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, n + j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, n - (j - 1), l)
+
+                    do i = eqn_idx%mom%beg + 2, sys_size
+                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l)
+                    end do
+
+                    if (chemistry .and. present(q_T_sf)) then
+                        q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n - (j - 1), l)
+                    end if
+
+                    if (elasticity) then
+                        do i = 1, shear_BC_flip_num
+                            q_prim_vf(shear_BC_flip_indices(2, i))%sf(k, n + j, l) = -q_prim_vf(shear_BC_flip_indices(2, &
+                                      & i))%sf(k, n - (j - 1), l)
+                        end do
+                    end if
+
+                    if (hyperelasticity) then
+                        q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, n + j, l) = -q_prim_vf(eqn_idx%xi%beg + 1)%sf(k, n - (j - 1), l)
+                    end if
+                end do
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, n + j, l, q, i) = pb_in(k, n - (j - 1), l, q, i)
+                                mv_in(k, n + j, l, q, i) = mv_in(k, n - (j - 1), l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do j = 1, buff_size
+                    do i = 1, eqn_idx%mom%beg + 1
+                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, j - 1)
+                    end do
+
+                    q_prim_vf(eqn_idx%mom%end)%sf(k, l, -j) = -q_prim_vf(eqn_idx%mom%end)%sf(k, l, j - 1)
+
+                    do i = eqn_idx%E, sys_size
+                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, j - 1)
+                    end do
+
+                    if (chemistry .and. present(q_T_sf)) then
+                        q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, j - 1)
+                    end if
+
+                    if (elasticity) then
+                        do i = 1, shear_BC_flip_num
+                            q_prim_vf(shear_BC_flip_indices(3, i))%sf(k, l, -j) = -q_prim_vf(shear_BC_flip_indices(3, i))%sf(k, &
+                                      & l, j - 1)
+                        end do
+                    end if
+
+                    if (hyperelasticity) then
+                        q_prim_vf(eqn_idx%xi%end)%sf(k, l, -j) = -q_prim_vf(eqn_idx%xi%end)%sf(k, l, j - 1)
+                    end if
+                end do
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, l, -j, q, i) = pb_in(k, l, j - 1, q, i)
+                                mv_in(k, l, -j, q, i) = mv_in(k, l, j - 1, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            else  !< bc_z%end
+                do j = 1, buff_size
+                    do i = 1, eqn_idx%mom%beg + 1
+                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p - (j - 1))
+                    end do
+
+                    q_prim_vf(eqn_idx%mom%end)%sf(k, l, p + j) = -q_prim_vf(eqn_idx%mom%end)%sf(k, l, p - (j - 1))
+
+                    do i = eqn_idx%E, sys_size
+                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p - (j - 1))
+                    end do
+
+                    if (chemistry .and. present(q_T_sf)) then
+                        q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p - (j - 1))
+                    end if
+
+                    if (elasticity) then
+                        do i = 1, shear_BC_flip_num
+                            q_prim_vf(shear_BC_flip_indices(3, i))%sf(k, l, p + j) = -q_prim_vf(shear_BC_flip_indices(3, &
+                                      & i))%sf(k, l, p - (j - 1))
+                        end do
+                    end if
+
+                    if (hyperelasticity) then
+                        q_prim_vf(eqn_idx%xi%end)%sf(k, l, p + j) = -q_prim_vf(eqn_idx%xi%end)%sf(k, l, p - (j - 1))
+                    end if
+                end do
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, l, p + j, q, i) = pb_in(k, l, p - (j - 1), q, i)
+                                mv_in(k, l, p + j, q, i) = mv_in(k, l, p - (j - 1), q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            end if
+        end if
+
+    end subroutine s_symmetry
+
+    !> Apply periodic boundary conditions by copying values from the opposite domain boundary.
+    subroutine s_periodic(q_prim_vf, bc_dir, bc_loc, k, l, pb_in, mv_in, q_T_sf)
+
+        $:GPU_ROUTINE(parallelism='[seq]')
+        type(scalar_field), dimension(sys_size), intent(inout)                                               :: q_prim_vf
+        real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
+        integer, intent(in)                                                                                  :: bc_dir, bc_loc
+        integer, intent(in)                                                                                  :: k, l
+        integer                                                                                              :: j, q, i
+        type(scalar_field), optional, intent(inout)                                                          :: q_T_sf
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  !< bc_x%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(m - (j - 1), k, l)
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(-j, k, l) = q_T_sf%sf(m - (j - 1), k, l)
+                    end do
+                end if
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(-j, k, l, q, i) = pb_in(m - (j - 1), k, l, q, i)
+                                mv_in(-j, k, l, q, i) = mv_in(m - (j - 1), k, l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            else  !< bc_x%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(j - 1, k, l)
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(m + j, k, l) = q_T_sf%sf(j - 1, k, l)
+                    end do
+                end if
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(m + j, k, l, q, i) = pb_in(j - 1, k, l, q, i)
+                                mv_in(m + j, k, l, q, i) = mv_in(j - 1, k, l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, n - (j - 1), l)
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, n - (j - 1), l)
+                    end do
+                end if
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, -j, l, q, i) = pb_in(k, n - (j - 1), l, q, i)
+                                mv_in(k, -j, l, q, i) = mv_in(k, n - (j - 1), l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            else  !< bc_y%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, j - 1, l)
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, j - 1, l)
+                    end do
+                end if
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, n + j, l, q, i) = pb_in(k, (j - 1), l, q, i)
+                                mv_in(k, n + j, l, q, i) = mv_in(k, (j - 1), l, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, p - (j - 1))
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, p - (j - 1))
+                    end do
+                end if
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, l, -j, q, i) = pb_in(k, l, p - (j - 1), q, i)
+                                mv_in(k, l, -j, q, i) = mv_in(k, l, p - (j - 1), q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            else  !< bc_z%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, j - 1)
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, j - 1)
+                    end do
+                end if
+
+                if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+                    do i = 1, nb
+                        do q = 1, nnode
+                            do j = 1, buff_size
+                                pb_in(k, l, p + j, q, i) = pb_in(k, l, j - 1, q, i)
+                                mv_in(k, l, p + j, q, i) = mv_in(k, l, j - 1, q, i)
+                            end do
+                        end do
+                    end do
+                end if
+            end if
+        end if
+
+    end subroutine s_periodic
+
+    !> Apply axis boundary conditions for cylindrical coordinates by reflecting values across the axis with azimuthal phase shift.
+    subroutine s_axis(q_prim_vf, pb_in, mv_in, k, l)
+
+        $:GPU_ROUTINE(parallelism='[seq]')
+        type(scalar_field), dimension(sys_size), intent(inout)                                               :: q_prim_vf
+        real(stp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), optional, intent(inout) :: pb_in, mv_in
+        integer, intent(in)                                                                                  :: k, l
+        integer                                                                                              :: j, q, i
+
+        do j = 1, buff_size
+            if (z_cc(l) < pi) then
+                do i = 1, eqn_idx%mom%beg
+                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l + ((p + 1)/2))
+                end do
+
+                q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, j - 1, l + ((p + 1)/2))
+
+                q_prim_vf(eqn_idx%mom%end)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%end)%sf(k, j - 1, l + ((p + 1)/2))
+
+                do i = eqn_idx%E, sys_size
+                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l + ((p + 1)/2))
+                end do
+            else
+                do i = 1, eqn_idx%mom%beg
+                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l - ((p + 1)/2))
+                end do
+
+                q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%beg + 1)%sf(k, j - 1, l - ((p + 1)/2))
+
+                q_prim_vf(eqn_idx%mom%end)%sf(k, -j, l) = -q_prim_vf(eqn_idx%mom%end)%sf(k, j - 1, l - ((p + 1)/2))
+
+                do i = eqn_idx%E, sys_size
+                    q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, j - 1, l - ((p + 1)/2))
+                end do
+            end if
+        end do
+
+        if (qbmm .and. .not. polytropic .and. present(pb_in) .and. present(mv_in)) then
+            do i = 1, nb
+                do q = 1, nnode
+                    do j = 1, buff_size
+                        if (z_cc(l) < pi) then
+                            pb_in(k, -j, l, q, i) = pb_in(k, j - 1, l + ((p + 1)/2), q, i)
+                            mv_in(k, -j, l, q, i) = mv_in(k, j - 1, l + ((p + 1)/2), q, i)
+                        else
+                            pb_in(k, -j, l, q, i) = pb_in(k, j - 1, l - ((p + 1)/2), q, i)
+                            mv_in(k, -j, l, q, i) = mv_in(k, j - 1, l - ((p + 1)/2), q, i)
+                        end if
+                    end do
+                end do
+            end do
+        end if
+
+    end subroutine s_axis
+
+    !> Apply slip wall boundary conditions by extrapolating scalars and reflecting the wall-normal velocity component.
+    subroutine s_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+
+        $:GPU_ROUTINE(function_name='s_slip_wall',parallelism='[seq]', cray_inline=True)
+        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
+        integer, intent(in)                                    :: bc_dir, bc_loc
+        integer, intent(in)                                    :: k, l
+        integer                                                :: j, i
+        type(scalar_field), optional, intent(inout)            :: q_T_sf
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  !< bc_x%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb1
+                        else
+                            q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(0, k, l)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_x%isothermal_in) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(-j, k, l) = 2._wp*bc_x%Twall_in - q_T_sf%sf(j - 1, k, l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(-j, k, l) = q_T_sf%sf(0, k, l)
+                        end do
+                    end if
+                end if
+            else  !< bc_x%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve1
+                        else
+                            q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m, k, l)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_x%isothermal_out) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(m + j, k, l) = 2._wp*bc_x%Twall_out - q_T_sf%sf(m - (j - 1), k, l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m, k, l)
+                        end do
+                    end if
+                end if
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg + 1) then
+                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb2
+                        else
+                            q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, 0, l)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_y%isothermal_in) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, -j, l) = 2._wp*bc_y%Twall_in - q_T_sf%sf(k, j - 1, l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, 0, l)
+                        end do
+                    end if
+                end if
+            else  !< bc_y%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg + 1) then
+                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve2
+                        else
+                            q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n, l)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_y%isothermal_out) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, n + j, l) = 2._wp*bc_y%Twall_out - q_T_sf%sf(k, n - (j - 1), l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n, l)
+                        end do
+                    end if
+                end if
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%end) then
+                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb3
+                        else
+                            q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, 0)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_z%isothermal_in) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, -j) = 2._wp*bc_z%Twall_in - q_T_sf%sf(k, l, j - 1)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, 0)
+                        end do
+                    end if
+                end if
+            else  !< bc_z%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%end) then
+                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve3
+                        else
+                            q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_z%isothermal_out) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, p + j) = 2._wp*bc_z%Twall_out - q_T_sf%sf(k, l, p - (j - 1))
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p)
+                        end do
+                    end if
+                end if
+            end if
+        end if
+
+    end subroutine s_slip_wall
+
+    !> Apply no-slip wall boundary conditions by reflecting and negating all velocity components at the wall.
+    subroutine s_no_slip_wall(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+
+        $:GPU_ROUTINE(function_name='s_no_slip_wall',parallelism='[seq]', cray_inline=True)
+
+        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
+        integer, intent(in)                                    :: bc_dir, bc_loc
+        integer, intent(in)                                    :: k, l
+        integer                                                :: j, i
+        type(scalar_field), optional, intent(inout)            :: q_T_sf
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  !< bc_x%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb1
+                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
+                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb2
+                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
+                            q_prim_vf(i)%sf(-j, k, l) = -q_prim_vf(i)%sf(j - 1, k, l) + 2._wp*bc_x%vb3
+                        else
+                            q_prim_vf(i)%sf(-j, k, l) = q_prim_vf(i)%sf(0, k, l)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_x%isothermal_in) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(-j, k, l) = 2._wp*bc_x%Twall_in - q_T_sf%sf(j - 1, k, l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(-j, k, l) = q_T_sf%sf(0, k, l)
+                        end do
+                    end if
+                end if
+            else  !< bc_x%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve1
+                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
+                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve2
+                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
+                            q_prim_vf(i)%sf(m + j, k, l) = -q_prim_vf(i)%sf(m - (j - 1), k, l) + 2._wp*bc_x%ve3
+                        else
+                            q_prim_vf(i)%sf(m + j, k, l) = q_prim_vf(i)%sf(m, k, l)
+                        end if
+                    end do
+                end do
+
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_x%isothermal_out) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(m + j, k, l) = 2._wp*bc_x%Twall_out - q_T_sf%sf(m - (j - 1), k, l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(m + j, k, l) = q_T_sf%sf(m, k, l)
+                        end do
+                    end if
+                end if
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb1
+                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
+                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb2
+                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
+                            q_prim_vf(i)%sf(k, -j, l) = -q_prim_vf(i)%sf(k, j - 1, l) + 2._wp*bc_y%vb3
+                        else
+                            q_prim_vf(i)%sf(k, -j, l) = q_prim_vf(i)%sf(k, 0, l)
+                        end if
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_y%isothermal_in) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, -j, l) = 2._wp*bc_y%Twall_in - q_T_sf%sf(k, j - 1, l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, -j, l) = q_T_sf%sf(k, 0, l)
+                        end do
+                    end if
+                end if
+            else  !< bc_y%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve1
+                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
+                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve2
+                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
+                            q_prim_vf(i)%sf(k, n + j, l) = -q_prim_vf(i)%sf(k, n - (j - 1), l) + 2._wp*bc_y%ve3
+                        else
+                            q_prim_vf(i)%sf(k, n + j, l) = q_prim_vf(i)%sf(k, n, l)
+                        end if
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_y%isothermal_out) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, n + j, l) = 2._wp*bc_y%Twall_out - q_T_sf%sf(k, n - (j - 1), l)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, n + j, l) = q_T_sf%sf(k, n, l)
+                        end do
+                    end if
+                end if
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb1
+                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
+                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb2
+                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
+                            q_prim_vf(i)%sf(k, l, -j) = -q_prim_vf(i)%sf(k, l, j - 1) + 2._wp*bc_z%vb3
+                        else
+                            q_prim_vf(i)%sf(k, l, -j) = q_prim_vf(i)%sf(k, l, 0)
+                        end if
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_z%isothermal_in) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, -j) = 2._wp*bc_z%Twall_in - q_T_sf%sf(k, l, j - 1)
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, -j) = q_T_sf%sf(k, l, 0)
+                        end do
+                    end if
+                end if
+            else  !< bc_z%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        if (i == eqn_idx%mom%beg) then
+                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve1
+                        else if (i == eqn_idx%mom%beg + 1 .and. num_dims > 1) then
+                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve2
+                        else if (i == eqn_idx%mom%beg + 2 .and. num_dims > 2) then
+                            q_prim_vf(i)%sf(k, l, p + j) = -q_prim_vf(i)%sf(k, l, p - (j - 1)) + 2._wp*bc_z%ve3
+                        else
+                            q_prim_vf(i)%sf(k, l, p + j) = q_prim_vf(i)%sf(k, l, p)
+                        end if
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    if (bc_z%isothermal_out) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, p + j) = 2._wp*bc_z%Twall_out - q_T_sf%sf(k, l, p - (j - 1))
+                        end do
+                    else
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, p + j) = q_T_sf%sf(k, l, p)
+                        end do
+                    end if
+                end if
+            end if
+        end if
+
+    end subroutine s_no_slip_wall
+
+    !> Apply Dirichlet boundary conditions by prescribing ghost cell values from stored boundary buffers.
+    subroutine s_dirichlet(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+
+        $:GPU_ROUTINE(function_name='s_dirichlet',parallelism='[seq]', cray_inline=True)
+        type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf
+        integer, intent(in)                                    :: bc_dir, bc_loc
+        integer, intent(in)                                    :: k, l
+        integer                                                :: j, i
+        type(scalar_field), optional, intent(inout)            :: q_T_sf
+
+#ifdef MFC_SIMULATION
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  ! bc_x%beg
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(-j, k, l) = bc_buffers(1, 1)%sf(i, k, l)
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(-j, k, l) = bc_buffers(1, 1)%sf(sys_size + 1, k, l)
+                    end do
+                end if
+            else  !< bc_x%end
+                do i = 1, sys_size
+                    do j = 1, buff_size
+                        q_prim_vf(i)%sf(m + j, k, l) = bc_buffers(1, 2)%sf(i, k, l)
+                    end do
+                end do
+                if (chemistry .and. present(q_T_sf)) then
+                    do j = 1, buff_size
+                        q_T_sf%sf(m + j, k, l) = bc_buffers(1, 2)%sf(sys_size + 1, k, l)
+                    end do
+                end if
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            #:if not MFC_CASE_OPTIMIZATION or num_dims > 1
+                if (bc_loc == -1) then  !< bc_y%beg
+                    do i = 1, sys_size
+                        do j = 1, buff_size
+                            q_prim_vf(i)%sf(k, -j, l) = bc_buffers(2, 1)%sf(k, i, l)
+                        end do
+                    end do
+                    if (chemistry .and. present(q_T_sf)) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, -j, l) = bc_buffers(2, 1)%sf(k, sys_size + 1, l)
+                        end do
+                    end if
+                else  !< bc_y%end
+                    do i = 1, sys_size
+                        do j = 1, buff_size
+                            q_prim_vf(i)%sf(k, n + j, l) = bc_buffers(2, 2)%sf(k, i, l)
+                        end do
+                    end do
+                    if (chemistry .and. present(q_T_sf)) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, n + j, l) = bc_buffers(2, 2)%sf(k, sys_size + 1, l)
+                        end do
+                    end if
+                end if
+            #:endif
+        else if (bc_dir == 3) then  !< z-direction
+            #:if not MFC_CASE_OPTIMIZATION or num_dims > 2
+                if (bc_loc == -1) then  !< bc_z%beg
+                    do i = 1, sys_size
+                        do j = 1, buff_size
+                            q_prim_vf(i)%sf(k, l, -j) = bc_buffers(3, 1)%sf(k, l, i)
+                        end do
+                    end do
+                    if (chemistry .and. present(q_T_sf)) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, -j) = bc_buffers(3, 1)%sf(k, l, sys_size + 1)
+                        end do
+                    end if
+                else  !< bc_z%end
+                    do i = 1, sys_size
+                        do j = 1, buff_size
+                            q_prim_vf(i)%sf(k, l, p + j) = bc_buffers(3, 2)%sf(k, l, i)
+                        end do
+                    end do
+                    if (chemistry .and. present(q_T_sf)) then
+                        do j = 1, buff_size
+                            q_T_sf%sf(k, l, p + j) = bc_buffers(3, 2)%sf(k, l, sys_size + 1)
+                        end do
+                    end if
+                end if
+            #:endif
+        end if
+#else
+        call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l, q_T_sf)
+#endif
+
+    end subroutine s_dirichlet
+
+    !> Extrapolate QBMM bubble pressure and mass-vapor variables into ghost cells by copying boundary values.
+    subroutine s_qbmm_extrapolation(bc_dir, bc_loc, k, l, pb_in, mv_in)
+
+        $:GPU_ROUTINE(parallelism='[seq]')
+        real(stp), optional, dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:,1:), intent(inout) :: pb_in, mv_in
+        integer, intent(in)                                                                                  :: bc_dir, bc_loc
+        integer, intent(in)                                                                                  :: k, l
+        integer                                                                                              :: j, q, i
+
+        if (bc_dir == 1) then  !< x-direction
+            if (bc_loc == -1) then  ! bc_x%beg
+                do i = 1, nb
+                    do q = 1, nnode
+                        do j = 1, buff_size
+                            pb_in(-j, k, l, q, i) = pb_in(0, k, l, q, i)
+                            mv_in(-j, k, l, q, i) = mv_in(0, k, l, q, i)
+                        end do
+                    end do
+                end do
+            else  !< bc_x%end
+                do i = 1, nb
+                    do q = 1, nnode
+                        do j = 1, buff_size
+                            pb_in(m + j, k, l, q, i) = pb_in(m, k, l, q, i)
+                            mv_in(m + j, k, l, q, i) = mv_in(m, k, l, q, i)
+                        end do
+                    end do
+                end do
+            end if
+        else if (bc_dir == 2) then  !< y-direction
+            if (bc_loc == -1) then  !< bc_y%beg
+                do i = 1, nb
+                    do q = 1, nnode
+                        do j = 1, buff_size
+                            pb_in(k, -j, l, q, i) = pb_in(k, 0, l, q, i)
+                            mv_in(k, -j, l, q, i) = mv_in(k, 0, l, q, i)
+                        end do
+                    end do
+                end do
+            else  !< bc_y%end
+                do i = 1, nb
+                    do q = 1, nnode
+                        do j = 1, buff_size
+                            pb_in(k, n + j, l, q, i) = pb_in(k, n, l, q, i)
+                            mv_in(k, n + j, l, q, i) = mv_in(k, n, l, q, i)
+                        end do
+                    end do
+                end do
+            end if
+        else if (bc_dir == 3) then  !< z-direction
+            if (bc_loc == -1) then  !< bc_z%beg
+                do i = 1, nb
+                    do q = 1, nnode
+                        do j = 1, buff_size
+                            pb_in(k, l, -j, q, i) = pb_in(k, l, 0, q, i)
+                            mv_in(k, l, -j, q, i) = mv_in(k, l, 0, q, i)
+                        end do
+                    end do
+                end do
+            else  !< bc_z%end
+                do i = 1, nb
+                    do q = 1, nnode
+                        do j = 1, buff_size
+                            pb_in(k, l, p + j, q, i) = pb_in(k, l, p, q, i)
+                            mv_in(k, l, p + j, q, i) = mv_in(k, l, p, q, i)
+                        end do
+                    end do
+                end do
+            end if
+        end if
+
+    end subroutine s_qbmm_extrapolation
+
+end module m_boundary_primitives