Skip to content
2 changes: 1 addition & 1 deletion a3fe/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.4.1"
__version__ = "0.4.2"
76 changes: 62 additions & 14 deletions a3fe/configuration/engine_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"SomdConfig",
]

import math as _math
import os as _os
from abc import ABC as _ABC
from abc import abstractmethod as _abstractmethod
Expand Down Expand Up @@ -117,9 +118,13 @@ class SomdConfig(_EngineConfig):

### Integrator - ncycles modified as required by a3fe ###
timestep: float = _Field(4.0, description="Timestep in femtoseconds(fs)")
max_nmoves: int = _Field(
250000,
description="Maximum number of moves per cycle. nmoves and ncycles are computed from runtime, timestep, and max_nmoves.",
)
runtime: _Union[int, float] = _Field(
5.0,
description="Runtime in nanoseconds(ns), and must be a multiple of timestep",
description="Runtime in nanoseconds(ns), must be a multiple of timestep and ncycles will be calculated from runtime and nmoves",
)

### Constraints ###
Expand Down Expand Up @@ -224,30 +229,72 @@ class SomdConfig(_EngineConfig):
default_factory=dict, description="Extra options to pass to the SOMD engine"
)

def _get_total_nmoves(self) -> int:
"""Calculate total number of moves from runtime and timestep."""
runtime_fs = _Decimal(str(self.runtime)) * _Decimal("1_000_000")
timestep = _Decimal(str(self.timestep))
return int(runtime_fs / timestep)

@property
def nmoves(self) -> int:
"""
Make sure runtime is a multiple of timestep
Number of moves per cycle.

If total_nmoves <= max_nmoves, returns total_nmoves (ncycles=1).
Otherwise returns the largest factor of total_nmoves that is both
<= max_nmoves and divisible by energy_frequency(default 200), ensuring that
energy output points align with every cycle boundary.
"""
# Convert runtime to femtoseconds (ns -> fs)
total_nmoves = self._get_total_nmoves()

if total_nmoves <= self.max_nmoves:
return total_nmoves

best = 1
for d in range(1, int(_math.isqrt(total_nmoves)) + 1):
if total_nmoves % d == 0:
for candidate in (d, total_nmoves // d):
if (
candidate <= self.max_nmoves
and candidate % self.energy_frequency == 0
):
best = max(best, candidate)

return best

@property
def ncycles(self) -> int:
"""Number of cycles, computed as total_nmoves / nmoves."""
return max(1, self._get_total_nmoves() // self.nmoves)

@_model_validator(mode="after")
def _validate_runtime_timestep_nmoves(self):
"""Validate that runtime is a multiple of both timestep and energy_frequency * timestep."""
if self.max_nmoves < self.energy_frequency:
raise ValueError(
f"max_nmoves ({self.max_nmoves}) must be >= energy_frequency "
f"({self.energy_frequency}) so that each cycle contains at least one energy output."
)

runtime_fs = _Decimal(str(self.runtime)) * _Decimal("1_000_000")
timestep = _Decimal(str(self.timestep))

# Check if runtime is a multiple of timestep
remainder = runtime_fs % timestep
if round(float(remainder), 4) != 0:
if round(float(runtime_fs % timestep), 4) != 0:
raise ValueError(
(
"Runtime must be a multiple of the timestep. "
f"Runtime is {self.runtime} ns ({runtime_fs} fs), "
f"and timestep is {self.timestep} fs."
)
f"Runtime must be a multiple of timestep. "
f"Runtime is {self.runtime} ns ({runtime_fs} fs), "
f"timestep is {self.timestep} fs."
)

# Calculate the number of moves
nmoves = round(float(runtime_fs) / float(timestep))
energy_block_fs = timestep * _Decimal(str(self.energy_frequency))
if round(float(runtime_fs % energy_block_fs), 4) != 0:
raise ValueError(
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this was done before for timestep and runtime, but I think it might be better to validate all of this stuff in a validator, rather than in the property. Then, an error will be raised on instantiation/ assignment (we've set it to validate on assignment) rather than later when the user tries to call the property. The property can then just compute ncycles.

f"Runtime must be a multiple of energy_frequency * timestep "
f"({self.energy_frequency} * {self.timestep} fs = {float(energy_block_fs)} fs). "
f"Runtime is {self.runtime} ns ({runtime_fs} fs)."
)

return nmoves
return self

@_model_validator(mode="after")
def _check_rf_dielectric(self):
Expand Down Expand Up @@ -336,6 +383,7 @@ def write_config(
config_lines = [
"### Integrator ###",
f"timestep = {self.timestep} * femtosecond",
f"ncycles = {self.ncycles}",
f"nmoves = {self.nmoves}",
f"constraint = {self.constraint}",
f"hydrogen mass repartitioning factor = {self.hydrogen_mass_factor}",
Expand Down
Binary file modified a3fe/data/example_calc_set/mdm2_short/Calculation.pkl
Binary file not shown.
Binary file modified a3fe/data/example_calc_set/mdm2_short/bound/Leg.pkl
Binary file not shown.
Binary file modified a3fe/data/example_calc_set/mdm2_short/bound/restrain/Stage.pkl
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified a3fe/data/example_calc_set/t4l/Calculation.pkl
Binary file not shown.
Binary file modified a3fe/data/example_calc_set/t4l/bound/Leg.pkl
Binary file not shown.
Binary file modified a3fe/data/example_calc_set/t4l/bound/restrain/Stage.pkl
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified a3fe/data/example_restraint_stage/Stage.pkl
Binary file not shown.
89 changes: 89 additions & 0 deletions a3fe/tests/test_engine_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,95 @@ def test_charge_cutoff_validation(engine_config, charge, cutoff, should_pass):
engine_config(ligand_charge=charge, cutoff_type=cutoff, runtime=1)


@pytest.mark.parametrize(
"runtime,max_nmoves,timestep,expected_nmoves,expected_ncycles",
[
# total_nmoves < max_nmoves: single cycle
(0.008, 250000, 4.0, 2000, 1),
# total_nmoves == max_nmoves: single cycle boundary
(1.0, 250000, 4.0, 250000, 1),
# Clean division into max-sized cycles (adaptive 0.1 ns grid typical case)
(5.0, 250000, 4.0, 250000, 5),
# 0.1 ns grid not divisible by max_nmoves: largest energy_frequency-aligned factor
# 5.3 ns -> total=1_325_000, largest factor <=250_000 and %200==0 is 53_000
(5.3, 250000, 4.0, 53000, 25),
],
)
def test_ncycles_calculation(
somd_engine_config, runtime, max_nmoves, timestep, expected_nmoves, expected_ncycles
):
"""Test that nmoves and ncycles are correctly computed from runtime, max_nmoves and timestep."""
config = somd_engine_config(
runtime=runtime, max_nmoves=max_nmoves, timestep=timestep
)
assert config.nmoves == expected_nmoves
assert config.ncycles == expected_ncycles
assert config.nmoves * config.ncycles == config._get_total_nmoves()
assert config.nmoves % config.energy_frequency == 0


def test_ncycles_invalid_runtime(somd_engine_config):
"""Test that ValueError is raised when runtime is not a multiple of timestep or energy_frequency * timestep."""
with pytest.raises(ValueError, match="Runtime must be a multiple of timestep"):
somd_engine_config(runtime=5.0, timestep=3.0)
# 0.123 ns: total_nmoves=30750, 30750 % 200 = 150 != 0
with pytest.raises(ValueError, match="energy_frequency"):
somd_engine_config(runtime=0.123, timestep=4.0)
# 6.666 ns: total_nmoves=1_666_500, 1_666_500 % 200 = 100 != 0
with pytest.raises(ValueError, match="energy_frequency"):
somd_engine_config(runtime=6.666, timestep=4.0)


def test_ncycles_updates_on_runtime_change(somd_engine_config):
"""Test that nmoves and ncycles update when runtime is changed."""
config = somd_engine_config(runtime=5.0, max_nmoves=250000, timestep=4.0)
assert config.nmoves == 250000
assert config.ncycles == 5

config.runtime = 10.0
assert config.nmoves == 250000
assert config.ncycles == 10


def test_ncycles_updates_on_timestep_or_max_nmoves_change(somd_engine_config):
"""SSOT: changing timestep or max_nmoves re-derives nmoves/ncycles."""
config = somd_engine_config(runtime=5.0, timestep=4.0, max_nmoves=250000)
assert config.nmoves == 250000 and config.ncycles == 5

config.timestep = 2.0 # total_nmoves doubles to 2_500_000
assert config.nmoves == 250000
assert config.ncycles == 10

config.max_nmoves = 500000 # now single cycle fits? total=2_500_000 > 500_000
assert config.nmoves == 500000
assert config.ncycles == 5


def test_max_nmoves_below_energy_frequency_rejected(somd_engine_config):
"""max_nmoves must be >= energy_frequency to guarantee an energy output per cycle."""
with pytest.raises(ValueError, match="max_nmoves"):
somd_engine_config(runtime=1.0, timestep=4.0, max_nmoves=100)


def test_write_config_cycles_match_properties(somd_engine_config):
"""The written somd.cfg must contain ncycles/nmoves matching the computed properties."""
with TemporaryDirectory() as dirname:
config = somd_engine_config(runtime=10.0, timestep=4.0)
config.lambda_values = [0.0, 0.5, 1.0]
config.write_config(
run_dir=dirname,
lambda_val=0.0,
runtime=config.runtime,
top_file="somd.prm7",
coord_file="somd.rst7",
morph_file="somd.pert",
)
with open(os.path.join(dirname, config.get_file_name()), "r") as f:
content = f.read()
assert f"ncycles = {config.ncycles}" in content
assert f"nmoves = {config.nmoves}" in content


def test_ligand_charge_validation(engine_config):
"""Test that ligand charge validation works correctly."""

Expand Down
5 changes: 5 additions & 0 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
Change Log
===============

0.4.2
====================
- Added ``max_nmoves`` as a configurable field in ``SomdConfig`` and made ``nmoves``/``ncycles`` computed properties derived from ``runtime``, ``timestep``, ``max_nmoves`` and ``energy_frequency``.
This prevents memory overflow from single-cycle long runtimes while keeping ``runtime`` the single source of truth.

0.4.1
====================
- Fixed the statistical inefficiency timestep units from femtoseconds to nanoseconds.
Expand Down
41 changes: 23 additions & 18 deletions docs/guides.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ The most basic input that a3fe accepts is PDB files of the protein and crystallo
file for the ligand. Pre-parameterised inputs in AMBER-format are also accepted. The table below details the combinations
of input files that can be supplied to a3fe, and the names that they must be given (files for both the free and bound leg must
be provided). The preparation stage will be detected by a3fe when you instantiate a Calculation, and only the required preparation
steps will be carried out for each leg.
steps will be carried out for each leg.

You can also find out which input files are required for a given preparation stage for a given leg programmatically, e.g:

Expand All @@ -21,7 +21,7 @@ You can also find out which input files are required for a given preparation sta
.. list-table:: Preparation stage types and required input files
:widths: 25 25 25 50
:header-rows: 1

* - PreparationStage
- LegType
- Required Input Files
Expand Down Expand Up @@ -95,13 +95,13 @@ completely happy with the input files.
* If the above fails, this is often due to residue/ atom names which do not match the templates. Read the errors to find out which residues / atoms are causing the issues, then check the expected names in library which was loaded after typing ``source leaprc.protein.ff14SB`` e.g. ``cat $AMBERHOME/dat/leap/lib/amino12.lib``. Rename the offending atoms/ residues and repeat the above step.
* Finally, rename ``protein_fully_sanitised.pdb`` to ``protein.pdb`` and run a3fe again.

Alternatively, if you have a parameterised protein (protein.rst7 and protein.prm7), and ligand.sdf (and optionally waters.prm7 and waters.rst7), you can use the
Alternatively, if you have a parameterised protein (protein.rst7 and protein.prm7), and ligand.sdf (and optionally waters.prm7 and waters.rst7), you can use the
notebook supplied in a3fe/a3fe/data/example_run_dir/parameterise_and_assemble_input.ipynb to parameterise the ligand and create the parameterised input files required by a3fe.

Running Standard Non-Adaptive Calculations
*******************************************

Once you have the required files in `input` as described above, you can run a standard non-adaptive ABFE calculation. To run 5 replicates with 5 ns of sampling per window
Once you have the required files in `input` as described above, you can run a standard non-adaptive ABFE calculation. To run 5 replicates with 5 ns of sampling per window
(discarding 1 ns of this to equilibration):

.. code-block:: python
Expand All @@ -123,7 +123,7 @@ Customising Calculations
*************************

Engine Configuration
-----------------
--------------------

The default simulation engine is SOMD. You can customize its configuration by using :class:`a3fe.configuration.engine_config.SomdConfig`.
For example, to change the timestep, create a ``SomdConfig`` object and pass it to ``Calculation``:
Expand All @@ -132,17 +132,23 @@ For example, to change the timestep, create a ``SomdConfig`` object and pass it

engine_config = a3.SomdConfig(timestep=2.0) # fs (femtoseconds), Create configuration with custom parameters
calc = a3.Calculation(engine_config=engine_config) # Pass to Calculation during creation

# Or modify parameters after creating the Calculation
calc = a3.Calculation()
# Works if the calculation has not been setup yet
calc.engine_config.timestep = 2.0 # fs
# Works if the calculation has already been setup
calc.update_engine_config_option(timestep=2.0) # fs
# Before setup(): modify the engine_config directly
calc.engine_config.timestep = 2.0 # fs

# After setup(): use update_engine_config_option(option, value)
# to propagate the change to all sub-simulations
calc.update_engine_config_option("timestep", 2.0) # fs

.. warning::
After calling ``calc.setup()``, do not modify engine_config directly.
Use ``calc.update_engine_config_option()`` instead.
After calling ``calc.setup()``, always use ``calc.update_engine_config_option("option", value)``
rather than modifying ``engine_config`` directly.

.. note::
``nmoves`` and ``ncycles`` are computed properties derived from ``runtime``, ``timestep``, and
``max_nmoves``; they cannot be set directly.

To see a complete list of available configuration options, run ``somd-freenrg --help-config``
or inspect the :class:`a3.SomdConfig` API reference.
Expand Down Expand Up @@ -180,7 +186,7 @@ also set other options:

.. note::

The molecular dynamics simulations should be run on GPUs - they are unbearably slow on CPU. However, you may want to run the MBAR analysis on CPUs to minimise submissions to the CPU queue.
The molecular dynamics simulations should be run on GPUs - they are unbearably slow on CPU. However, you may want to run the MBAR analysis on CPUs to minimise submissions to the CPU queue.
To do this, you can supply an ``analysis_slurm_config`` which is different to the ``slurm_config``, which will only be used for the MBAR analysis.

.. code-block:: python
Expand Down Expand Up @@ -226,7 +232,7 @@ simulation time allocation, and equilibration detection algorithms.
# of 2 kcal mol-1
calc.get_optimal_lam_vals(delta_er = 2)
# Run adaptively with a runtime constant of 0.0005 kcal**2 mol-2 ns**-1
# Note that automatic equilibration detection with the paired t-test
# Note that automatic equilibration detection with the paired t-test
# method will also be carried out.
calc.run(adaptive=True, runtime_constant = 0.0005)
calc.wait()
Expand All @@ -244,7 +250,7 @@ simulation time allocation, and equilibration detection algorithms.

During the adaptive allocation of simulation time, the allocated runtime is computed taking into account the relative simulation cost. To obtain
comparable total simulation times to those described in the manuscript, you should set the reference simulation time to the cost (hr / ns) of the bound leg of the
MIF/ MIF180 complex ([input files here](https://github.com/michellab/Automated-ABFE-Paper/tree/main/simulations/initial_systems/mif/input)). The cost can be obtained
MIF/ MIF180 complex ([input files here](https://github.com/michellab/Automated-ABFE-Paper/tree/main/simulations/initial_systems/mif/input)). The cost can be obtained
by running a short simulation for the leg and checking the cost with e.g. ``ref_cost = calc.legs[0].tot_gpu_time / calc.legs[0].tot_simtime``. This should then be passed when
optimising the lambda schedule with e.g. ``calc.get_optimal_lam_vals(delta_er = 2, reference_sim_cost = ref_cost)``.

Expand All @@ -255,7 +261,7 @@ Analysis can be performed with:

.. code-block:: python

# Calculate the free energy changes using MBAR and
# Calculate the free energy changes using MBAR and
# generate a variety of plots to aid analysis.
# Run through SLURM as MBAR can be computationally intensive.
# Avoid costly RMSD analysis.
Expand All @@ -272,7 +278,7 @@ Analysis can be performed with:
``calc.analyse()`` generates a variety of outputs in the ``output`` directories of the calculation, leg, and stage directories. The most detailed
information is given in the stage output directories. You can get a detailed breakdown of the results as a pandas dataframe by running ``calc.get_results_df()``.

Convergence analysis involves repeatedly calculating the free energy changes with different subsets of the
Convergence analysis involves repeatedly calculating the free energy changes with different subsets of the
data, and is computationally intensive. Hence, it is implemented in a different function. To run convergence
analysis, enter ``calc.analyse_convergence()``. Plots of the free energy change against total simulation time
will be created in each output directory.
Expand Down Expand Up @@ -325,4 +331,3 @@ Since A3FE 0.2.0, ABFE calculations with charged ligands are supported using a c
calc = a3.Calculation(engine_config=engine_config) # Pass to Calculation

The default ``SomdConfig`` uses reaction field instead of PME. This is faster (around twice as fast for some of our systems) and has been shown to give equivalent results for neutral ligands in RBFE calculations - see https://pubs.acs.org/doi/full/10.1021/acs.jcim.0c01424.

Loading