import shutil
from typing import List
from icolos.core.containers.compound import Compound
from icolos.core.workflow_steps.pmx.base import StepPMXBase
from pydantic import BaseModel
import os
from icolos.utils.enums.step_enums import StepGromacsEnum
from icolos.utils.execute_external.gromacs import GromacsExecutor
from icolos.utils.execute_external.pmx import PMXExecutor
from icolos.utils.enums.program_parameters import (
GromacsEnum,
PMXEnum,
StepPMXEnum,
)
from icolos.utils.general.parallelization import SubtaskContainer

_PE = PMXEnum()
_GE = GromacsEnum()
_SGE = StepGromacsEnum()
_PSE = StepPMXEnum()


class StepPMXabfe(StepPMXBase, BaseModel):
"""
Setup files for an ABFE calculation.
"""
_gromacs_executor: GromacsExecutor = GromacsExecutor()

    def __init__(self, **data):
super().__init__(**data)
self._initialize_backend(PMXExecutor)
self._check_backend_availability()
self._gromacs_executor = GromacsExecutor(prefix_execution=_SGE.GROMACS_LOAD)

    def execute(self):
"""
This step manages the setup of a pmx ABFE run for a set of compounds and a protein target.
Expects:
- docked compounds provided as an sdf file
- protein apo structure
- directory containing mdp files
"""
assert self.work_dir is not None and os.path.isdir(self.work_dir)
        replicas = (
            self.settings.additional["replicas"]
            if "replicas" in self.settings.additional
            else 3
        )
        # mirror the directory structure used for the input files by the RBFE workflow
os.makedirs(os.path.join(self.work_dir, "input"), exist_ok=True)
for folder in ["ligands", "mdp", "protein"]:
os.makedirs(os.path.join(self.work_dir, "input", folder), exist_ok=True)
mdp_dir = self.data.generic.get_argument_by_extension(
ext="mdp", rtn_file_object=True
)
# write mdp files to the input dir
mdp_dir.write(os.path.join(self.work_dir, "input/mdp"))
# create directory structure
for comp in self.get_compounds():
ident = comp.get_index_string()
os.makedirs(os.path.join(self.work_dir, ident), exist_ok=True)
# load in the provided apo structure
protein = self.data.generic.get_argument_by_extension(
"pdb", rtn_file_object=True
)
protein.write(
os.path.join(self.work_dir, "input/protein/protein.pdb"), join=False
)
# parametrise protein
self._parametrise_protein(
protein="protein.pdb", path="input/protein", output="protein.gro"
)
# self._clean_protein()
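        # restrict each parallel subtask to a single compound, presumably so that
        # failed compounds can be retried individually (see the result checkers below)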
self.execution.parallelization.max_length_sublists = 1
self._subtask_container = SubtaskContainer(
max_tries=self.execution.failure_policy.n_tries
)
self._subtask_container.load_data(self.get_compounds())
self._execute_pmx_step_parallel(
run_func=self._parametrise_nodes,
step_id="parametrize ligands",
result_checker=self._check_params,
)
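        # expected outputs per compound: MOL.itp/MOL.mol2 in the compound directory
        # (verified by _check_params) and acpype topologies under
        # input/ligands/<index>/MOL.acpype/ (consumed by _setup_abfe)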
        # now build the ABFE input systems for each compound
        self._subtask_container.load_data(self.get_compounds())
        self._execute_pmx_step_parallel(
            run_func=self._setup_abfe,
            step_id="pmx abfe",
result_checker=self._find_nan_vals,
)
        # now create the rest of the directory structure for each compound
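        # Resulting per-compound layout (illustrative; branch names come from
        # self.therm_cycle_branches, states from self.states):
        #
        #   <work_dir>/<compound_index>/
        #       complex/posre.itp
        #       <branch>/<state>/run<1..replicas>/<sim_type>/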
for comp in self.get_compounds():
            ident = comp.get_index_string()
            # copy the posre.itp file from input/protein into the complex branch
            shutil.copyfile(
                os.path.join(self.work_dir, "input/protein/posre.itp"),
                os.path.join(self.work_dir, ident, "complex/posre.itp"),
            )
            for wp in self.therm_cycle_branches:
                wppath = os.path.join(self.work_dir, ident, wp)
                # stateA/stateB - coupled and decoupled states
for state in self.states:
statepath = os.path.join(wppath, state)
os.makedirs(statepath, exist_ok=True)
                    # one run directory per replica: run1, run2, ...
for r in range(1, replicas + 1):
runpath = os.path.join(statepath, f"run{r}")
os.makedirs(runpath, exist_ok=True)
# em/eq_posre/eq/transitions
# TODO: this differs from the equil used for RBFE - can we get away without the extra equilibration?
for sim in self.settings.additional[_PSE.SIM_TYPES]:
simpath = os.path.join(runpath, sim)
os.makedirs(simpath, exist_ok=True)

    def _setup_abfe(self, jobs):
        """
        Run the pmx abfe setup for a single compound, building the system files in
        that compound's directory.
        """
        # sublists are limited to a single compound for this step
        comp = jobs[0] if isinstance(jobs, list) else jobs
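        # inputs for ``pmx abfe``: the parametrised protein topology/coordinates
        # (-pt/-pc) and the acpype-generated ligand topology/coordinates (-lt/-lc);
        # --build is passed as a bare flag, presumably restricting the call to
        # building the system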
args = {
"-pt": os.path.join(self.work_dir, "input/protein/topol.top"),
"-lt": os.path.join(
self.work_dir,
"input/ligands",
comp.get_index_string(),
"MOL.acpype/MOL_GMX.itp",
),
"-pc": os.path.join(self.work_dir, "input/protein/protein.gro"),
"-lc": os.path.join(
self.work_dir,
"input/ligands",
comp.get_index_string(),
"MOL.acpype/MOL_GMX.gro",
),
"--build": "",
}
self._backend_executor.execute(
command=_PE.ABFE,
arguments=self.get_arguments(args),
location=os.path.join(self.work_dir, comp.get_index_string()),
check=True,
)
        # note that the restraint generation is stochastic and sometimes produces bad
        # restraints/nan values; affected compounds are simply resubmitted up to n_tries times

    def _find_nan_vals(self, next_batch: List[List[Compound]]) -> List[List[bool]]:
        """
        Look through the directory of each compound in the batch and check the
        generated restraints.info file for "nan" values.
        """
# sublist length set to 1 for this step
batch_results = []
for subtask in next_batch:
subtask_results = []
for comp in subtask:
                with open(
                    os.path.join(
                        self.work_dir, comp.get_index_string(), "restraints.info"
                    ),
                    "r",
                ) as f:
                    lines = f.readlines()
                subtask_results.append(any("nan" in line for line in lines))
batch_results.append(subtask_results)
return batch_results

    def _check_params(self, batch: List[List[Compound]]) -> List[List[bool]]:
        """
        Check that the ligand parameters have been generated properly for each compound.
        """
output_files = ["MOL.itp", "MOL.mol2"]
results = []
for subjob in batch:
subjob_results = []
for job in subjob:
subjob_results.append(
all(
[
os.path.isfile(
os.path.join(self.work_dir, job.get_index_string(), f)
)
for f in output_files
]
)
)
results.append(subjob_results)
return results
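

# Minimal usage sketch (hypothetical values; in practice the step is constructed and
# driven by the Icolos workflow runner rather than instantiated by hand):
#
#   step = StepPMXabfe(
#       step_id="pmx_abfe_setup",
#       settings={"additional": {"replicas": 3}},
#   )
#   step.work_dir = "/path/to/prepared/workdir"  # must already exist (see execute)
#   step.execute()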