Source code for maize.steps.mai.docking.adv

"""Autodock Vina implementation"""

# pylint: disable=import-outside-toplevel, import-error

import json
import logging
from pathlib import Path
import re
import shutil
import tarfile
from typing import Annotated, Any, Literal, Optional
import xml.etree.ElementTree as ET

import numpy as np
from numpy.typing import NDArray
import pytest

from maize.core.node import Node
from maize.core.interface import Parameter, Flag, FileParameter, Suffix, Input, Output
from maize.utilities.chem import Isomer, IsomerCollection
from maize.utilities.testing import TestRig
from maize.utilities.validation import SuccessValidator, FileValidator
from maize.utilities.resources import cpu_count

AD_HIGH_ENERGY = 1000
SCORE_ONLY_RESULT_REGEX = re.compile(r"\s*Estimated Free Energy of Binding\s*\:\s+(-?\d+\.\d+)\s*")


log = logging.getLogger("run")


def _adv_score_parser_meeko(props: dict[str, str]) -> float:
    """Parse scores from Vina output."""
    log.debug("Parsing SDF properties '%s'", props)
    value = float(json.loads(props.get("meeko", ""))["free_energy"])
    log.debug("Parsed value '%s' from properties", value)
    return value


def _adgpu_score_parser(
    file: Annotated[Path, Suffix("xml")], log: Optional["logging.Logger"] = None
) -> dict[int, dict[str, float | int]]:
    """Parse scores from an AutoDockGPU XML output file"""
    if not file.exists():
        raise FileNotFoundError(f"XML file at '{file.as_posix()}' does not exist")

    tree = ET.parse(file)
    if (res_section := tree.find("result")) is None or (
        rmsd_section := res_section.find("rmsd_table")
    ) is None:
        raise KeyError(f"XML file at '{file.as_posix()}' is malformed or empty")

    results = {}
    for res in rmsd_section:
        if log is not None:
            log.debug(
                "Parsing run '%s' with energy '%s'",
                res.attrib["run"],
                res.attrib["binding_energy"],
            )
        results[int(res.attrib["run"])] = {
            "energy": float(res.attrib["binding_energy"]),
            "cluster_rmsd": float(res.attrib["cluster_rmsd"]),
            "rmsd": float(res.attrib["reference_rmsd"]),
            "cluster": int(res.attrib["rank"]),
        }

    return results


def _list_of_dicts2dict_of_lists(data: list[dict[str, Any]]) -> dict[str, list[Any]]:
    """Convert from a list of dictionaries to a dictionary of lists"""
    return {k: [dic[k] for dic in data] for k in data[0]}


# Helper functions using meeko to convert to PDBQT...
def _mol2pdbqt(file: Path, isomer: "Isomer") -> None:
    """Converts an isomer to a PDBQT file using meeko"""
    from meeko import MoleculePreparation

    preparator = MoleculePreparation()
    preparator.prepare(isomer._molecule)
    preparator.write_pdbqt_file(file)


# ...and from DLG (a kind of PDBQT file) to SDF
def _adv2sdf(inp_file: Path, sdf: Path) -> None:
    """Converts an AD DLG file to SDF using meeko"""
    from meeko import PDBQTMolecule, RDKitMolCreate

    with inp_file.open() as inp:
        mol = PDBQTMolecule(inp.read(), is_dlg=inp_file.suffix == ".dlg", skip_typing=True)
    with sdf.open("w") as sdfout:
        out, failures = RDKitMolCreate.write_sd_string(mol)
        sdfout.write(out)
    if len(failures) > 0:
        raise IOError(f"Meeko failed to write file '{sdf.as_posix()}'")


[docs] class PreparePDBQT(Node): """Prepares a receptor for docking with Vina.""" required_callables = ["prepare_receptor"] """ Requires various scripts and tools: prepare_receptor Included in ``AutoDockTools``. """ _RepairType = Literal["bonds_hydrogens", "bonds", "hydrogens", "checkhydrogens", "None"] _CleanupType = Literal["nphs", "lps", "waters", "nonstdres", "deleteAltB"] inp: Input[Annotated[Path, Suffix(".pdb")]] = Input() """Receptor structure without ligand""" out: Output[Annotated[Path, Suffix(".pdbqt")]] = Output() """Tar archive of all grid files""" repairs: Parameter[_RepairType] = Parameter(default="None") """Types of repairs to be done to the PDB file""" preserve_charges: Flag = Flag(default=False) """Whether to preserve existing charges instead of adding Gasteiger charges""" cleanup_protein: Parameter[list[_CleanupType]] = Parameter( default_factory=lambda: ["nphs", "lps", "waters", "nonstdres"] ) """Cleanup options""" remove_nonstd: Flag = Flag(default=False) """Remove non-standard residues""" def run(self) -> None: structure = self.inp.receive() receptor_pdbqt = Path("rec.pdbqt") command = ( f"{self.runnable['prepare_receptor']} " f"-A '{self.repairs.value}' " f"-U '{'_'.join(self.cleanup_protein.value)}' " f"-r {structure.as_posix()} " f"-o {receptor_pdbqt.as_posix()} " ) if self.preserve_charges.value: command += "-C " if self.remove_nonstd.value: command += "-e " self.run_command(command, validators=[FileValidator(receptor_pdbqt)], verbose=True) self.out.send(receptor_pdbqt)
# TODO Allow anchored / constrained docking, see: # https://github.com/ccsb-scripps/AutoDock-GPU/wiki/Anchored-docking
[docs] class PrepareGrid(Node): """Prepares a receptor for docking with AutoDock4.""" required_callables = ["prepare_receptor", "write_gpf", "autogrid"] """ Requires various scripts and tools: write_gpf Script to create GPF output with all possible atomtypes, `from here <https://github.com/diogomart/write-autogrid-config>`_. prepare_receptor Included in ``AutoDockTools``. autogrid Included in the normal CPU-only version of AutoDock """ required_packages = ["meeko"] """Requires a custom environment with ``meeko`` installed""" inp_structure: Input[Annotated[Path, Suffix(".pdb", ".pdbqt")]] = Input() """Receptor structure without ligand""" inp_ligand: Input[Isomer] = Input(optional=True) """Reference ligand structure, if not provided requires `search_center` to be set""" out: Output[Annotated[Path, Suffix("tar")]] = Output() """Tar archive of all grid files""" search_center: Parameter[tuple[float, float, float]] = Parameter( default=(np.nan, np.nan, np.nan) ) """Center of the search space for docking, required if `inp_ligand` is not given""" search_range: Parameter[tuple[float, float, float]] = Parameter(default=(15.0, 15.0, 15.0)) """Range of the search space for docking""" def run(self) -> None: structure = self.inp_structure.receive() # Create receptor PDBQT (if needed) receptor_pdbqt = Path("rec.pdbqt") if structure.suffix == ".pdbqt": shutil.move(structure, receptor_pdbqt) else: self.run_command( f"{self.runnable['prepare_receptor']} " f"-r {structure.as_posix()} " f"-o {receptor_pdbqt.as_posix()}", validators=[FileValidator(receptor_pdbqt)], ) # Create temporary ligand PDBQT if self.inp_ligand.ready(): lig = self.inp_ligand.receive() lig_pdbqt = Path("lig.pdbqt") _mol2pdbqt(lig_pdbqt, lig) command = ( f"{self.runnable['write_gpf']} " f"-l {lig_pdbqt.as_posix()} " f"{receptor_pdbqt.as_posix()}" ) else: assert all(np.isfinite(c) for c in self.search_center.value) box_config = Path("box.txt") with box_config.open("w") as conf: for axis, coord, size in zip( ("x", "y", "z"), self.search_center.value, self.search_range.value ): conf.write(f"center_{axis} = {coord:5.3f}\n") conf.write(f"size_{axis} = {size:5.3f}\n") command = ( f"{self.runnable['write_gpf']} " f"-b {box_config.as_posix()} " f"{receptor_pdbqt.as_posix()}" ) # Create GPF, includes search geometry and index of needed maps gpf = receptor_pdbqt.with_suffix(".gpf") self.run_command(command, validators=[FileValidator(gpf)]) # Create maps glg = gpf.with_suffix(".glg") fld = glg.with_suffix(".maps.fld") self.run_command( f"{self.runnable['autogrid']} -p {gpf.as_posix()} -l {glg.as_posix()}", validators=[FileValidator(glg), FileValidator(fld)], ) # Wrap it all up tar = Path("grid.tar") with tarfile.open(tar, "w") as archive: for file in Path().glob("*.map"): archive.add(file) for file in (receptor_pdbqt, gpf, glg, fld): archive.add(file) self.out.send(tar)
[docs] class AutoDockGPU(Node): """ Runs AutoDock on the GPU [#santos2021]_. Notes ----- Clone the repo from `here <https://github.com/ccsb-scripps/AutoDock-GPU>`_, load modules for the compiler and CUDA, set ``GPU_INCLUDE_PATH`` and ``GPU_LIBRARY_PATH``, and run ``make DEVICE=CUDA``. This also requires `meeko <https://github.com/forlilab/Meeko>`_ to convert to and from pdbqt files, specify `mk_prepare` and `mk_export`. If you get very high docking scores this often means that the ligand is outside of the grid. This can be due to a map that is too small (increase ``search_range``) or a misplaced box that is hard to access (modify ``search_center``). References ---------- .. [#santos2021] Santos-Martins, D. et al. Accelerating AutoDock4 with GPUs and Gradient-Based Local Search. J. Chem. Theory Comput. 17, 1060-1073 (2021). """ required_callables = ["autodock_gpu"] """Requires the ``autodock_gpu`` executable""" required_packages = ["meeko"] """Requires a custom environment with ``meeko==0.4`` installed""" inp: Input[list[IsomerCollection]] = Input() """ List of molecules to dock, each molecule can have multiple isomers, these will be docked separately. """ out: Output[list[IsomerCollection]] = Output(optional=True) """ Docked molecules with conformations and scores attached. Also include per-conformer clustering information performed by AutoDock, use the keys 'rmsd', 'cluster_rmsd', 'cluster' to access. """ out_scores: Output[NDArray[np.float32]] = Output() """Docking scores, the best for each docked IsomerCollection""" ref_ligand: Parameter[Isomer] = Parameter(optional=True) """Optional reference ligand for RMSD analysis""" grid_file: FileParameter[Path] = FileParameter() """The protein grid file, all internally referenced files must be available""" seed: Parameter[int] = Parameter(default=42) """The default seed""" heuristics: Parameter[int] = Parameter(default=1) """Number of evaluations for ligand-based automatic search""" heurmax: Parameter[int] = Parameter(default=12000000) """Heuristics evaluation limit""" nrun: Parameter[int] = Parameter(default=20) """LGA runs""" population_size: Parameter[int] = Parameter(default=150) """LGA population size""" lsit: Parameter[int] = Parameter(default=300) """Local search iterations""" derivtypes: Parameter[dict[str, str]] = Parameter(default_factory=dict) """Atomtype mappings to add to ``derivtype``, e.g. NA->N""" strict: Flag = Flag(default=False) """When set, raises an exception if docking a molecule failed, otherwise logs a warning""" scores_only: Flag = Flag(default=False) """If ``True``, will only return the scores and no conformers""" def run(self) -> None: mols = self.inp.receive() # Molecule inputs inputs = Path("inputs") inputs.mkdir() # Convert all ligands to pdbqt and collect # their paths and names in a batch file batch_file = Path("batch.txt") with batch_file.open("w") as file: file.write(f"{self.grid_file.filepath.as_posix()}\n") for mol in mols: for isomer in mol.molecules: # Tools like REINVENT rely on getting the same number of scores out # as molecules, so we can't filter out failed embeddings earlier... if isomer.n_conformers == 0: self.logger.warning( "No embedding for '%s' ('%s'), skipping...", isomer.inchi, isomer.to_smiles(), ) continue ligand = inputs / f"{isomer.inchi}.pdbqt" # Create pdbqt input _mol2pdbqt(ligand, isomer) file.write(f"{ligand.absolute().as_posix()}\n") file.write(f"{isomer.inchi}\n") command = ( f"{self.runnable['autodock_gpu']} --filelist {batch_file.as_posix()} " f"--heuristics {self.heuristics.value} --nrun {self.nrun.value} " f"--psize {self.population_size.value} --lsit {self.lsit.value} " f"--seed {self.seed.value} --heurmax {self.heurmax.value}" ) # Possible reference ligand if self.ref_ligand.is_set: ref_ligand = Path("ref_ligand.pdbqt") _mol2pdbqt(ref_ligand, self.ref_ligand.value) command += f" --xraylfile {ref_ligand.as_posix()}" # Possible derivtypes if self.derivtypes.value: derivtypes = "/".join(f"{key}={value}" for key, value in self.derivtypes.value.items()) command += f" --derivtype {derivtypes}" validators = [SuccessValidator("All jobs ran without errors")] if self.strict.value else [] with self.gpus(1): self.run_command(command, verbose=True, validators=validators) # Collect outputs for mol in mols: for isomer in mol.molecules: isomer.score_tag = "energy" output = Path(f"{isomer.inchi}.xml") # Failed dockings and missing embeddings get a NaN try: # Isomer scores are in the order of the indices results = _adgpu_score_parser(output, log=self.logger) except (KeyError, FileNotFoundError) as err: if self.strict.value: raise err self.logger.warning("Docking isomer '%s' failed", isomer.inchi) isomer.scores = np.full(self.nrun.value, np.nan) continue res_transpose = _list_of_dicts2dict_of_lists(list(results.values())) # High energies indicate grid problems if any(ener > AD_HIGH_ENERGY for ener in res_transpose["energy"]): self.logger.warning( "Isomer '%s' ('%s') has runs with high energy poses. This indicates " "a possible lack of grid coverage or a poorly-defined search space. " "Adjust `search_range` and `search_center` during grid preparation.", isomer.inchi, mol.smiles, ) # Convenience score attribute isomer.set_tag("score_type", "oracle") isomer.scores = np.array(res_transpose["energy"]) self.logger.info("Parsed isomer '%s', score %s", isomer.inchi, isomer.scores.min()) # We only parse the conformers if the user asks for them, # otherwise it unnecessarily slows things like REINVENT down if not self.scores_only.value: # This allows us to convert all pdbqt outputs # into one SDF, with the scoring order sdf_out = Path(f"{isomer.inchi}-out.sdf") _adv2sdf(output.with_suffix(".dlg"), sdf_out) # Add all conformers and set their coords isomer.update_conformers_from_sdf(sdf_out) # AD gives us lots of useful information for each conformer, e.g. energy # reference RMSD and cluster, we tag each conformer with this information for key, vals in res_transpose.items(): for conf, val in zip(isomer.conformers, vals): conf.set_tag(key, val) if not self.scores_only.value: self.out.send(mols) self.out_scores.send(np.array([mol.best_score for mol in mols]))
class _Vina(Node, register=False): """Base for all Vina variants""" inp: Input[list[IsomerCollection]] = Input() """List of molecules to dock""" out: Output[list[IsomerCollection]] = Output() """Docked molecules with conformations and scores attached""" seed: Parameter[int] = Parameter(default=42) """The default seed""" n_jobs: Parameter[int] = Parameter(default=cpu_count()) """Number of docking runs to perform in parallel""" n_poses: Parameter[int] = Parameter(default=1) """Number of poses to generate""" receptor: FileParameter[Annotated[Path, Suffix("pdbqt")]] = FileParameter() """Path to the receptor structure""" search_center: Parameter[tuple[float, float, float]] = Parameter() """Center of the search space for docking""" search_range: Parameter[tuple[float, float, float]] = Parameter(default=(15.0, 15.0, 15.0)) """Range of the search space for docking""" def prepare(self) -> None: super().prepare() import meeko if meeko.__version__ == "0.5.0": raise ImportError( "Vina nodes are incompatible with meeko 0.5.0 due " "to an upstream parsing issue with PDBQT files" ) def _parse_adv_outputs( self, mols: list[IsomerCollection], mol_outputs: list[list[Path]] ) -> None: """Parses ADV output, including conformers and scores from PDBQT or DLG outputs""" moldict = {iso.inchi: iso for mol in mols for iso in mol.molecules} outdict = {file.stem.strip("_out"): file for folder in mol_outputs for file in folder} for i, (key, file) in enumerate(outdict.items()): isomer = moldict[key] self.logger.info("Parsing isomer %s: '%s'", i, isomer) if not file.exists() or file.stat().st_size == 0: self.logger.warning("Docking failed for '%s' (%s)", isomer.inchi, isomer) continue _adv2sdf(file, file.with_suffix(".sdf")) isomer.update_conformers_from_sdf( file.with_suffix(".sdf"), score_parser=_adv_score_parser_meeko )
[docs] class Vina(_Vina): """ Runs Vina [#eberhardt2021]_ on a molecule input. The step expects to either find a ``vina`` executable in the ``PATH``, an appropriate module defined in ``config.toml``, or a module specified using the :attr:`~maize.core.node.Node.modules` attribute. References ---------- .. [#eberhardt2021] Eberhardt, J., Santos-Martins, D., Tillack, A. F. & Forli, S. AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field, and Python Bindings. J. Chem. Inf. Model. 61, 3891-3898 (2021). .. [#trott2010] Trott, O. & Olson, A. J. AutoDock Vina: Improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading. Journal of Computational Chemistry 31, 455-461 (2010). """ required_callables = ["vina"] """Requires the ``vina`` executable""" required_packages = ["meeko"] """Requires a custom environment with ``meeko==0.4`` installed""" def run(self) -> None: mols = self.inp.receive() x, y, z = self.search_center.value dx, dy, dz = self.search_range.value # Collect all docking commands to be executed, create directories lig_temp = Path("lig-temp.pdbqt") commands: list[str] = [] mol_outputs: list[list[Path]] = [] for i, mol in enumerate(mols): mol_path = Path(f"mol-{i}") mol_path.mkdir() isomer_outputs: list[Path] = [] self.logger.info("Docking molecule %s: '%s'", i, mol) for j, isomer in enumerate(mol.molecules): self.logger.debug(" Docking isomer %s: '%s'", j, isomer) iso_path = mol_path / f"isomer-{j}" iso_path.mkdir() ligand = iso_path / "input.pdbqt" docked = iso_path / f"{isomer.inchi}_out.pdbqt" try: _mol2pdbqt(lig_temp, isomer) _clean_pdbqt_atomtypes(lig_temp, ligand) except ValueError as err: self.logger.warning( "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err ) command = ( f"{self.runnable['vina']} --receptor {self.receptor.filepath.as_posix()} " f"--ligand {ligand.as_posix()} " f"--cpu 1 --seed {self.seed.value} --out {docked.as_posix()} " f"--num_modes {self.n_poses.value} " f"--center_x {x} --center_y {y} --center_z {z} " f"--size_x {dx} --size_y {dy} --size_z {dz} " ) commands.append(command) isomer_outputs.append(docked) mol_outputs.append(isomer_outputs) # Run all commands at once self.run_multi( commands, verbose=True, raise_on_failure=False, n_jobs=self.n_jobs.value, ) # Convert each pose to SDF, update isomer conformation self._parse_adv_outputs(mols, mol_outputs) self.out.send(mols)
def _clean_pdbqt_atomtypes(pdbqt_in: Path, pdbqt_out: Path) -> None: """Replaces ``G0`` and ``CG0`` atomtypes with normal carbons.""" with pdbqt_in.open() as inp, pdbqt_out.open("w") as out: out.write(re.sub("(CG0)|(G0)", "C", inp.read()))
[docs] class VinaGPU(_Vina): """ Runs Vina-GPU [#ding2023]_ on a molecule input. The step expects to either find a ``vina`` executable in the ``PATH``, an appropriate module defined in ``config.toml``, or a module specified using the :attr:`~maize.core.node.Node.modules` attribute. Notes ----- The interface is mostly the same as Vina's, but requires some additional handling of the custom compiled kernels, a small change in the commandline parameters, and allows for docking a directory of ligands at once. The source can be found `here <https://github.com/DeltaGroupNJUPT/Vina-GPU-2.0>`_. Installation requires both the *boost* sources and installed headers, and ``-DOPENCL_3_0`` should *not* be specified (contrary to the official installation instructions). References ---------- .. [#ding2023] Ding, J. et al. Vina-GPU 2.0: Further Accelerating AutoDock Vina and Its Derivatives with Graphics Processing Units. J. Chem. Inf. Model. (2023) doi:10.1021/acs.jcim.2c01504. .. [#trott2010] Trott, O. & Olson, A. J. AutoDock Vina: Improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading. Journal of Computational Chemistry 31, 455-461 (2010). """ required_callables = ["vinagpu"] """Requires the ``vinagpu`` executable""" required_packages = ["meeko"] """Requires a custom environment with ``meeko==0.4`` installed""" def run(self) -> None: mols = self.inp.receive() # VinaGPU requires custom built kernels in the same directory, # so we copy them from the install location kernel_dir = Path(self.runnable["vinagpu"]).parent self.logger.debug("Looking for kernels in '%s'", kernel_dir.as_posix()) for i in (1, 2): kernel = kernel_dir / f"Kernel{i}_Opt.bin" if not kernel.exists(): raise FileNotFoundError( "VinaGPU requires the 'Kernel_Opt.bin' files to " "be present in the Vina-GPU binary folder" ) shutil.copy(kernel, self.work_dir) x, y, z = self.search_center.value dx, dy, dz = self.search_range.value lig_temp = Path("lig-temp.pdbqt") inputs, outputs = Path("inputs"), Path("outputs") inputs.mkdir() outputs.mkdir() mol_docked = [] for i, mol in enumerate(mols): self.logger.info("Docking molecule %s: '%s'", i, mol) docked = [] for j, isomer in enumerate(mol.molecules): self.logger.debug(" Docking isomer %s: '%s'", j, isomer) ligand = inputs / f"{isomer.inchi}.pdbqt" docked.append(outputs / f"{isomer.inchi}_out.pdbqt") try: _mol2pdbqt(lig_temp, isomer) _clean_pdbqt_atomtypes(lig_temp, ligand) except ValueError as err: self.logger.warning( "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err ) mol_docked.append(docked) command = ( f"{self.runnable['vinagpu']} --receptor {self.receptor.filepath.as_posix()} " f"--ligand_directory {inputs.as_posix()} " f"--output_directory {outputs.as_posix()} " f"--thread 8000 --seed {self.seed.value} " f"--num_modes {self.n_poses.value} " f"--center_x {x} --center_y {y} --center_z {z} " f"--size_x {dx} --size_y {dy} --size_z {dz} " ) self.run_command( command, verbose=True, raise_on_failure=False, # validators=[SuccessValidator("...done")], ) # Convert each pose to SDF, update isomer conformation self.logger.debug("Docking outputs: '%s'", list(outputs.iterdir())) self.logger.debug("Parsing: '%s'", mol_docked) self._parse_adv_outputs(mols, mol_docked) self.out.send(mols)
[docs] class QuickVinaGPU(_Vina): """ Runs QuickVina2 or QuickVina-W for GPUs [#ding2023]_ on a molecule input. For an overview, see `this <https://qvina.github.io/>`_. The step expects to either find a ``quickvina`` executable in the ``PATH``, an appropriate module defined in ``config.toml``, or a module specified using the :attr:`~maize.core.node.Node.modules` attribute. Notes ----- The interface is mostly the same as Vina's, but requires some additional handling of the custom compiled kernels, a small change in the commandline parameters, and allows for docking a directory of ligands at once. The source can be found `here <https://github.com/DeltaGroupNJUPT/Vina-GPU-2.0>`_. Installation requires both the *boost* sources and installed headers, and ``-DOPENCL_3_0`` should *not* be specified (contrary to the official installation instructions). References ---------- .. [#hassan2017] Hassan, N. M., Alhossary, A. A., Mu, Y. & Kwoh, C.-K. Protein-Ligand Blind Docking Using QuickVina-W With Inter-Process Spatio-Temporal Integration. Sci Rep 7, 15451 (2017). .. [#alhossary2015] Alhossary, A., Handoko, S. D., Mu, Y. & Kwoh, C.-K. Fast, accurate, and reliable molecular docking with QuickVina 2. Bioinformatics 31, 2214-2216 (2015). """ required_callables = ["quickvina"] """Requires the ``quickvina`` executable""" required_packages = ["meeko"] """Requires a custom environment with ``meeko==0.4`` installed""" def run(self) -> None: mols = self.inp.receive() x, y, z = self.search_center.value dx, dy, dz = self.search_range.value # VinaGPU requires custom built kernels in the same directory, # so we copy them from the install location kernel_dir = Path(self.runnable["quickvina"]).parent self.logger.debug("Looking for kernels in '%s'", kernel_dir.as_posix()) if not (kernel_dir / "Kernel2_Opt.bin").exists(): raise FileNotFoundError( "VinaGPU requires the 'Kernel_Opt.bin' files to " "be present in the Vina-GPU binary folder" ) shutil.copy(kernel_dir / "Kernel2_Opt.bin", self.work_dir / "Kernel2_Opt.bin") shutil.copytree(kernel_dir / "OpenCL", self.work_dir / "OpenCL") # Collect all docking commands to be executed, create directories lig_temp = Path("lig-temp.pdbqt") commands: list[str] = [] mol_outputs: list[list[Path]] = [] for i, mol in enumerate(mols): mol_path = Path(f"mol-{i}") mol_path.mkdir() isomer_outputs: list[Path] = [] self.logger.info("Docking molecule %s: '%s'", i, mol) for j, isomer in enumerate(mol.molecules): self.logger.debug(" Docking isomer %s: '%s'", j, isomer) iso_path = mol_path / f"isomer-{j}" iso_path.mkdir() ligand = iso_path / "input.pdbqt" docked = iso_path / f"{isomer.inchi}_out.pdbqt" try: _mol2pdbqt(lig_temp, isomer) _clean_pdbqt_atomtypes(lig_temp, ligand) except ValueError as err: self.logger.warning( "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err ) command = ( f"{self.runnable['quickvina']} --receptor {self.receptor.filepath.as_posix()} " f"--ligand {ligand.as_posix()} " f"--seed {self.seed.value} --out {docked.as_posix()} " f"--thread 8000 " f"--num_modes {self.n_poses.value} " f"--center_x {x} --center_y {y} --center_z {z} " f"--size_x {dx} --size_y {dy} --size_z {dz} " ) commands.append(command) isomer_outputs.append(docked) mol_outputs.append(isomer_outputs) # Run all commands at once self.run_multi( commands, verbose=True, raise_on_failure=False, validators=[SuccessValidator("Writing output")], n_jobs=self.n_jobs.value, ) # Convert each pose to SDF, update isomer conformation self._parse_adv_outputs(mols, mol_outputs) self.out.send(mols)
[docs] class VinaScore(Node): """ Runs Vina scoring [#eberhardt2021]_ on a molecule input. The step expects to either find a ``vina`` executable in the ``PATH``, an appropriate module defined in ``config.toml``, or a module specified using the :attr:`~maize.core.node.Node.modules` attribute. """ required_callables = ["vina"] """Requires the ``vina`` executable""" required_packages = ["meeko"] """Requires a custom environment with ``meeko==0.4`` installed""" inp: Input[list[IsomerCollection]] = Input() """List of molecules to dock""" out: Output[list[IsomerCollection]] = Output(optional=True) """Molecules with scores attached.""" out_scores: Output[NDArray[np.float32]] = Output() """Docking scores, the best for each docked IsomerCollection""" n_jobs: Parameter[int] = Parameter(default=cpu_count()) """Number of docking runs to perform in parallel""" receptor: FileParameter[Annotated[Path, Suffix("pdbqt")]] = FileParameter() """Path to the receptor structure""" def run(self) -> None: mols = self.inp.receive() # Collect all docking commands to be executed, create directories lig_temp = Path("lig-temp.pdbqt") commands: list[str] = [] ligands: list[str] = [] for i, mol in enumerate(mols): self.logger.info("Scoring molecule %s: '%s'", i, mol) for j, isomer in enumerate(mol.molecules): self.logger.debug(" Scoring isomer %s: '%s'", j, isomer) ligand = Path(f"{isomer.inchi}_input.pdbqt") try: _mol2pdbqt(lig_temp, isomer) _clean_pdbqt_atomtypes(lig_temp, ligand) except ValueError as err: self.logger.warning( "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err ) command = ( f"{self.runnable['vina']} --receptor {self.receptor.filepath.as_posix()} " f"--ligand {ligand.as_posix()} --autobox --score_only" ) commands.append(command) ligands.append(isomer.inchi) # Run all commands at once results = self.run_multi( commands=commands, verbose=True, raise_on_failure=False, validators=[SuccessValidator("Estimated Free Energy")], n_jobs=self.n_jobs.value, ) idx = 0 for mol in mols: for isomer in mol.molecules: isomer.score_tag = "energy" isomer.set_tag("score_type", "oracle") score = np.nan if isomer.inchi in ligands: if match := re.search(SCORE_ONLY_RESULT_REGEX, results[idx].stdout.decode()): score = float(match.group(1)) idx += 1 isomer.set_tag("energy", score) self.logger.info("Parsed isomer '%s', score %s", isomer.inchi, isomer.scores.min()) self.out_scores.send(np.array([mol.best_score for mol in mols])) self.out.send(mols)
# 1UYD previously published with Icolos (IcolosData/molecules/1UYD) @pytest.fixture def protein_path(shared_datadir: Any) -> Any: return shared_datadir / "1UYD_apo.pdb" @pytest.fixture def receptor_path(shared_datadir: Any) -> Any: return shared_datadir / "1UYD_fixed.pdbqt" @pytest.fixture def ligand_path(shared_datadir: Any) -> Any: return shared_datadir / "1UYD_ligand.sdf" # From AD GPU @pytest.fixture def grid_path(shared_datadir: Any) -> Any: return shared_datadir / "1stp" / "1stp_protein.maps.fld" class TestSuiteAutodock: def test_PreparePDBQT(self, temp_working_dir: Any, protein_path: Any, test_config: Any) -> None: rig = TestRig(PreparePDBQT, config=test_config) params: list[dict[str, Any]] = [ {"repairs": "None"}, { "repairs": "bonds_hydrogens", "cleanup_protein": ["lps", "waters"], "remove_nonstd": True, }, {"repairs": "checkhydrogens", "cleanup_protein": ["nphs", "nonstdres"]}, ] for param in params: res = rig.setup_run(inputs={"inp": [protein_path]}, parameters=param) file = res["out"].get() assert file is not None assert file.exists() def test_AutoDockGPU( self, temp_working_dir: Any, grid_path: Any, ligand_path: Any, test_config: Any ) -> None: rig = TestRig(AutoDockGPU, config=test_config) mol = IsomerCollection.from_sdf(ligand_path) mol.embed() # SMILES from 1UYD data (Icolos) mol_fail = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC") mol_fail.embed() res = rig.setup_run( parameters={"grid_file": grid_path, "derivtypes": {"NA": "N", "SA": "S"}}, inputs={"inp": [[mol, mol_fail]]}, ) docked = res["out"].get() assert docked is not None assert len(docked) == 2 assert docked[0].scored assert not docked[1].scored assert docked[0].molecules[0].n_conformers == 20 assert -5.0 < docked[0].best_score < -2.0 def test_Vina(self, temp_working_dir: Any, receptor_path: Any, test_config: Any) -> None: """Test Autodock in isolation""" rig = TestRig(Vina, config=test_config) params = { "search_center": (3.3, 11.5, 24.8), "receptor": receptor_path, "n_poses": 4, } mol = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC") mol.embed() res = rig.setup_run(parameters=params, inputs={"inp": [[mol]]}) docked = res["out"].get() assert docked is not None assert len(docked) == 1 assert docked[0].scored assert docked[0].molecules[0].n_conformers == 4 assert -11.0 < docked[0].best_score < -7.0 def test_QuickVinaGPU( self, temp_working_dir: Any, receptor_path: Any, test_config: Any ) -> None: """Test Autodock in isolation""" rig = TestRig(QuickVinaGPU, config=test_config) params = { "search_center": (3.3, 11.5, 24.8), "receptor": receptor_path, "n_poses": 4, } mol = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC") mol.embed() res = rig.setup_run(parameters=params, inputs={"inp": [[mol]]}) docked = res["out"].get() assert docked is not None assert len(docked) == 1 assert docked[0].scored assert docked[0].molecules[0].n_conformers == 4 assert -11.0 < docked[0].best_score < -7.0 def test_VinaGPU(self, temp_working_dir: Any, receptor_path: Any, test_config: Any) -> None: """Test Autodock in isolation""" rig = TestRig(VinaGPU, config=test_config) params = { "search_center": (3.3, 11.5, 24.8), "receptor": receptor_path, "n_poses": 4, } mol = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC") mol.embed() res = rig.setup_run(parameters=params, inputs={"inp": [[mol]]}) docked = res["out"].get() assert docked is not None assert len(docked) == 1 assert docked[0].scored assert docked[0].molecules[0].n_conformers == 4 assert -11.0 < docked[0].best_score < -7.0 def test_VinaScore(self, temp_working_dir: Any, receptor_path: Any, test_config: Any) -> None: """Test Vina in isolation""" rig = TestRig(VinaScore, config=test_config) params = {"receptor": receptor_path} mol1 = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC") mol1.embed() mol2 = IsomerCollection.from_smiles("Nc1nc(Cl)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC") mol2.embed() res = rig.setup_run(parameters=params, inputs={"inp": [[mol1, mol2]]}) docked = res["out"].get() assert docked is not None assert len(docked) == 2 assert all(dock.scored for dock in docked) scores = res["out_scores"].get() assert scores is not None assert len(scores) == 2 assert (scores < 0).all()