Source code for maize.steps.mai.docking.adv

"""Autodock Vina implementation"""

# pylint: disable=import-outside-toplevel, import-error

import json
import logging
from pathlib import Path
import re
import shutil
import tarfile
from typing import Annotated, Any, Literal, Optional
import xml.etree.ElementTree as ET

import numpy as np
from numpy.typing import NDArray
import pytest

from maize.core.node import Node
from maize.core.interface import Parameter, Flag, FileParameter, Suffix, Input, Output
from maize.utilities.chem import Isomer, IsomerCollection
from maize.utilities.testing import TestRig
from maize.utilities.validation import SuccessValidator, FileValidator
from maize.utilities.resources import cpu_count

AD_HIGH_ENERGY = 1000
SCORE_ONLY_RESULT_REGEX = re.compile(r"\s*Estimated Free Energy of Binding\s*\:\s+(-?\d+\.\d+)\s*")


log = logging.getLogger("run")


def _adv_score_parser_meeko(props: dict[str, str]) -> float:
    """Parse scores from Vina output."""
    log.debug("Parsing SDF properties '%s'", props)
    value = float(json.loads(props.get("meeko", ""))["free_energy"])
    log.debug("Parsed value '%s' from properties", value)
    return value


def _adgpu_score_parser(
    file: Annotated[Path, Suffix("xml")], log: Optional["logging.Logger"] = None
) -> dict[int, dict[str, float | int]]:
    """Parse scores from an AutoDockGPU XML output file"""
    if not file.exists():
        raise FileNotFoundError(f"XML file at '{file.as_posix()}' does not exist")

    tree = ET.parse(file)
    if (res_section := tree.find("result")) is None or (
        rmsd_section := res_section.find("rmsd_table")
    ) is None:
        raise KeyError(f"XML file at '{file.as_posix()}' is malformed or empty")

    results = {}
    for res in rmsd_section:
        if log is not None:
            log.debug(
                "Parsing run '%s' with energy '%s'",
                res.attrib["run"],
                res.attrib["binding_energy"],
            )
        results[int(res.attrib["run"])] = {
            "energy": float(res.attrib["binding_energy"]),
            "cluster_rmsd": float(res.attrib["cluster_rmsd"]),
            "rmsd": float(res.attrib["reference_rmsd"]),
            "cluster": int(res.attrib["rank"]),
        }

    return results


def _list_of_dicts2dict_of_lists(data: list[dict[str, Any]]) -> dict[str, list[Any]]:
    """Convert from a list of dictionaries to a dictionary of lists"""
    return {k: [dic[k] for dic in data] for k in data[0]}


# Helper functions using meeko to convert to PDBQT...
def _mol2pdbqt(file: Path, isomer: "Isomer") -> None:
    """Converts an isomer to a PDBQT file using meeko"""
    from meeko import MoleculePreparation

    preparator = MoleculePreparation()
    preparator.prepare(isomer._molecule)
    preparator.write_pdbqt_file(file)


# ...and from DLG (a kind of PDBQT file) to SDF
def _adv2sdf(inp_file: Path, sdf: Path) -> None:
    """Converts an AD DLG file to SDF using meeko"""
    from meeko import PDBQTMolecule, RDKitMolCreate

    with inp_file.open() as inp:
        mol = PDBQTMolecule(inp.read(), is_dlg=inp_file.suffix == ".dlg", skip_typing=True)
    with sdf.open("w") as sdfout:
        out, failures = RDKitMolCreate.write_sd_string(mol)
        sdfout.write(out)
    if len(failures) > 0:
        raise IOError(f"Meeko failed to write file '{sdf.as_posix()}'")



[docs]
class PreparePDBQT(Node):
    """Prepares a receptor for docking with Vina."""

    required_callables = ["prepare_receptor"]
    """
    Requires various scripts and tools:

    prepare_receptor
        Included in ``AutoDockTools``.

    """
    _RepairType = Literal["bonds_hydrogens", "bonds", "hydrogens", "checkhydrogens", "None"]
    _CleanupType = Literal["nphs", "lps", "waters", "nonstdres", "deleteAltB"]

    inp: Input[Annotated[Path, Suffix(".pdb")]] = Input()
    """Receptor structure without ligand"""

    out: Output[Annotated[Path, Suffix(".pdbqt")]] = Output()
    """Tar archive of all grid files"""

    repairs: Parameter[_RepairType] = Parameter(default="None")
    """Types of repairs to be done to the PDB file"""

    preserve_charges: Flag = Flag(default=False)
    """Whether to preserve existing charges instead of adding Gasteiger charges"""

    cleanup_protein: Parameter[list[_CleanupType]] = Parameter(
        default_factory=lambda: ["nphs", "lps", "waters", "nonstdres"]
    )
    """Cleanup options"""

    remove_nonstd: Flag = Flag(default=False)
    """Remove non-standard residues"""

    def run(self) -> None:
        structure = self.inp.receive()

        receptor_pdbqt = Path("rec.pdbqt")
        command = (
            f"{self.runnable['prepare_receptor']} "
            f"-A '{self.repairs.value}' "
            f"-U '{'_'.join(self.cleanup_protein.value)}' "
            f"-r {structure.as_posix()} "
            f"-o {receptor_pdbqt.as_posix()} "
        )
        if self.preserve_charges.value:
            command += "-C "
        if self.remove_nonstd.value:
            command += "-e "

        self.run_command(command, validators=[FileValidator(receptor_pdbqt)], verbose=True)
        self.out.send(receptor_pdbqt)



# TODO Allow anchored / constrained docking, see:
# https://github.com/ccsb-scripps/AutoDock-GPU/wiki/Anchored-docking

[docs]
class PrepareGrid(Node):
    """Prepares a receptor for docking with AutoDock4."""

    required_callables = ["prepare_receptor", "write_gpf", "autogrid"]
    """
    Requires various scripts and tools:

    write_gpf
        Script to create GPF output with all possible atomtypes,
        `from here <https://github.com/diogomart/write-autogrid-config>`_.

    prepare_receptor
        Included in ``AutoDockTools``.

    autogrid
        Included in the normal CPU-only version of AutoDock

    """

    required_packages = ["meeko"]
    """Requires a custom environment with ``meeko`` installed"""

    inp_structure: Input[Annotated[Path, Suffix(".pdb", ".pdbqt")]] = Input()
    """Receptor structure without ligand"""

    inp_ligand: Input[Isomer] = Input(optional=True)
    """Reference ligand structure, if not provided requires `search_center` to be set"""

    out: Output[Annotated[Path, Suffix("tar")]] = Output()
    """Tar archive of all grid files"""

    search_center: Parameter[tuple[float, float, float]] = Parameter(
        default=(np.nan, np.nan, np.nan)
    )
    """Center of the search space for docking, required if `inp_ligand` is not given"""

    search_range: Parameter[tuple[float, float, float]] = Parameter(default=(15.0, 15.0, 15.0))
    """Range of the search space for docking"""

    def run(self) -> None:
        structure = self.inp_structure.receive()

        # Create receptor PDBQT (if needed)
        receptor_pdbqt = Path("rec.pdbqt")
        if structure.suffix == ".pdbqt":
            shutil.move(structure, receptor_pdbqt)
        else:
            self.run_command(
                f"{self.runnable['prepare_receptor']} "
                f"-r {structure.as_posix()} "
                f"-o {receptor_pdbqt.as_posix()}",
                validators=[FileValidator(receptor_pdbqt)],
            )

        # Create temporary ligand PDBQT
        if self.inp_ligand.ready():
            lig = self.inp_ligand.receive()
            lig_pdbqt = Path("lig.pdbqt")
            _mol2pdbqt(lig_pdbqt, lig)
            command = (
                f"{self.runnable['write_gpf']} "
                f"-l {lig_pdbqt.as_posix()} "
                f"{receptor_pdbqt.as_posix()}"
            )
        else:
            assert all(np.isfinite(c) for c in self.search_center.value)
            box_config = Path("box.txt")
            with box_config.open("w") as conf:
                for axis, coord, size in zip(
                    ("x", "y", "z"), self.search_center.value, self.search_range.value
                ):
                    conf.write(f"center_{axis} = {coord:5.3f}\n")
                    conf.write(f"size_{axis} = {size:5.3f}\n")
            command = (
                f"{self.runnable['write_gpf']} "
                f"-b {box_config.as_posix()} "
                f"{receptor_pdbqt.as_posix()}"
            )

        # Create GPF, includes search geometry and index of needed maps
        gpf = receptor_pdbqt.with_suffix(".gpf")
        self.run_command(command, validators=[FileValidator(gpf)])

        # Create maps
        glg = gpf.with_suffix(".glg")
        fld = glg.with_suffix(".maps.fld")
        self.run_command(
            f"{self.runnable['autogrid']} -p {gpf.as_posix()} -l {glg.as_posix()}",
            validators=[FileValidator(glg), FileValidator(fld)],
        )

        # Wrap it all up
        tar = Path("grid.tar")
        with tarfile.open(tar, "w") as archive:
            for file in Path().glob("*.map"):
                archive.add(file)
            for file in (receptor_pdbqt, gpf, glg, fld):
                archive.add(file)
        self.out.send(tar)




[docs]
class AutoDockGPU(Node):
    """
    Runs AutoDock on the GPU [#santos2021]_.

    Notes
    -----
    Clone the repo from `here <https://github.com/ccsb-scripps/AutoDock-GPU>`_,
    load modules for the compiler and CUDA, set ``GPU_INCLUDE_PATH`` and
    ``GPU_LIBRARY_PATH``, and run ``make DEVICE=CUDA``. This also requires
    `meeko <https://github.com/forlilab/Meeko>`_ to convert to and from pdbqt
    files, specify `mk_prepare` and `mk_export`.

    If you get very high docking scores this often means that the ligand is outside
    of the grid. This can be due to a map that is too small (increase ``search_range``)
    or a misplaced box that is hard to access (modify ``search_center``).

    References
    ----------
    .. [#santos2021] Santos-Martins, D. et al. Accelerating AutoDock4 with GPUs
       and Gradient-Based Local Search. J. Chem. Theory Comput. 17, 1060-1073 (2021).

    """

    required_callables = ["autodock_gpu"]
    """Requires the ``autodock_gpu`` executable"""

    required_packages = ["meeko"]
    """Requires a custom environment with ``meeko==0.4`` installed"""

    inp: Input[list[IsomerCollection]] = Input()
    """
    List of molecules to dock, each molecule can have multiple isomers,
    these will be docked separately.

    """

    out: Output[list[IsomerCollection]] = Output(optional=True)
    """
    Docked molecules with conformations and scores attached. Also
    include per-conformer clustering information performed by
    AutoDock, use the keys 'rmsd', 'cluster_rmsd', 'cluster' to access.

    """

    out_scores: Output[NDArray[np.float32]] = Output()
    """Docking scores, the best for each docked IsomerCollection"""

    ref_ligand: Parameter[Isomer] = Parameter(optional=True)
    """Optional reference ligand for RMSD analysis"""

    grid_file: FileParameter[Path] = FileParameter()
    """The protein grid file, all internally referenced files must be available"""

    seed: Parameter[int] = Parameter(default=42)
    """The default seed"""

    heuristics: Parameter[int] = Parameter(default=1)
    """Number of evaluations for ligand-based automatic search"""

    heurmax: Parameter[int] = Parameter(default=12000000)
    """Heuristics evaluation limit"""

    nrun: Parameter[int] = Parameter(default=20)
    """LGA runs"""

    population_size: Parameter[int] = Parameter(default=150)
    """LGA population size"""

    lsit: Parameter[int] = Parameter(default=300)
    """Local search iterations"""

    derivtypes: Parameter[dict[str, str]] = Parameter(default_factory=dict)
    """Atomtype mappings to add to ``derivtype``, e.g. NA->N"""

    strict: Flag = Flag(default=False)
    """When set, raises an exception if docking a molecule failed, otherwise logs a warning"""

    scores_only: Flag = Flag(default=False)
    """If ``True``, will only return the scores and no conformers"""

    def run(self) -> None:
        mols = self.inp.receive()

        # Molecule inputs
        inputs = Path("inputs")
        inputs.mkdir()

        # Convert all ligands to pdbqt and collect
        # their paths and names in a batch file
        batch_file = Path("batch.txt")
        with batch_file.open("w") as file:
            file.write(f"{self.grid_file.filepath.as_posix()}\n")
            for mol in mols:
                for isomer in mol.molecules:
                    # Tools like REINVENT rely on getting the same number of scores out
                    # as molecules, so we can't filter out failed embeddings earlier...
                    if isomer.n_conformers == 0:
                        self.logger.warning(
                            "No embedding for '%s' ('%s'), skipping...",
                            isomer.inchi,
                            isomer.to_smiles(),
                        )
                        continue

                    ligand = inputs / f"{isomer.inchi}.pdbqt"

                    # Create pdbqt input
                    _mol2pdbqt(ligand, isomer)

                    file.write(f"{ligand.absolute().as_posix()}\n")
                    file.write(f"{isomer.inchi}\n")

        command = (
            f"{self.runnable['autodock_gpu']} --filelist {batch_file.as_posix()} "
            f"--heuristics {self.heuristics.value} --nrun {self.nrun.value} "
            f"--psize {self.population_size.value} --lsit {self.lsit.value} "
            f"--seed {self.seed.value} --heurmax {self.heurmax.value}"
        )

        # Possible reference ligand
        if self.ref_ligand.is_set:
            ref_ligand = Path("ref_ligand.pdbqt")
            _mol2pdbqt(ref_ligand, self.ref_ligand.value)
            command += f" --xraylfile {ref_ligand.as_posix()}"

        # Possible derivtypes
        if self.derivtypes.value:
            derivtypes = "/".join(f"{key}={value}" for key, value in self.derivtypes.value.items())
            command += f" --derivtype {derivtypes}"

        validators = [SuccessValidator("All jobs ran without errors")] if self.strict.value else []
        with self.gpus(1):
            self.run_command(command, verbose=True, validators=validators)

        # Collect outputs
        for mol in mols:
            for isomer in mol.molecules:
                isomer.score_tag = "energy"
                output = Path(f"{isomer.inchi}.xml")

                # Failed dockings and missing embeddings get a NaN
                try:
                    # Isomer scores are in the order of the indices
                    results = _adgpu_score_parser(output, log=self.logger)
                except (KeyError, FileNotFoundError) as err:
                    if self.strict.value:
                        raise err
                    self.logger.warning("Docking isomer '%s' failed", isomer.inchi)
                    isomer.scores = np.full(self.nrun.value, np.nan)
                    continue

                res_transpose = _list_of_dicts2dict_of_lists(list(results.values()))

                # High energies indicate grid problems
                if any(ener > AD_HIGH_ENERGY for ener in res_transpose["energy"]):
                    self.logger.warning(
                        "Isomer '%s' ('%s') has runs with high energy poses. This indicates "
                        "a possible lack of grid coverage or a poorly-defined search space. "
                        "Adjust `search_range` and `search_center` during grid preparation.",
                        isomer.inchi,
                        mol.smiles,
                    )

                # Convenience score attribute
                isomer.set_tag("score_type", "oracle")
                isomer.scores = np.array(res_transpose["energy"])
                self.logger.info("Parsed isomer '%s', score %s", isomer.inchi, isomer.scores.min())

                # We only parse the conformers if the user asks for them,
                # otherwise it unnecessarily slows things like REINVENT down
                if not self.scores_only.value:
                    # This allows us to convert all pdbqt outputs
                    # into one SDF, with the scoring order
                    sdf_out = Path(f"{isomer.inchi}-out.sdf")
                    _adv2sdf(output.with_suffix(".dlg"), sdf_out)

                    # Add all conformers and set their coords
                    isomer.update_conformers_from_sdf(sdf_out)

                    # AD gives us lots of useful information for each conformer, e.g. energy
                    # reference RMSD and cluster, we tag each conformer with this information
                    for key, vals in res_transpose.items():
                        for conf, val in zip(isomer.conformers, vals):
                            conf.set_tag(key, val)

        if not self.scores_only.value:
            self.out.send(mols)
        self.out_scores.send(np.array([mol.best_score for mol in mols]))



class _Vina(Node, register=False):
    """Base for all Vina variants"""

    inp: Input[list[IsomerCollection]] = Input()
    """List of molecules to dock"""

    out: Output[list[IsomerCollection]] = Output()
    """Docked molecules with conformations and scores attached"""

    seed: Parameter[int] = Parameter(default=42)
    """The default seed"""

    n_jobs: Parameter[int] = Parameter(default=cpu_count())
    """Number of docking runs to perform in parallel"""

    n_poses: Parameter[int] = Parameter(default=1)
    """Number of poses to generate"""

    receptor: FileParameter[Annotated[Path, Suffix("pdbqt")]] = FileParameter()
    """Path to the receptor structure"""

    search_center: Parameter[tuple[float, float, float]] = Parameter()
    """Center of the search space for docking"""

    search_range: Parameter[tuple[float, float, float]] = Parameter(default=(15.0, 15.0, 15.0))
    """Range of the search space for docking"""

    def prepare(self) -> None:
        super().prepare()
        import meeko

        if meeko.__version__ == "0.5.0":
            raise ImportError(
                "Vina nodes are incompatible with meeko 0.5.0 due "
                "to an upstream parsing issue with PDBQT files"
            )

    def _parse_adv_outputs(
        self, mols: list[IsomerCollection], mol_outputs: list[list[Path]]
    ) -> None:
        """Parses ADV output, including conformers and scores from PDBQT or DLG outputs"""
        moldict = {iso.inchi: iso for mol in mols for iso in mol.molecules}
        outdict = {file.stem.strip("_out"): file for folder in mol_outputs for file in folder}
        for i, (key, file) in enumerate(outdict.items()):
            isomer = moldict[key]
            self.logger.info("Parsing isomer %s: '%s'", i, isomer)
            if not file.exists() or file.stat().st_size == 0:
                self.logger.warning("Docking failed for '%s' (%s)", isomer.inchi, isomer)
                continue
            _adv2sdf(file, file.with_suffix(".sdf"))
            isomer.update_conformers_from_sdf(
                file.with_suffix(".sdf"), score_parser=_adv_score_parser_meeko
            )



[docs]
class Vina(_Vina):
    """
    Runs Vina [#eberhardt2021]_ on a molecule input.

    The step expects to either find a ``vina`` executable in the ``PATH``,
    an appropriate module defined in ``config.toml``, or a module specified
    using the :attr:`~maize.core.node.Node.modules` attribute.

    References
    ----------
    .. [#eberhardt2021] Eberhardt, J., Santos-Martins, D., Tillack, A. F. & Forli, S.
       AutoDock Vina 1.2.0: New Docking Methods, Expanded Force Field,
       and Python Bindings. J. Chem. Inf. Model. 61, 3891-3898 (2021).

    .. [#trott2010] Trott, O. & Olson, A. J. AutoDock Vina: Improving the speed and accuracy
       of docking with a new scoring function, efficient optimization, and
       multithreading. Journal of Computational Chemistry 31, 455-461 (2010).

    """

    required_callables = ["vina"]
    """Requires the ``vina`` executable"""

    required_packages = ["meeko"]
    """Requires a custom environment with ``meeko==0.4`` installed"""

    def run(self) -> None:
        mols = self.inp.receive()

        x, y, z = self.search_center.value
        dx, dy, dz = self.search_range.value

        # Collect all docking commands to be executed, create directories
        lig_temp = Path("lig-temp.pdbqt")
        commands: list[str] = []
        mol_outputs: list[list[Path]] = []
        for i, mol in enumerate(mols):
            mol_path = Path(f"mol-{i}")
            mol_path.mkdir()
            isomer_outputs: list[Path] = []
            self.logger.info("Docking molecule %s: '%s'", i, mol)
            for j, isomer in enumerate(mol.molecules):
                self.logger.debug("  Docking isomer %s: '%s'", j, isomer)
                iso_path = mol_path / f"isomer-{j}"
                iso_path.mkdir()
                ligand = iso_path / "input.pdbqt"
                docked = iso_path / f"{isomer.inchi}_out.pdbqt"
                try:
                    _mol2pdbqt(lig_temp, isomer)
                    _clean_pdbqt_atomtypes(lig_temp, ligand)
                except ValueError as err:
                    self.logger.warning(
                        "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err
                    )

                command = (
                    f"{self.runnable['vina']} --receptor {self.receptor.filepath.as_posix()} "
                    f"--ligand {ligand.as_posix()} "
                    f"--cpu 1 --seed {self.seed.value} --out {docked.as_posix()} "
                    f"--num_modes {self.n_poses.value} "
                    f"--center_x {x} --center_y {y} --center_z {z} "
                    f"--size_x {dx} --size_y {dy} --size_z {dz} "
                )
                commands.append(command)
                isomer_outputs.append(docked)
            mol_outputs.append(isomer_outputs)

        # Run all commands at once
        self.run_multi(
            commands,
            verbose=True,
            raise_on_failure=False,
            n_jobs=self.n_jobs.value,
        )

        # Convert each pose to SDF, update isomer conformation
        self._parse_adv_outputs(mols, mol_outputs)
        self.out.send(mols)



def _clean_pdbqt_atomtypes(pdbqt_in: Path, pdbqt_out: Path) -> None:
    """Replaces ``G0`` and ``CG0`` atomtypes with normal carbons."""
    with pdbqt_in.open() as inp, pdbqt_out.open("w") as out:
        out.write(re.sub("(CG0)|(G0)", "C", inp.read()))



[docs]
class VinaGPU(_Vina):
    """
    Runs Vina-GPU [#ding2023]_ on a molecule input.

    The step expects to either find a ``vina`` executable in the ``PATH``,
    an appropriate module defined in ``config.toml``, or a module specified
    using the :attr:`~maize.core.node.Node.modules` attribute.

    Notes
    -----
    The interface is mostly the same as Vina's, but requires some additional handling
    of the custom compiled kernels, a small change in the commandline parameters, and
    allows for docking a directory of ligands at once. The source can be found
    `here <https://github.com/DeltaGroupNJUPT/Vina-GPU-2.0>`_. Installation requires
    both the *boost* sources and installed headers, and ``-DOPENCL_3_0`` should *not*
    be specified (contrary to the official installation instructions).

    References
    ----------
    .. [#ding2023] Ding, J. et al. Vina-GPU 2.0: Further Accelerating AutoDock Vina
       and Its Derivatives with Graphics Processing Units. J. Chem. Inf. Model. (2023)
       doi:10.1021/acs.jcim.2c01504.

    .. [#trott2010] Trott, O. & Olson, A. J. AutoDock Vina: Improving the speed and accuracy
       of docking with a new scoring function, efficient optimization, and
       multithreading. Journal of Computational Chemistry 31, 455-461 (2010).

    """

    required_callables = ["vinagpu"]
    """Requires the ``vinagpu`` executable"""

    required_packages = ["meeko"]
    """Requires a custom environment with ``meeko==0.4`` installed"""

    def run(self) -> None:
        mols = self.inp.receive()

        # VinaGPU requires custom built kernels in the same directory,
        # so we copy them from the install location
        kernel_dir = Path(self.runnable["vinagpu"]).parent
        self.logger.debug("Looking for kernels in '%s'", kernel_dir.as_posix())
        for i in (1, 2):
            kernel = kernel_dir / f"Kernel{i}_Opt.bin"
            if not kernel.exists():
                raise FileNotFoundError(
                    "VinaGPU requires the 'Kernel_Opt.bin' files to "
                    "be present in the Vina-GPU binary folder"
                )
            shutil.copy(kernel, self.work_dir)

        x, y, z = self.search_center.value
        dx, dy, dz = self.search_range.value

        lig_temp = Path("lig-temp.pdbqt")
        inputs, outputs = Path("inputs"), Path("outputs")
        inputs.mkdir()
        outputs.mkdir()
        mol_docked = []
        for i, mol in enumerate(mols):
            self.logger.info("Docking molecule %s: '%s'", i, mol)
            docked = []
            for j, isomer in enumerate(mol.molecules):
                self.logger.debug("  Docking isomer %s: '%s'", j, isomer)
                ligand = inputs / f"{isomer.inchi}.pdbqt"
                docked.append(outputs / f"{isomer.inchi}_out.pdbqt")
                try:
                    _mol2pdbqt(lig_temp, isomer)
                    _clean_pdbqt_atomtypes(lig_temp, ligand)
                except ValueError as err:
                    self.logger.warning(
                        "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err
                    )
            mol_docked.append(docked)

        command = (
            f"{self.runnable['vinagpu']} --receptor {self.receptor.filepath.as_posix()} "
            f"--ligand_directory {inputs.as_posix()} "
            f"--output_directory {outputs.as_posix()} "
            f"--thread 8000 --seed {self.seed.value} "
            f"--num_modes {self.n_poses.value} "
            f"--center_x {x} --center_y {y} --center_z {z} "
            f"--size_x {dx} --size_y {dy} --size_z {dz} "
        )
        self.run_command(
            command,
            verbose=True,
            raise_on_failure=False,
            # validators=[SuccessValidator("...done")],
        )
        # Convert each pose to SDF, update isomer conformation
        self.logger.debug("Docking outputs: '%s'", list(outputs.iterdir()))
        self.logger.debug("Parsing: '%s'", mol_docked)
        self._parse_adv_outputs(mols, mol_docked)
        self.out.send(mols)




[docs]
class QuickVinaGPU(_Vina):
    """
    Runs QuickVina2 or QuickVina-W for GPUs [#ding2023]_ on a molecule input.
    For an overview, see `this <https://qvina.github.io/>`_.

    The step expects to either find a ``quickvina`` executable in the ``PATH``,
    an appropriate module defined in ``config.toml``, or a module specified
    using the :attr:`~maize.core.node.Node.modules` attribute.

    Notes
    -----
    The interface is mostly the same as Vina's, but requires some additional handling
    of the custom compiled kernels, a small change in the commandline parameters, and
    allows for docking a directory of ligands at once. The source can be found
    `here <https://github.com/DeltaGroupNJUPT/Vina-GPU-2.0>`_. Installation requires
    both the *boost* sources and installed headers, and ``-DOPENCL_3_0`` should *not*
    be specified (contrary to the official installation instructions).

    References
    ----------
    .. [#hassan2017] Hassan, N. M., Alhossary, A. A., Mu, Y. & Kwoh, C.-K.
       Protein-Ligand Blind Docking Using QuickVina-W With Inter-Process
       Spatio-Temporal Integration. Sci Rep 7, 15451 (2017).

    .. [#alhossary2015] Alhossary, A., Handoko, S. D., Mu, Y. & Kwoh, C.-K.
       Fast, accurate, and reliable molecular docking with QuickVina 2.
       Bioinformatics 31, 2214-2216 (2015).

    """

    required_callables = ["quickvina"]
    """Requires the ``quickvina`` executable"""

    required_packages = ["meeko"]
    """Requires a custom environment with ``meeko==0.4`` installed"""

    def run(self) -> None:
        mols = self.inp.receive()

        x, y, z = self.search_center.value
        dx, dy, dz = self.search_range.value

        # VinaGPU requires custom built kernels in the same directory,
        # so we copy them from the install location
        kernel_dir = Path(self.runnable["quickvina"]).parent
        self.logger.debug("Looking for kernels in '%s'", kernel_dir.as_posix())
        if not (kernel_dir / "Kernel2_Opt.bin").exists():
            raise FileNotFoundError(
                "VinaGPU requires the 'Kernel_Opt.bin' files to "
                "be present in the Vina-GPU binary folder"
            )
        shutil.copy(kernel_dir / "Kernel2_Opt.bin", self.work_dir / "Kernel2_Opt.bin")
        shutil.copytree(kernel_dir / "OpenCL", self.work_dir / "OpenCL")

        # Collect all docking commands to be executed, create directories
        lig_temp = Path("lig-temp.pdbqt")
        commands: list[str] = []
        mol_outputs: list[list[Path]] = []
        for i, mol in enumerate(mols):
            mol_path = Path(f"mol-{i}")
            mol_path.mkdir()
            isomer_outputs: list[Path] = []
            self.logger.info("Docking molecule %s: '%s'", i, mol)
            for j, isomer in enumerate(mol.molecules):
                self.logger.debug("  Docking isomer %s: '%s'", j, isomer)
                iso_path = mol_path / f"isomer-{j}"
                iso_path.mkdir()
                ligand = iso_path / "input.pdbqt"
                docked = iso_path / f"{isomer.inchi}_out.pdbqt"
                try:
                    _mol2pdbqt(lig_temp, isomer)
                    _clean_pdbqt_atomtypes(lig_temp, ligand)
                except ValueError as err:
                    self.logger.warning(
                        "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err
                    )

                command = (
                    f"{self.runnable['quickvina']} --receptor {self.receptor.filepath.as_posix()} "
                    f"--ligand {ligand.as_posix()} "
                    f"--seed {self.seed.value} --out {docked.as_posix()} "
                    f"--thread 8000 "
                    f"--num_modes {self.n_poses.value} "
                    f"--center_x {x} --center_y {y} --center_z {z} "
                    f"--size_x {dx} --size_y {dy} --size_z {dz} "
                )
                commands.append(command)
                isomer_outputs.append(docked)
            mol_outputs.append(isomer_outputs)

        # Run all commands at once
        self.run_multi(
            commands,
            verbose=True,
            raise_on_failure=False,
            validators=[SuccessValidator("Writing output")],
            n_jobs=self.n_jobs.value,
        )

        # Convert each pose to SDF, update isomer conformation
        self._parse_adv_outputs(mols, mol_outputs)
        self.out.send(mols)




[docs]
class VinaScore(Node):
    """
    Runs Vina scoring [#eberhardt2021]_ on a molecule input.

    The step expects to either find a ``vina`` executable in the ``PATH``,
    an appropriate module defined in ``config.toml``, or a module specified
    using the :attr:`~maize.core.node.Node.modules` attribute.

    """

    required_callables = ["vina"]
    """Requires the ``vina`` executable"""

    required_packages = ["meeko"]
    """Requires a custom environment with ``meeko==0.4`` installed"""

    inp: Input[list[IsomerCollection]] = Input()
    """List of molecules to dock"""

    out: Output[list[IsomerCollection]] = Output(optional=True)
    """Molecules with scores attached."""

    out_scores: Output[NDArray[np.float32]] = Output()
    """Docking scores, the best for each docked IsomerCollection"""

    n_jobs: Parameter[int] = Parameter(default=cpu_count())
    """Number of docking runs to perform in parallel"""

    receptor: FileParameter[Annotated[Path, Suffix("pdbqt")]] = FileParameter()
    """Path to the receptor structure"""

    def run(self) -> None:
        mols = self.inp.receive()

        # Collect all docking commands to be executed, create directories
        lig_temp = Path("lig-temp.pdbqt")
        commands: list[str] = []
        ligands: list[str] = []
        for i, mol in enumerate(mols):
            self.logger.info("Scoring molecule %s: '%s'", i, mol)
            for j, isomer in enumerate(mol.molecules):
                self.logger.debug("  Scoring isomer %s: '%s'", j, isomer)
                ligand = Path(f"{isomer.inchi}_input.pdbqt")
                try:
                    _mol2pdbqt(lig_temp, isomer)
                    _clean_pdbqt_atomtypes(lig_temp, ligand)
                except ValueError as err:
                    self.logger.warning(
                        "Skipping '%s' due to PDBQT conversion error:\n %s", isomer, err
                    )

                command = (
                    f"{self.runnable['vina']} --receptor {self.receptor.filepath.as_posix()} "
                    f"--ligand {ligand.as_posix()} --autobox --score_only"
                )
                commands.append(command)
                ligands.append(isomer.inchi)

        # Run all commands at once
        results = self.run_multi(
            commands=commands,
            verbose=True,
            raise_on_failure=False,
            validators=[SuccessValidator("Estimated Free Energy")],
            n_jobs=self.n_jobs.value,
        )

        idx = 0
        for mol in mols:
            for isomer in mol.molecules:
                isomer.score_tag = "energy"
                isomer.set_tag("score_type", "oracle")
                score = np.nan
                if isomer.inchi in ligands:
                    if match := re.search(SCORE_ONLY_RESULT_REGEX, results[idx].stdout.decode()):
                        score = float(match.group(1))
                    idx += 1
                isomer.set_tag("energy", score)
                self.logger.info("Parsed isomer '%s', score %s", isomer.inchi, isomer.scores.min())

        self.out_scores.send(np.array([mol.best_score for mol in mols]))
        self.out.send(mols)



# 1UYD previously published with Icolos (IcolosData/molecules/1UYD)
@pytest.fixture
def protein_path(shared_datadir: Any) -> Any:
    return shared_datadir / "1UYD_apo.pdb"


@pytest.fixture
def receptor_path(shared_datadir: Any) -> Any:
    return shared_datadir / "1UYD_fixed.pdbqt"


@pytest.fixture
def ligand_path(shared_datadir: Any) -> Any:
    return shared_datadir / "1UYD_ligand.sdf"


# From AD GPU
@pytest.fixture
def grid_path(shared_datadir: Any) -> Any:
    return shared_datadir / "1stp" / "1stp_protein.maps.fld"


class TestSuiteAutodock:
    def test_PreparePDBQT(self, temp_working_dir: Any, protein_path: Any, test_config: Any) -> None:
        rig = TestRig(PreparePDBQT, config=test_config)
        params: list[dict[str, Any]] = [
            {"repairs": "None"},
            {
                "repairs": "bonds_hydrogens",
                "cleanup_protein": ["lps", "waters"],
                "remove_nonstd": True,
            },
            {"repairs": "checkhydrogens", "cleanup_protein": ["nphs", "nonstdres"]},
        ]
        for param in params:
            res = rig.setup_run(inputs={"inp": [protein_path]}, parameters=param)
            file = res["out"].get()
            assert file is not None
            assert file.exists()

    def test_AutoDockGPU(
        self, temp_working_dir: Any, grid_path: Any, ligand_path: Any, test_config: Any
    ) -> None:
        rig = TestRig(AutoDockGPU, config=test_config)
        mol = IsomerCollection.from_sdf(ligand_path)
        mol.embed()
        # SMILES from 1UYD data (Icolos)
        mol_fail = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC")
        mol_fail.embed()
        res = rig.setup_run(
            parameters={"grid_file": grid_path, "derivtypes": {"NA": "N", "SA": "S"}},
            inputs={"inp": [[mol, mol_fail]]},
        )
        docked = res["out"].get()
        assert docked is not None
        assert len(docked) == 2
        assert docked[0].scored
        assert not docked[1].scored
        assert docked[0].molecules[0].n_conformers == 20
        assert -5.0 < docked[0].best_score < -2.0

    def test_Vina(self, temp_working_dir: Any, receptor_path: Any, test_config: Any) -> None:
        """Test Autodock in isolation"""
        rig = TestRig(Vina, config=test_config)
        params = {
            "search_center": (3.3, 11.5, 24.8),
            "receptor": receptor_path,
            "n_poses": 4,
        }
        mol = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC")
        mol.embed()
        res = rig.setup_run(parameters=params, inputs={"inp": [[mol]]})
        docked = res["out"].get()
        assert docked is not None
        assert len(docked) == 1
        assert docked[0].scored
        assert docked[0].molecules[0].n_conformers == 4
        assert -11.0 < docked[0].best_score < -7.0

    def test_QuickVinaGPU(
        self, temp_working_dir: Any, receptor_path: Any, test_config: Any
    ) -> None:
        """Test Autodock in isolation"""
        rig = TestRig(QuickVinaGPU, config=test_config)
        params = {
            "search_center": (3.3, 11.5, 24.8),
            "receptor": receptor_path,
            "n_poses": 4,
        }
        mol = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC")
        mol.embed()
        res = rig.setup_run(parameters=params, inputs={"inp": [[mol]]})
        docked = res["out"].get()
        assert docked is not None
        assert len(docked) == 1
        assert docked[0].scored
        assert docked[0].molecules[0].n_conformers == 4
        assert -11.0 < docked[0].best_score < -7.0

    def test_VinaGPU(self, temp_working_dir: Any, receptor_path: Any, test_config: Any) -> None:
        """Test Autodock in isolation"""
        rig = TestRig(VinaGPU, config=test_config)
        params = {
            "search_center": (3.3, 11.5, 24.8),
            "receptor": receptor_path,
            "n_poses": 4,
        }
        mol = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC")
        mol.embed()
        res = rig.setup_run(parameters=params, inputs={"inp": [[mol]]})
        docked = res["out"].get()
        assert docked is not None
        assert len(docked) == 1
        assert docked[0].scored
        assert docked[0].molecules[0].n_conformers == 4
        assert -11.0 < docked[0].best_score < -7.0

    def test_VinaScore(self, temp_working_dir: Any, receptor_path: Any, test_config: Any) -> None:
        """Test Vina in isolation"""
        rig = TestRig(VinaScore, config=test_config)
        params = {"receptor": receptor_path}
        mol1 = IsomerCollection.from_smiles("Nc1nc(F)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC")
        mol1.embed()
        mol2 = IsomerCollection.from_smiles("Nc1nc(Cl)nc(c12)n(CCCC)c(n2)Cc3cc(OC)ccc3OC")
        mol2.embed()
        res = rig.setup_run(parameters=params, inputs={"inp": [[mol1, mol2]]})
        docked = res["out"].get()
        assert docked is not None
        assert len(docked) == 2
        assert all(dock.scored for dock in docked)
        scores = res["out_scores"].get()
        assert scores is not None
        assert len(scores) == 2
        assert (scores < 0).all()