Source code for maize.steps.mai.docking.rocs

"""ROCS shape-matching implementation, original code by Michael Dodds"""

# pylint: disable=import-outside-toplevel, import-error

from pathlib import Path
from typing import Annotated, Any, Literal

import numpy as np
from numpy.typing import NDArray
import pytest

from maize.core.node import Node
from maize.core.interface import Parameter, Flag, FileParameter, Suffix, Input, Output
from maize.utilities.chem.chem import ChemistryException, Isomer
from maize.utilities.testing import TestRig
from maize.utilities.io import Config

from maize.utilities.chem import IsomerCollection


_SimMeasureType = Literal["Tanimoto", "RefTversky", "FitTversky"]
_SCORE_METHODS = {
    "Tanimoto": ("GetTanimoto", "GetColorTanimoto"),
    "RefTversky": ("GetRefTversky", "GetRefColorTversky"),
    "FitTversky": ("GetFitTversky", "GetFitColorTversky"),
}
_SCORE_COMBO = {
    "Tanimoto": "OEHighestTanimotoCombo",
    "RefTversky": "OEHighestRefTverskyCombo",
    "FitTversky": "OEHighestFitTverskyCombo",
}


# Potentially useful if we need to do more stuff with OpenEye in the future:
# https://gist.github.com/bannanc/810ccc4636b930a4522636baab1965a6
def _isomer2oe(isomer: Isomer) -> Any:
    """Convert a maize isomer to an ``OEMol`` object"""
    from openeye import oechem

    sdf = isomer.to_mol_block()
    ims = oechem.oemolistream()
    ims.SetFormat(oechem.OEFormat_SDF)
    ims.openstring(sdf)
    return oechem.OEMol(next(ims.GetOEMols()))


def _oe2isomer_conf(oemol: Any, isomer: Isomer) -> None:
    """Convert an ``OEMol`` object to a conformer added to an `Isomer`"""
    from openeye import oechem

    # OpenEye adds pseudo-atoms during ROCS (donor, acceptor, etc.), we remove
    # these and add explicit hydrogens before conversion so RDKit doesn't choke
    for atom in oemol.GetAtoms():
        if atom.GetAtomicNum() == 0:
            oemol.DeleteAtom(atom)
    oechem.OEPlaceHydrogens(oemol)

    # This procedure saves us from having to write to a file
    oms = oechem.oemolostream()
    oms.SetFormat(oechem.OEFormat_SDF)
    oms.openstring()
    oechem.OEWriteMolecule(oms, oemol)
    isomer.update_conformers_from_mol_block(oms.GetString().decode("UTF-8"))


def _gen_conf(oemol: Any, omega: Any, max_stereo: int = 0) -> None:
    """Generate enantiomers using OEFlipper"""
    from openeye import oeomega, oechem

    # OEMol object, max. number of stereocenters, force flip, enum nitrogen
    enantiomers = [
        oechem.OEMol(isomer)
        for isomer in oeomega.OEFlipper(oemol.GetActive(), max_stereo, False, True)
    ]
    first = enantiomers[0]
    omega.Build(first)
    oemol = oechem.OEMol(first.SCMol())
    oemol.DeleteConfs()

    # Add conformers to OEMol object
    for enantiomer in enantiomers:
        omega.Build(enantiomer)
        for conf in enantiomer.GetConfs():
            oemol.NewConf(conf)


def _prepare_overlay(shape_query: Path) -> Any:
    """Prepares the shape query and returns an overlay"""
    from openeye import oeshape

    query = oeshape.OEShapeQuery()
    oeshape.OEReadShapeQuery(shape_query.as_posix(), query)
    overlay = oeshape.OEOverlay()
    overlay.SetupRef(query)
    return overlay


def _score(
    oemol: Any,
    similarity_measure: _SimMeasureType,
    overlay: Any,
    shape_weight: float = 0.5,
    color_weight: float = 0.5,
) -> tuple[Any, float]:
    """Calculate the ROCS score"""
    from openeye import oeshape

    score = oeshape.OEBestOverlayScore()
    combo = getattr(oeshape, _SCORE_COMBO[similarity_measure])
    overlay.BestOverlay(score, oemol, combo())
    shape, color = (getattr(score, met)() for met in _SCORE_METHODS[similarity_measure])
    return score, shape_weight * shape + color_weight * color


[docs] class ROCS(Node): """ Performs ROCS shape-match scoring [#grant1996]_. Notes ----- Requires a maize environment with ``openeye-toolkit`` installed. OpenEye in turn requires the OE_LICENSE environment variable to be set to a valid license file. References ---------- .. [#grant1996] Grant, J. A., Gallardo, M. A. & Pickup, B. T. A fast method of molecular shape comparison: A simple application of a Gaussian description of molecular shape. Journal of Computational Chemistry 17, 1653-1666 (1996). See also the `full list of related publications <https://docs.eyesopen.com/applications/rocs/pub.html>`_. """ required_packages = ["openeye"] inp: Input[list[IsomerCollection]] = Input() """List of molecules to be scored""" out: Output[list[IsomerCollection]] = Output() """List of molecules with conformers best matching the query""" out_scores: Output[NDArray[np.float32]] = Output() """Score output""" query: FileParameter[Annotated[Path, Suffix("sq")]] = FileParameter() """Reference query molecule""" max_stereo: Parameter[int] = Parameter(default=10) """Maximum number of stereocenters to be enumerated in molecule""" max_confs: Parameter[int] = Parameter(default=200) """Maximum number of conformers generated per stereoisomer""" energy_window: Parameter[int] = Parameter(default=10) """Difference between lowest and highest energy conformer""" similarity_measure: Parameter[_SimMeasureType] = Parameter(default="Tanimoto") """Similarity between reference and molecule""" color_weight: Parameter[float] = Parameter(default=0.5) """Weight applied to the color-matching score""" shape_weight: Parameter[float] = Parameter(default=0.5) """Weight applied to the shape-matching score""" scores_only: Flag = Flag(default=True) """Whether to only output scores, without poses""" strict: Flag = Flag(default=False) """If ``True`` will fail and raise an exception when failing to score a molecule""" gpu: Flag = Flag(default=True) """Whether to use the GPU""" def run(self) -> None: from openeye import oechem, oeomega, oequacpac, oeshape # Conformer generation options omega_options = oeomega.OEOmegaOptions() omega_options.SetStrictStereo(False) omega_options.SetEnergyWindow(self.energy_window.value) omega_options.SetMaxConfs(self.max_confs.value) omega_options.GetTorDriveOptions().SetUseGPU(self.gpu.value) omega = oeomega.OEOmega(omega_options) overlay = _prepare_overlay(self.query.filepath) prep = oeshape.OEOverlapPrep() # Normalize weights in case they don't add up to 1.0 shape_weight = self.shape_weight.value color_weight = self.color_weight.value weight_sum = shape_weight + color_weight shape_weight, color_weight = shape_weight / weight_sum, color_weight / weight_sum scores = [] mols = self.inp.receive() for mol in mols: if len(mol.molecules) > 1: self.logger.warning( "Molecule '%s' has more than one isomer. ROCS performs it's own " "conformer generation and will ignore all but the first isomer.", mol.smiles, ) isomer = mol.molecules[0] oemol = _isomer2oe(isomer) oequacpac.OEGetReasonableProtomer(oemol) # Create conformers _gen_conf(oemol, omega=omega, max_stereo=self.max_stereo.value) # Non-zero returncode indicates an error if omega.Build(oemol): if self.strict.value: raise ChemistryException(f"Omega failed to build '{isomer.inchi}'") self.logger.warning("Omega failed to build '%s'", isomer.inchi) score = np.nan scores.append(score) isomer.scores = np.array([score]) continue prep.Prep(oemol) scorer, result = _score( oemol, similarity_measure=self.similarity_measure.value, overlay=overlay, shape_weight=shape_weight, color_weight=color_weight, ) self.logger.info("Shape matched '%s' with a score of %s", isomer.inchi, result) scores.append(result) isomer.scores = np.array([result]) if not self.scores_only.value: docked = oechem.OEGraphMol( oemol.GetConf(oechem.OEHasConfIdx(scorer.GetFitConfIdx())) ) _oe2isomer_conf(docked, isomer) self.out_scores.send(np.array(scores)) if not self.scores_only.value: self.out.send(mols)
# Published COX-2 inhibitor: https://go.drugbank.com/drugs/DB03477 @pytest.fixture def shape_query(shared_datadir: Path) -> Path: return shared_datadir / "S58.sq" @pytest.fixture def example_mol(shared_datadir: Path) -> IsomerCollection: return IsomerCollection.from_sdf(shared_datadir / "S58.sdf") def test_rocs( shape_query: Path, example_mol: IsomerCollection, test_config: Config, temp_working_dir: Path, ) -> None: """Test our step in isolation""" other_mol = IsomerCollection.from_smiles("CCCO") other_mol.embed() mols = [example_mol, other_mol] rig = TestRig(ROCS, config=test_config) res = rig.setup_run( inputs={"inp": [mols]}, parameters={"query": shape_query, "scores_only": False}, ) docked = res["out"].get() scores = res["out_scores"].get() assert docked is not None assert scores is not None assert len(docked) == 2 assert len(scores) == 2 for mol in docked: assert mol.scored for iso in mol.molecules: assert iso.scores is not None assert 0 < iso.scores[0] < 1