Source code for maize.steps.mai.molecule.mol

"""Molecule handling steps"""

# pylint: disable=import-outside-toplevel, import-error

import json
from pathlib import Path
import random
from typing import Annotated, Any, List, Union

import numpy as np
from numpy.typing import NDArray

from maize.core.node import Node
from maize.core.interface import Input, Output, Parameter, FileParameter, Suffix, Flag
from maize.utilities.chem.chem import ChemistryException, Isomer, load_sdf_library
from maize.utilities.testing import TestRig

from maize.utilities.chem import IsomerCollection


[docs] class Smiles2Molecules(Node): """ Converts SMILES codes into a set of molecules with distinct isomers and conformers using the RDKit embedding functionality. See Also -------- :class:`~maize.steps.mai.molecule.Gypsum` : A more advanced procedure for producing different isomers and high-energy conformers. """ inp: Input[list[str]] = Input() """SMILES input""" out: Output[list[IsomerCollection]] = Output() """Molecule output""" n_conformers: Parameter[int] = Parameter(default=1) """Number of conformers to generate""" n_variants: Parameter[int] = Parameter(default=1) """Maximum number of stereoisomers to generate""" embed: Flag = Flag(default=True) """ Whether to create embeddings for the molecule. May not be required if passing it on to another embedding system. """ def run(self) -> None: smiles = self.inp.receive() mols: list[IsomerCollection] = [] n_variants = self.n_variants.value if self.embed.value else 0 for i, smi in enumerate(smiles): self.logger.info("Embedding %s/%s ('%s')", i + 1, len(smiles), smi.strip()) try: mol = IsomerCollection.from_smiles(smi, max_isomers=n_variants) if self.embed.value: mol.embed(self.n_conformers.value) except ChemistryException as err: self.logger.warning("Unable to create '%s' (%s), not sanitizing...", smi, err) if "SMILES Parse Error" in err.args[0]: mol = IsomerCollection([]) mol.smiles = smi else: mol = IsomerCollection.from_smiles(smi, max_isomers=0, sanitize=False) mols.append(mol) self.out.send(mols)
[docs] class SaveMolecule(Node): """Save a molecule to an SDF file.""" inp: Input[IsomerCollection] = Input() """Molecule input""" path: FileParameter[Annotated[Path, Suffix("sdf")]] = FileParameter(exist_required=False) """SDF output destination""" def run(self) -> None: mol = self.inp.receive() self.logger.info("Received '%s'", mol) mol.to_sdf(self.path.value)
[docs] class LoadMolecule(Node): """Load a molecule from an SDF file.""" out: Output[Isomer] = Output() """Isomer output""" path: Input[Annotated[Path, Suffix("sdf")]] = Input() """Path to the SDF file""" def run(self) -> None: input_path = self.path.receive() mol = IsomerCollection.from_sdf(input_path) self.out.send(mol.molecules[0])
[docs] class LoadSmiles(Node): """Load SMILES codes from a ``.smi`` file.""" path: FileParameter[Annotated[Path, Suffix("smi")]] = FileParameter() """SMILES file input""" out: Output[list[str]] = Output() """SMILES output""" sample: Parameter[int] = Parameter(optional=True) """Take a sample of SMILES""" def run(self) -> None: with self.path.filepath.open() as file: smiles = [smi.strip("\n") for smi in file.readlines()] if self.sample.is_set: smiles = random.choices(smiles, k=self.sample.value) self.out.send(smiles)
[docs] class ToSmiles(Node): """ transform an isomer or IsomerCollection to SMILES """ inp: Input[Union[Isomer,IsomerCollection]] = Input() """SMILES output""" out: Output[List[str]] = Output() """SMILES output""" def run(self) -> None: smiles = self.inp.receive().to_smiles() if isinstance(smiles,str): # catch the case where used with single isomer smiles = [smiles] self.out.send(smiles)
[docs] class SaveLibrary(Node): """Save a list of molecules to multiple SDF files.""" inp: Input[list[IsomerCollection]] = Input() """Molecule library input""" base_path: FileParameter[Path] = FileParameter(exist_required=False) """Base output file path name without a suffix, i.e. /path/to/output""" def run(self) -> None: mols = self.inp.receive() base = self.base_path.value for i, mol in enumerate(mols): file = base.with_name(f"{base.name}{i}.sdf") mol.to_sdf(file)
[docs] class SaveScores(Node): """Save VINA Scores to a JSON file.""" inp: Input[NDArray[np.float32]] = Input() """Molecule input""" path: FileParameter[Annotated[Path, Suffix("json")]] = FileParameter(exist_required=False) """JSON output destination""" def run(self) -> None: scores = self.inp.receive() self.logger.info(f"Received #{len(scores):d} scores") with open(self.path.value, "w") as f: json.dump(list(scores), f)
[docs] class LoadLibrary(Node): """Load a small molecule library from an SDF file""" path: FileParameter[Annotated[Path, Suffix("sdf")]] = FileParameter() """Input SDF file""" out: Output[list[IsomerCollection]] = Output() """Molecule output, each entry in the SDF is parsed as a separate molecule""" def run(self) -> None: mols = load_sdf_library(self.path.filepath, split_strategy="none") self.out.send(mols)
class TestSuiteMol: def test_Smiles2Molecules(self, test_config: Any) -> None: rig = TestRig(Smiles2Molecules, config=test_config) smiles = ["Nc1ncnc(c12)n(CCCC)c(n2)Cc3cccc(c3)OC"] res = rig.setup_run( inputs={"inp": [smiles]}, parameters={"n_conformers": 2, "n_isomers": 2} ) raw = res["out"].get() assert raw is not None mol = raw[0] assert mol.n_isomers <= 2 assert not mol.scored assert mol.molecules[0].n_conformers == 2 assert mol.molecules[0].charge == 0 assert mol.molecules[0].n_atoms == 44 def test_SaveMolecule(self, tmp_path: Path, test_config: Any) -> None: rig = TestRig(SaveMolecule, config=test_config) mol = IsomerCollection.from_smiles("Nc1ncnc(c12)n(CCCC)c(n2)Cc3cccc(c3)OC") rig.setup_run(inputs={"inp": mol}, parameters={"path": tmp_path / "file.sdf"}) assert (tmp_path / "file.sdf").exists() def test_LoadSmiles(self, shared_datadir: Path, test_config: Any) -> None: rig = TestRig(LoadSmiles, config=test_config) res = rig.setup_run(parameters={"path": shared_datadir / "test.smi"}) mol = res["out"].get() assert mol is not None assert len(mol) == 1 assert mol[0] == "Nc1ncnc(c12)n(CCCC)c(n2)Cc3cccc(c3)OC" def test_LoadLibrary(self, shared_datadir: Path, test_config: Any) -> None: rig = TestRig(LoadLibrary, config=test_config) res = rig.setup_run(parameters={"path": shared_datadir / "1UYD_ligands.sdf"}) mols = res["out"].get() assert mols is not None assert len(mols) == 3 def test_SaveLibrary(self, tmp_path: Path, test_config: Any) -> None: rig = TestRig(SaveLibrary, config=test_config) mols = [ IsomerCollection.from_smiles("Nc1ncnc(c12)n(CCCC)c(n2)Cc3cccc(c3)OC"), IsomerCollection.from_smiles("Nc1ncnc(c12)n(CCCC)c(n2)Cc3ccc(OC)cc3") ] base = tmp_path / "mol" rig.setup_run(inputs={"inp": [mols]}, parameters={"base_path": base}) assert base.with_name("mol0.sdf").exists() assert base.with_name("mol1.sdf").exists()