Source code for icolos.core.workflow_steps.io.embedder

from copy import deepcopy

from pydantic import BaseModel
from rdkit import Chem, RDLogger
from rdkit.Chem import AllChem

from icolos.core.containers.compound import Conformer
from icolos.utils.general.icolos_exceptions import StepFailed
from icolos.utils.enums.step_enums import StepEmbeddingEnum
from icolos.core.workflow_steps.io.base import StepIOBase

from icolos.core.workflow_steps.step import _LE
from icolos.utils.general.convenience_functions import *
from icolos.utils.smiles import to_mol

_SEE = StepEmbeddingEnum()


class StepEmbedding(StepIOBase, BaseModel):
    """Workflow step that generates 3D coordinates from the SMILES of enumerations.

    The step reads the embedding level from ``settings.additional[_SEE.EMBED_AS]``
    (defaulting to ``_SEE.EMBED_AS_ENUMERATIONS``) and the embedding method from
    the step's argument parameters. Only the RDKit method is implemented here.
    """

    def __init__(self, **data):
        super().__init__(**data)

        # extend parameters with defaults
        if _SEE.EMBED_AS not in self.settings.additional.keys():
            self.settings.additional[_SEE.EMBED_AS] = _SEE.EMBED_AS_ENUMERATIONS
            self._logger.log(
                f'No embedding level specified, defaulting to "{_SEE.EMBED_AS_ENUMERATIONS}".',
                _LE.INFO,
            )

    def _smile_to_molecule(self, smile: str) -> Chem.Mol:
        """Convert a SMILES string into a molecule.

        :param smile: SMILES string to convert.
        :return: The molecule, or ``None`` (after logging a warning) if the
            conversion failed.
        """
        mol = to_mol(smile)
        if mol is None:
            self._logger.log(
                f"The smile {smile} could not be transformed into a molecule and will be skipped.",
                _LE.WARNING,
            )
        return mol

    def _embed_with_RDKit(self, smile: str, parameters: dict) -> Chem.Mol:
        """Embed a SMILES string with RDKit and optimize the result with UFF.

        :param smile: SMILES string of the molecule to embed.
        :param parameters: Step parameters; ``_SEE.RDKIT_PROTONATE`` (default
            ``True``) controls whether hydrogens are added to the result.
        :return: The embedded molecule, or ``None`` if the SMILES could not be
            parsed, the embedding failed or the optimization did not finish
            with status 0.
        """
        molecule = self._smile_to_molecule(smile)
        if molecule is None:
            # the failure has already been logged; there is nothing to embed
            return None

        # deactivate logger to suppress "missing Hs messages"
        RDLogger.DisableLog("rdApp.*")
        try:
            try:
                embed_code = AllChem.EmbedMolecule(
                    molecule, randomSeed=42, useRandomCoords=True
                )
            except Exception:
                # deliberately broad (best effort), but unlike a bare "except"
                # this does not swallow KeyboardInterrupt / SystemExit
                self._logger.log(
                    f'Could not embed molecule with SMILES "{smile}", critical error in "RDkit".',
                    _LE.WARNING,
                )
                return None

            status = 0
            if embed_code != -1:
                status = AllChem.UFFOptimizeMolecule(molecule, maxIters=600)
                if status == 1:
                    self._logger.log(
                        f"The 3D coordinate generation of molecule {smile} did not converge in time.",
                        _LE.WARNING,
                    )
            else:
                self._logger.log(
                    f"Could not embed molecule {smile} - no 3D coordinates have been generated.",
                    _LE.WARNING,
                )
        finally:
            # always restore RDKit logging, including on the early return above
            RDLogger.EnableLog("rdApp.*")

        if embed_code == -1 or status != 0:
            return None

        # add hydrogens to the molecule (if specified); only done for molecules
        # that are actually returned
        if nested_get(parameters, [_SEE.RDKIT_PROTONATE], default=True):
            molecule = Chem.AddHs(molecule, addCoords=True)
        return molecule

    def _get_embedding_method(self, parameters: dict) -> str:
        """Return the upper-cased embedding method configured in ``parameters``.

        :param parameters: Step parameters, expected to contain ``_SEE.METHOD``.
        :raises StepFailed: If no embedding method is set.
        """
        method = nested_get(parameters, [_SEE.METHOD], default=None)
        if method is None:
            error = "Embedding method not set."
            self._logger.log(error, _LE.ERROR)
            raise StepFailed(error)
        return method.upper()

    def _embed_molecule(self, smile: str, parameters: dict) -> Chem.Mol:
        """Embed ``smile`` with the configured method.

        :param smile: SMILES string of the molecule to embed.
        :param parameters: Step parameters (method and method-specific options).
        :return: The embedded molecule, or ``None`` if embedding failed or the
            configured method is not available (an error is logged then).
        :raises StepFailed: If no embedding method is set.
        """
        method = self._get_embedding_method(parameters)
        if method == _SEE.METHOD_RDKIT:
            return self._embed_with_RDKit(smile, parameters)

        self._logger.log(
            f"Specified embedding method {method} not available.", _LE.ERROR
        )
        return None

    def _embed_as_enumerations(self, compound, parameters: dict) -> None:
        """Re-embed every enumeration of ``compound``, keeping only the successful ones."""
        # work on a copy, as the compound's enumerations are rebuilt from scratch
        enum_buffer = deepcopy(compound.get_enumerations())
        compound.clear_enumerations()
        for enumeration in enum_buffer:
            enumeration.clear_molecule()
            enumeration.clear_conformers()
            molecule = self._embed_molecule(
                smile=enumeration.get_smile(), parameters=parameters
            )
            if molecule is not None:
                enumeration.set_molecule(molecule)
                compound.add_enumeration(enumeration)
        self._logger.log(
            f"Embedding for compound {compound.get_index_string()} (name: {compound.get_name()}) completed ({len(compound)} of {len(enum_buffer)} enumerations successful).",
            _LE.INFO,
        )

    def _embed_as_conformers(self, compound, parameters: dict) -> None:
        """Replace the conformers of every enumeration of ``compound`` by one embedded conformer."""
        # count successes while embedding; enumerations whose embedding failed
        # are left without conformers and must not be indexed afterwards
        number_successful = 0
        for enumeration in compound.get_enumerations():
            enumeration.clear_conformers()
            molecule = self._embed_molecule(
                smile=enumeration.get_smile(), parameters=parameters
            )
            if molecule is not None:
                conformer = Conformer(
                    conformer=molecule, enumeration_object=enumeration
                )
                enumeration.add_conformer(conformer, auto_update=True)
                number_successful += 1
        self._logger.log(
            f"Embedding for compound {compound.get_index_string()} (name: {compound.get_name()}) completed ({number_successful} of {len(compound)} enumerations successful).",
            _LE.INFO,
        )

    def execute(self):
        """Embed all compounds of this step at the configured embedding level.

        :raises ValueError: If the configured "embed_as" value is not supported
            (raised when the first compound is processed).
        :raises StepFailed: If no embedding method is set.
        """
        parameters = deepcopy(self.settings.arguments.parameters)
        embed_as = self.settings.additional[_SEE.EMBED_AS]
        for compound in self.get_compounds():
            if embed_as == _SEE.EMBED_AS_ENUMERATIONS:
                self._embed_as_enumerations(compound, parameters)
            elif embed_as == _SEE.EMBED_AS_CONFORMERS:
                self._embed_as_conformers(compound, parameters)
            else:
                raise ValueError(
                    f'Value "{embed_as}" for parameter "embed_as" not supported.'
                )