Source code for icolos.core.workflow_steps.calculation.shaep

from icolos.utils.execute_external.execute import Executor
from icolos.core.workflow_steps.calculation.base import StepCalculationBase
from icolos.core.workflow_steps.step import _LE
from icolos.utils.enums.step_enums import StepShaepEnum
from icolos.utils.enums.program_parameters import PantherEnum, ShaepEnum
from icolos.core.containers.compound import Conformer
import tempfile
from pydantic import BaseModel
import os

_SSE = StepShaepEnum()
_SE = ShaepEnum()
_PE = PantherEnum()


[docs]class StepShaep(StepCalculationBase, BaseModel): def __init__(self, **data): super().__init__(**data) self._initialize_backend(executor=Executor) def _prepare_tmp_input_dir(self): tmp_dir = tempfile.mkdtemp() return tmp_dir def _execute_backend(self, conf_path: str, tmp_dir: str, ni_path: str): arguments = [ os.path.join(tmp_dir, ni_path), conf_path, os.path.join(tmp_dir, _SE.OUTPUT_SIMILARITY), ] self._backend_executor.execute( command=_SE.SHAEP_EXECUTABLE, arguments=arguments, check=True ) def _parse_output(self, tmp_dir: str, conformer: Conformer): with open(os.path.join(tmp_dir, _SE.OUTPUT_SIMILARITY), "r") as f: # TODO: add support for multiple input structures; ignore the names (all will be in one line), but from # position 8 (index 7 in python) onwards, the shape and esp similarities are reported in the same # order as the input, i.e. <7 other values> mol1_shape mol1_esp mol2_shape ... parts = f.readlines()[1].split("\t") conformer.get_molecule().SetProp(_SE.TAG_SHAPE_SIMILARITY, str(parts[7])) conformer.get_molecule().SetProp(_SE.TAG_ESP_SIMILARITY, str(parts[8]))
[docs] def execute(self): number_rescored = 0 for compound in self.get_compounds(): for enumeration in compound.get_enumerations(): if len(enumeration.get_conformers()) == 0: self._logger.log( f"Found no conformers for enumeration {enumeration} for compound {compound}.", _LE.WARNING, ) # we can still execute shaep at the enumeration level, if the compounds are correcty annotated they should be written out ok. Will be slower though # easiest for now is to add the enumeration mol object as a single conformer and run that through shaep mol = enumeration.get_molecule() conf = Conformer(conformer=mol) enumeration.add_conformer(conf) # TODO: ShaEP allow batch execution for any number of compounds (parsing gets more difficult though) # Implement that to avoid overhead from file system issues # TODO: Refactor and add comments for conformer in enumeration.get_conformers(): tmp_dir = self._prepare_tmp_input_dir() conf_path = os.path.join(tmp_dir, _SE.CONFORMER_PATH) ni_file = self.data.generic.get_files_by_extension("mol2")[0] ni_file.write(tmp_dir) conformer.write(conf_path) self._execute_backend(conf_path, tmp_dir, ni_file.get_file_name()) self._parse_output(tmp_dir, conformer) self._logger.log( f"Finished shaep execution for conformer {enumeration.get_index_string()}.", _LE.DEBUG, ) number_rescored += 1 self._remove_temporary(tmp_dir) self._logger.log(f"Executed ShaEP for {number_rescored} conformers.", _LE.INFO)