# Source code for icolos.core.workflow_steps.schrodinger.macromodel

import os
import subprocess
from typing import Tuple, List

from pydantic import BaseModel, PrivateAttr
from rdkit import Chem

from icolos.core.workflow_steps.schrodinger.base import StepSchrodingerBase
from icolos.utils.execute_external.macromodel import MacromodelExecutor

from icolos.utils.general.molecules import get_charge_for_molecule

from icolos.core.containers.compound import Enumeration, Conformer

from icolos.utils.enums.program_parameters import (
    MacromodelEnum,
)
from icolos.utils.enums.step_enums import StepMacromodelEnum
from icolos.core.workflow_steps.step import _LE, _CTE
from icolos.core.step_utils.sdconvert_util import SDConvertUtil

# Program-level MacroModel constants (e.g. the command name handed to the executor).
_EE = MacromodelEnum()
# Step-level constants: the COM file parameter key / default and the temporary file names.
_MMSE = StepMacromodelEnum()


class StepMacromodel(StepSchrodingerBase, BaseModel):
    """Workflow step that runs MacroModel on every valid enumeration.

    For each enumeration the molecule is written to SDF, converted to MAE,
    processed by the MacroModel backend according to a COM settings file and
    the resulting structures are attached to the enumeration as conformers
    (replacing any conformers that were present before).
    """

    class Config:
        underscore_attrs_are_private = True

    # helper used to convert between SDF and MAE files; set in __init__
    _sdconvert_util: SDConvertUtil = PrivateAttr()

    def __init__(self, **data):
        """Initialize the backend executor, the converter and the COM file default."""
        super().__init__(**data)

        # initialize the executor and test availability
        self._initialize_backend(executor=MacromodelExecutor)
        self._check_backend_availability()

        # prepare sdconvert utility
        self._sdconvert_util = SDConvertUtil(
            prefix_execution=self.execution.prefix_execution,
            binary_location=self.execution.binary_location,
        )

        # extend parameters with the COM file default, if not present
        parameters = self.settings.arguments.parameters
        if _MMSE.COM_FILE not in parameters:
            parameters[_MMSE.COM_FILE] = _MMSE.COM_FILE_DEFAULT

    def _execute_macromodel(self, com_file: str) -> subprocess.CompletedProcess:
        """Run the MacroModel backend for one COM file.

        All configured parameters except the COM file content are passed as
        ``key value`` pairs, followed by the configured flags and finally the
        path to the COM file.

        :param com_file: path to the COM (settings) file
        :return: result object returned by the backend executor
        """
        self._logger.log(
            f"Executing MacroModel backend for com_file {com_file}.", _LE.DEBUG
        )
        arguments = []
        for key, value in self.settings.arguments.parameters.items():
            # TODO: disentangle "special behaviour" for this key - move the com_file specification to a separate block
            #       in the configuration
            if key != _MMSE.COM_FILE:
                arguments.extend([key, str(value)])
        arguments.extend(str(flag) for flag in self.settings.arguments.flags)
        arguments.append(com_file)

        self._apply_token_guard()
        return self._backend_executor.execute(
            command=_EE.MACROMODEL, arguments=arguments, check=True
        )

    def _set_formal_charge(self, parameters: dict, molecule: Chem.Mol) -> dict:
        """Store the charge of ``molecule`` in ``parameters`` and return the dictionary.

        Note that ``parameters`` is modified in place.
        """
        charge = get_charge_for_molecule(molecule)
        # NOTE(review): the key name (XTB_CHRG) looks like it was taken over from
        #               another step - confirm it is the intended key here
        parameters[_EE.XTB_CHRG] = charge
        self._logger.log(f"Set charge for molecule to {charge}.", _LE.DEBUG)
        return parameters

    def _prepare_file_paths(self, tmp_dir: str) -> Tuple[str, str, str]:
        """Return the paths of the MAE input, MAE output and SDF output inside ``tmp_dir``.

        The files themselves are not created here.
        """
        mae_input = os.path.join(tmp_dir, _MMSE.MAE_INPUT)
        mae_output = os.path.join(tmp_dir, _MMSE.MAE_OUTPUT)
        sdf_output = os.path.join(tmp_dir, _MMSE.SDF_OUTPUT)
        return mae_input, mae_output, sdf_output

    def _prepare_settings_file(self, tmp_dir: str) -> str:
        """Write the COM file into ``tmp_dir`` and return its path.

        The file starts with the MAE input and output paths (one per line),
        followed by the settings taken from the step parameters.
        """
        path_settings_file = os.path.join(tmp_dir, _MMSE.COM_FILE_PATH)
        mae_input, mae_output, _ = self._prepare_file_paths(tmp_dir)

        # join the input and output paths (at the beginning of the COM file) and the
        # settings from either the default or the configuration together
        complete_com = "\n".join(
            [
                mae_input,
                mae_output,
                self.settings.arguments.parameters[_MMSE.COM_FILE],
            ]
        )
        with open(path_settings_file, "w") as f:
            # a single string is written, so "write" (not "writelines") is appropriate
            f.write(complete_com)
        return path_settings_file

    def _prepare_run_files(
        self, tmp_dir: str, enumeration: Enumeration
    ) -> Tuple[str, str, str, str, str]:
        """Create all input files required for one MacroModel run.

        :param tmp_dir: directory in which the files are placed
        :param enumeration: enumeration whose molecule is used as input
        :return: paths to SDF input, MAE input, MAE output, SDF output and COM file
        """
        # generate the file paths (NOT populated yet)
        mae_input, mae_output, sdf_output = self._prepare_file_paths(tmp_dir)

        # write the input SDF file and translate it into Schrodingers native MAE format
        sdf_input = self._prepare_temp_input(tmp_dir, enumeration.get_molecule())
        self._sdconvert_util.sdf2mae(sdf_input, mae_input)

        # write out the settings file
        com_file = self._prepare_settings_file(tmp_dir)
        return sdf_input, mae_input, mae_output, sdf_output, com_file

    def _parse_macromodel_result(
        self, sdf_output: str, enumeration: Enumeration
    ) -> List[Conformer]:
        """Load the structures in ``sdf_output`` as conformers.

        Every molecule is tagged with the formal charge of the enumeration's
        molecule. Records that RDKit cannot parse are skipped with a warning.

        :param sdf_output: path to the SDF file produced from the MacroModel output
        :param enumeration: enumeration the result belongs to
        :return: list of conformers (one per successfully parsed record)
        """
        charge = str(
            get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False)
        )

        conformers = []
        supplier = Chem.SDMolSupplier(sdf_output, removeHs=False)
        for mol_id, mol in enumerate(supplier):
            if mol is None:
                # the supplier yields None for records it fails to parse
                self._logger.log(
                    f"Skipping record {mol_id} in {sdf_output}, as it could not be parsed.",
                    _LE.WARNING,
                )
                continue

            # note, that formal charge information would be kept if available before (i.e. it retains tags)
            mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge)
            conformers.append(Conformer(conformer=mol))
        return conformers

    def execute(self) -> None:
        """Run MacroModel for all valid enumerations of all compounds.

        The conformers of each processed enumeration are replaced by the ones
        obtained from MacroModel. The working directory is restored and the
        temporary directory removed even if a run fails; the exception is
        propagated to the caller in that case.
        """
        for compound in self.get_compounds():
            for enumeration in compound.get_enumerations():
                if not self._input_object_valid(enumeration):
                    continue

                # set up
                tmp_dir = self._move_to_temp_dir()
                try:
                    try:
                        # get the paths to the MAE and SDF output files and the COM file (settings)
                        (
                            _,
                            _,
                            mae_output,
                            sdf_output,
                            com_file,
                        ) = self._prepare_run_files(
                            tmp_dir=tmp_dir, enumeration=enumeration
                        )

                        # execute MacroModel and obtain the output SDF
                        self._execute_macromodel(com_file=com_file)
                        # NOTE(review): the listing is discarded; presumably this forces a
                        #               refresh of the directory content - confirm before removing
                        os.listdir(tmp_dir)
                        self._sdconvert_util.mae2sdf(
                            mae_file=mae_output, sdf_file=sdf_output
                        )
                    finally:
                        # switch back the working directory to what it was before
                        self._restore_working_dir()

                    # parse output
                    conformers = self._parse_macromodel_result(sdf_output, enumeration)
                    enumeration.clear_conformers()
                    enumeration.add_conformers(conformers=conformers, auto_update=True)
                    self._logger.log(
                        f"Executed MacroModel and obtained {len(conformers)} conformers for enumeration {enumeration.get_index_string()}.",
                        _LE.INFO,
                    )
                finally:
                    self._remove_temporary(tmp_dir)