Source code for icolos.core.workflow_steps.confgen.crest

import os
from typing import List

from pydantic import BaseModel
from rdkit import Chem
from copy import deepcopy

from icolos.utils.execute_external.crest import CrestExecutor

from icolos.utils.general.molecules import get_charge_for_molecule

from icolos.core.containers.compound import Enumeration, Conformer

from icolos.utils.enums.program_parameters import CrestEnum, CrestOutputEnum
from icolos.core.workflow_steps.step import _LE, _CTE
from icolos.core.workflow_steps.confgen.base import StepConfgenBase

_EE = CrestEnum()
_COE = CrestOutputEnum()


[docs]class StepCREST(StepConfgenBase, BaseModel): def __init__(self, **data): super().__init__(**data) # initialize the executor and test availability self._initialize_backend(executor=CrestExecutor) self._check_backend_availability() def _get_energies_from_XYZ(self, path) -> list: energies = [] with open(path, "r") as f: lines = f.readlines() for line in lines: if line.startswith(_COE.PREFIX_ENERGIES_XYZ): energies.append(line.lstrip().rstrip()) return energies def _parse_CREST_result( self, dir_path: str, enumeration: Enumeration ) -> List[Conformer]: """Function to parse the result from CREST.""" # CREST will output a variety of files to "dir_path" conformers_sdf = os.path.join(dir_path, _COE.CREST_CONFORMERS_SDF) conformers_xyz = os.path.join(dir_path, _COE.CREST_CONFORMERS_XYZ) # as the energies are lost in the SDF output, we will add them as a tag energies = self._get_energies_from_XYZ(conformers_xyz) charge = str( get_charge_for_molecule(enumeration.get_molecule(), add_as_tag=False) ) mol_supplier = Chem.SDMolSupplier(conformers_sdf, removeHs=False) result = [] for mol_id, mol in enumerate(mol_supplier): mol.SetProp(_CTE.CONFORMER_ENERGY_TAG, energies[mol_id]) mol.SetProp(_CTE.FORMAL_CHARGE_TAG, charge) result.append(Conformer(conformer=mol)) return result def _set_formal_charge(self, parameters: dict, molecule: Chem.Mol) -> dict: charge = get_charge_for_molecule(molecule, add_as_tag=False) parameters[_EE.CREST_CHRG] = charge self._logger.log(f"Set charge for molecule to {charge}.", _LE.DEBUG) return parameters def _set_number_cores(self, parameters: dict) -> dict: """Function for parallelization of task, setting the number of cores to be used.""" parameters[_EE.CREST_T] = int(self.execution.parallelization.jobs) return parameters def _prepare_settings(self, tmp_dir: str, enumeration: Enumeration) -> list: # first position is the input (SDF) file; the internal input at this stage is a molecule # -> write it to a temporary SDF file (undocumented input functionality) and add the path settings = [self._prepare_temp_input(tmp_dir, enumeration.get_molecule())] # add flags for flag in self.settings.arguments.flags: settings.append(flag) # add parameters parameters = deepcopy(self.settings.arguments.parameters) # update / over-write fields that need a specific value or are defined elsewhere parameters = self._set_number_cores(parameters) parameters = self._set_formal_charge(parameters, enumeration.get_molecule()) # flatten the dictionary into a list for command-line execution for key in parameters.keys(): settings.append(key) settings.append(parameters[key]) return settings
[docs] def execute(self): for compound in self.get_compounds(): for enumeration in compound.get_enumerations(): if not self._input_object_valid(enumeration): continue # set up tmp_dir = self._move_to_temp_dir() # the call to CREST starts with the path to the input file, followed by arguments and flags settings = self._prepare_settings(tmp_dir, enumeration=enumeration) self._logger.log( f"Executing CREST backend in folder {tmp_dir}.", _LE.DEBUG ) self._backend_executor.execute( command=_EE.CREST, arguments=settings, check=False ) self._restore_working_dir() conformers = self._parse_CREST_result(tmp_dir, enumeration=enumeration) enumeration.clear_conformers() enumeration.add_conformers(conformers=conformers, auto_update=True) self._logger.log( f"Executed CREST and obtained {len(conformers)} for enumeration {enumeration.get_index_string()}", _LE.INFO, ) self._remove_temporary(tmp_dir)