Source code for icolos.core.containers.compound

from copy import deepcopy
from typing import List
from rdkit import Chem
from icolos.core.step_utils.obabel_structconvert import OBabelStructConvert

from icolos.utils.enums.compound_enums import (
    CompoundContainerEnum,
    EnumerationContainerEnum,
)
from icolos.utils.enums.program_parameters import SchrodingerExecutablesEnum
from icolos.core.step_utils.structconvert import StructConvert
from icolos.utils.general.icolos_exceptions import ContainerCorrupted
from icolos.utils.enums.write_out_enums import WriteOutEnum
from typing import Union
import numpy as np
import os

_WE = WriteOutEnum()


class Conformer:
    """Storage class for an individual conformer associated with a given Enumeration.

    A conformer wraps a molecule, an optional integer id, a back-reference to
    its parent enumeration object and a free-form dictionary of extra data.
    """

    def __init__(
        self,
        conformer: Chem.Mol = None,
        conformer_id: int = None,
        enumeration_object=None,
    ):
        self._conformer = conformer
        self._conformer_id = conformer_id
        self._enumeration_object = enumeration_object
        self._extra_data_dictionary = {}

    def get_compound_name(self) -> str:
        """Return the compound name reported by the parent enumeration.

        Returns None when no enumeration object is attached.
        """
        enum_obj = self.get_enumeration_object()
        if enum_obj is None:
            return None
        return enum_obj.get_compound_name()

    def get_index_string(self) -> str:
        """Return "<enumeration index string>:<conformer id>".

        Either part is left empty when the enumeration object or the
        conformer id is not set.
        """
        enum_obj = self.get_enumeration_object()
        enum_str = "" if enum_obj is None else enum_obj.get_index_string()
        conformer_id = self.get_conformer_id()
        conf_str = "" if conformer_id is None else str(conformer_id)
        return ":".join([enum_str, conf_str])

    def add_extra_data(self, key: str, data):
        """Store `data` under `key` in the extra data dictionary."""
        self._extra_data_dictionary[key] = data

    def get_extra_data(self) -> dict:
        """Return the extra data dictionary itself (not a copy)."""
        return self._extra_data_dictionary

    def clear_extra_data(self):
        """Replace the extra data dictionary with a new, empty one."""
        self._extra_data_dictionary = {}

    def set_enumeration_object(self, enumeration_object):
        self._enumeration_object = enumeration_object

    def get_enumeration_object(self):
        return self._enumeration_object

    def get_molecule(self) -> Chem.Mol:
        return self._conformer

    def set_molecule(self, conformer: Chem.Mol):
        self._conformer = conformer

    def set_conformer_id(self, conformer_id: int):
        self._conformer_id = conformer_id

    def get_conformer_id(self) -> int:
        return self._conformer_id

    def empty(self) -> bool:
        """Return True if no molecule is stored."""
        return self.get_molecule() is None

    def _clone(self):
        """Return a copy with a deep-copied molecule and extra data.

        The parent enumeration object is shared with the original, not copied.
        """
        clone = Conformer(
            conformer=deepcopy(self.get_molecule()),
            conformer_id=self.get_conformer_id(),
            enumeration_object=self.get_enumeration_object(),
        )
        clone._extra_data_dictionary = deepcopy(self.get_extra_data())
        return clone

    def __copy__(self):
        return self._clone()

    def __deepcopy__(self, memo):
        # `memo` is intentionally unused; copying is delegated to _clone()
        return self._clone()

    def __repr__(self):
        enum_obj = self.get_enumeration_object()
        parent_enumeration_id = (
            None if enum_obj is None else enum_obj.get_enumeration_id()
        )
        return "<Icolos conformer: id=%s, parent enumeration: %s>" % (
            self.get_conformer_id(),
            parent_enumeration_id,
        )

    def __str__(self):
        return self.__repr__()

    def write(self, path: str, format_=_WE.SDF):
        """Write the molecule to `path` as an SD file, optionally converting to PDB.

        The index string is stored on the molecule under the `_WE.RDKIT_NAME`
        and `_WE.INDEX_STRING` properties before writing. If `format_` equals
        `_WE.PDB`, the written SD file is converted with `OBabelStructConvert`
        into a file with the same stem and a ".pdb" suffix, and the SD file
        is removed afterwards.

        :param path: Target path of the SD file.
        :param format_: Output format; compared against `_WE.PDB`.
        """
        writer = Chem.SDWriter(path)
        try:
            molecule = self.get_molecule()
            index_string = self.get_index_string()
            molecule.SetProp(_WE.RDKIT_NAME, index_string)
            molecule.SetProp(_WE.INDEX_STRING, index_string)
            writer.write(molecule)
        finally:
            # always release the file handle, even if writing failed
            writer.close()

        if format_ == _WE.PDB:
            # only strip the final extension; splitting on the first "." would
            # mangle paths such as "./dir/file.sdf" or "run.1/file.sdf"
            pdb_path = os.path.splitext(path)[0] + ".pdb"
            # convert the written sdf file to a pdb with OB
            converter = OBabelStructConvert()
            converter.sdf2pdb(sdf_file=path, pdb_file=pdb_path)
            os.remove(path)

    def update_coordinates(self, path: str):
        """Replace the stored molecule with the first one read from the SD file at `path`.

        The properties of the previous molecule are copied onto the new one.
        Afterwards the conformer is written to `path` with "_out" appended.
        If the file yields no molecule, the stored molecule is kept and still
        written out.
        """
        old = self.get_molecule()
        for mol in Chem.SDMolSupplier(path, removeHs=False):
            # the name is copied explicitly in addition to the listed properties
            # NOTE(review): presumably because GetPropNames() does not list it - confirm
            mol.SetProp(_WE.RDKIT_NAME, old.GetProp(_WE.RDKIT_NAME))
            for prop in old.GetPropNames():
                mol.SetProp(prop, old.GetProp(prop))
            self.set_molecule(mol)
            # only one molecule expected at this stage, so stop after first run
            break
        self.write(path + "_out")
class Enumeration:
    """Bundle all information on an enumeration, especially all conformers generated."""

    def __init__(
        self,
        compound_object=None,
        smile: str = "",
        molecule: Chem.Mol = None,
        original_smile: str = None,
        enumeration_id: int = None,
    ):
        self._MC = CompoundContainerEnum()
        self._EC = EnumerationContainerEnum()
        self._smile = smile
        self._compound_object = compound_object
        self._molecule = molecule
        self._original_smile = original_smile
        self._enumeration_id = enumeration_id
        self._conformers = []

    def empty(self) -> bool:
        """Return True if the enumeration holds no conformers."""
        return len(self.get_conformers()) == 0

    def get_compound_name(self) -> str:
        """Return the name of the parent compound, or None if none is attached."""
        compound = self.get_compound_object()
        if compound is None:
            return None
        return compound.get_name()

    def _get_next_conformer_id(self) -> int:
        """Return the next free conformer id (0 for an empty enumeration)."""
        ids = self.get_conformer_ids()
        return max(ids) + 1 if ids else 0

    @staticmethod
    def _normalised_tag_values(conformers, tags) -> list:
        """Return, per conformer, the min-max normalised values of all `tags`.

        The minimum and maximum of each tag are computed once over all
        conformers. A tag with the same value on every conformer has no
        spread to normalise against and contributes 0.0 for each of them.
        """
        rows = [[] for _ in conformers]
        if not conformers:
            return rows
        for tag in tags:
            values = [float(conf.get_molecule().GetProp(tag)) for conf in conformers]
            low = np.min(values)
            span = np.max(values) - low
            for row, value in zip(rows, values):
                row.append((value - low) / span if span != 0 else 0.0)
        return rows

    def sort_conformers(
        self, by_tag: Union[str, List[str]], reverse: bool = True, aggregation="sum"
    ):
        """Sort the conformers by one or several molecule properties.

        :param by_tag: A single property name, or a list of property names.
            Property values are converted with `float`.
        :param reverse: Passed on to the sort; True sorts in descending order.
        :param aggregation: Only used when `by_tag` is a list: the min-max
            normalised tag values are combined by "sum" or "product".
        :raises AttributeError: If `by_tag` is a list and `aggregation` is
            neither "sum" nor "product".
        """
        conformers = self.get_conformers()
        if isinstance(by_tag, str):
            self._conformers = sorted(
                conformers,
                key=lambda conf: float(conf.get_molecule().GetProp(by_tag)),
                reverse=reverse,
            )
            self.reset_conformer_ids()
        elif isinstance(by_tag, list):
            # if we specify multiple tags, aggregate according to the provided aggregation function
            if aggregation == "sum":
                aggregate = np.sum
            elif aggregation == "product":
                aggregate = np.prod
            else:
                raise AttributeError(
                    "Only sum or product aggregation modes are currently supported - ABORT"
                )
            scores = [
                aggregate(row)
                for row in self._normalised_tag_values(conformers, by_tag)
            ]
            # sort positions rather than conformers, so every score is computed only once
            order = sorted(
                range(len(conformers)), key=scores.__getitem__, reverse=reverse
            )
            self._conformers = [conformers[position] for position in order]
            # NOTE(review): unlike the single-tag branch, conformer ids are not
            # reset here - confirm whether that is intended

    def find_conformer(self, conformer_id: int) -> Conformer:
        """Return the conformer with the given id.

        :raises IndexError: If no conformer has this id.
        :raises ContainerCorrupted: If more than one conformer has this id.
        """
        matches = [
            conf
            for conf in self.get_conformers()
            if conf.get_conformer_id() == conformer_id
        ]
        if len(matches) == 0:
            raise IndexError(f"Could not find conformer with id {conformer_id}.")
        elif len(matches) > 1:
            raise ContainerCorrupted(
                f"More than one conformer with id {conformer_id} found in the same Enumeration instance (enumeration_id: {self.get_enumeration_id()})."
            )
        return matches[0]

    def get_conformer_ids(self) -> List[int]:
        return [conf.get_conformer_id() for conf in self.get_conformers()]

    def reset_conformer_ids(self):
        """Renumber all conformers consecutively from 0 in their current order."""
        for new_id, conf in enumerate(self.get_conformers()):
            conf.set_conformer_id(conformer_id=new_id)

    def add_conformer(self, conformer: Conformer, auto_update: bool = True):
        """Add a new conformer. If "auto_update" is True, the Enumeration class will be set to "self" and
        the conformer_id will be set to the next free index."""
        # a copy is stored, so the caller's object is never modified
        conformer = deepcopy(conformer)
        if auto_update:
            conformer.set_enumeration_object(self)
            conformer.set_conformer_id(self._get_next_conformer_id())
        self._conformers.append(conformer)

    def add_conformers(self, conformers: List[Conformer], auto_update: bool = True):
        """Add new conformers. If "auto_update" is True, the Enumeration class will be set to "self" and
        the conformer_id will be set to the next free index."""
        for conformer in conformers:
            self.add_conformer(conformer=conformer, auto_update=auto_update)

    def get_index_string(self) -> str:
        """Return "<compound index string>:<enumeration id>".

        Either part is left empty when the compound object or the
        enumeration id is not set.
        """
        comp_obj = self.get_compound_object()
        comp_str = "" if comp_obj is None else comp_obj.get_index_string()
        enumeration_id = self.get_enumeration_id()
        enum_str = "" if enumeration_id is None else str(enumeration_id)
        return ":".join([comp_str, enum_str])

    def clean_failed_conformers(self):
        """Remove all conformers without a molecule and renumber the rest."""
        # all conformers, where the molecule has been set to None by a function can be considered to have failed
        # slice assignment keeps the same list object, as in-place deletion would
        self._conformers[:] = [
            conf for conf in self._conformers if conf.get_molecule() is not None
        ]
        self.reset_conformer_ids()

    def clear_molecule(self):
        self._molecule = None

    def clear_conformers(self):
        self._conformers = []

    def get_conformers(self) -> List[Conformer]:
        """Return the internal list of conformers (not a copy)."""
        return self._conformers

    def clone_conformers(self) -> List[Conformer]:
        """Return a list with a deep copy of every conformer."""
        return [deepcopy(conf) for conf in self._conformers]

    def set_compound_object(self, compound_object):
        self._compound_object = compound_object

    def get_compound_object(self):
        return self._compound_object

    def set_enumeration_id(self, enumeration_id: int):
        self._enumeration_id = enumeration_id

    def get_enumeration_id(self) -> int:
        return self._enumeration_id

    def set_smile(self, smile: str):
        self._smile = smile

    def get_smile(self) -> str:
        return self._smile

    def set_molecule(self, molecule: Chem.Mol):
        self._molecule = molecule

    def get_molecule(self) -> Chem.Mol:
        return self._molecule

    def set_original_smile(self, original_smile: str):
        self._original_smile = original_smile

    def get_original_smile(self) -> str:
        return self._original_smile

    def _clone(self):
        """Return a copy whose conformers are copies pointing back to the clone.

        The molecule is deep-copied; the parent compound object is shared
        with the original.
        """
        clone = Enumeration(
            compound_object=self.get_compound_object(),
            smile=self.get_smile(),
            molecule=deepcopy(self.get_molecule()),
            original_smile=self.get_original_smile(),
            enumeration_id=self.get_enumeration_id(),
        )
        for conf in self.get_conformers():
            # copy each conformer exactly once and keep its id
            copied = deepcopy(conf)
            copied.set_enumeration_object(enumeration_object=clone)
            clone._conformers.append(copied)
        return clone

    def __copy__(self):
        return self._clone()

    def __deepcopy__(self, memo):
        # `memo` is intentionally unused; copying is delegated to _clone()
        return self._clone()

    def __repr__(self):
        compound = self.get_compound_object()
        parent_compound_id = (
            None if compound is None else compound.get_compound_number()
        )
        return (
            "<Icolos enumeration: id=%s, smile=%s, parent compound: %s, num_conformers: %i>"
            % (
                self.get_enumeration_id(),
                self.get_smile(),
                parent_compound_id,
                len(self._conformers),
            )
        )

    def __str__(self):
        return self.__repr__()

    def __iter__(self):
        return iter(self._conformers)

    def __getitem__(self, key: int) -> Conformer:
        return self._conformers[key]

    def __len__(self) -> int:
        return len(self.get_conformers())
class Compound:
    """Bundle all information on a molecule; serves mainly to group enumerations."""

    def __init__(self, name: str = "", compound_number: int = None):
        self._CC = CompoundContainerEnum()
        self._EC = EnumerationContainerEnum()
        self._name = name
        self._compound_number = compound_number
        self._enumerations = []

    def __repr__(self):
        return "<Icolos compound: name=%s, compound_number=%s, enumerations=%s>" % (
            self.get_name(),
            self.get_compound_number(),
            len(self.get_enumerations()),
        )

    def __str__(self):
        return self.__repr__()

    def get_index_string(self) -> str:
        """Return the compound number as a string, or "" if it is not set."""
        number = self.get_compound_number()
        return "" if number is None else str(number)

    def set_name(self, name: str):
        self._name = name

    def get_name(self) -> str:
        return self._name

    def set_compound_number(self, compound_number: int):
        self._compound_number = compound_number

    def get_compound_number(self) -> int:
        return self._compound_number

    def add_enumeration(self, enumeration: Enumeration, auto_update: bool = True):
        """Add a new enumeration. If "auto_update" is True, the Compound class will be set to "self" and
        the enumeration_id will be set to the next free index."""
        # a copy is stored, so the caller's object is never modified
        enumeration = deepcopy(enumeration)
        if auto_update:
            enumeration.set_compound_object(self)
            enumeration.set_enumeration_id(self._get_next_enumeration_id())
        self._enumerations.append(enumeration)

    def add_enumerations(
        self, enumerations: List[Enumeration], auto_update: bool = True
    ):
        """Add new enumerations. If "auto_update" is True, the Compound class will be set to "self" and
        the enumeration_id will be set to the next free index."""
        for enumeration in enumerations:
            self.add_enumeration(enumeration=enumeration, auto_update=auto_update)

    def clear_enumerations(self):
        self._enumerations = []

    def get_enumerations(self) -> List[Enumeration]:
        """Return the internal list of enumerations (not a copy)."""
        return self._enumerations

    def _clone(self):
        """Return a copy whose enumerations are copies pointing back to the clone."""
        clone = Compound(
            name=self.get_name(), compound_number=self.get_compound_number()
        )
        for enum in self.get_enumerations():
            # copy each enumeration exactly once and keep its id
            copied = deepcopy(enum)
            copied.set_compound_object(compound_object=clone)
            clone._enumerations.append(copied)
        return clone

    def __iter__(self):
        return iter(self._enumerations)

    def __copy__(self):
        return self._clone()

    def __deepcopy__(self, memo):
        # `memo` is intentionally unused; copying is delegated to _clone()
        return self._clone()

    def __getitem__(self, key: int) -> Enumeration:
        return self._enumerations[key]

    def __len__(self) -> int:
        return len(self.get_enumerations())

    def _get_next_enumeration_id(self):
        """Return the next free enumeration id (0 for an empty compound)."""
        ids = self.get_enumeration_ids()
        return max(ids) + 1 if ids else 0

    def find_enumeration(self, enumeration_id: int) -> Enumeration:
        """Return the enumeration with the given id.

        :raises IndexError: If no enumeration has this id.
        :raises ContainerCorrupted: If more than one enumeration has this id.
        """
        matches = [
            enum
            for enum in self.get_enumerations()
            if enum.get_enumeration_id() == enumeration_id
        ]
        if len(matches) == 0:
            raise IndexError(f"Could not find enumeration with id {enumeration_id}.")
        elif len(matches) > 1:
            raise ContainerCorrupted(
                f"More than one enumeration with id {enumeration_id} found in the same Compound instance (compound_number: {self.get_compound_number()})."
            )
        return matches[0]

    def get_enumeration_ids(self) -> List[int]:
        return [enum.get_enumeration_id() for enum in self.get_enumerations()]

    def reset_enumeration_ids(self):
        """Renumber all enumerations consecutively from 0 in their current order."""
        for new_id, enum in enumerate(self.get_enumerations()):
            enum.set_enumeration_id(enumeration_id=new_id)

    def reset_all_ids(self):
        """Renumber the enumerations and, within each of them, the conformers."""
        self.reset_enumeration_ids()
        for enum in self.get_enumerations():
            enum.reset_conformer_ids()

    def update_all_relations(self):
        """Point every enumeration at this compound and every conformer at its enumeration."""
        for enum in self.get_enumerations():
            enum.set_compound_object(self)
            for conf in enum.get_conformers():
                conf.set_enumeration_object(enum)

    def empty(self) -> bool:
        """Return True if the compound holds no enumerations."""
        return len(self.get_enumerations()) == 0

    def unroll_conformers(self) -> List[Conformer]:
        """Return the conformers of all enumerations as one flat list."""
        conformers = []
        for enum in self.get_enumerations():
            # guard against empty enumerations that might be used when constructing more complex data flows
            if enum.empty():
                continue
            conformers.extend(enum.get_conformers())
        return conformers
# TODO: Replace the module-level helper functions below with a wrapper object
def get_compound_by_id(compounds: List[Compound], id: int) -> Compound:
    """Return the first compound whose compound number equals `id`.

    :raises ValueError: If no compound in `compounds` has this number.
    """
    found = next(
        (candidate for candidate in compounds if candidate.get_compound_number() == id),
        None,
    )
    if found is None:
        raise ValueError(
            f"Could not find compound with id {id} in list of length {len(compounds)}."
        )
    return found
def get_compound_by_name(compounds: List[Compound], name: str) -> Compound:
    """Return the first compound whose name equals `name`.

    :raises ValueError: If no compound in `compounds` has this name.
    """
    found = next(
        (candidate for candidate in compounds if candidate.get_name() == name),
        None,
    )
    if found is None:
        raise ValueError(
            f"Could not find compound with name {name} in list of length {len(compounds)}."
        )
    return found
def unroll_conformers(compounds: List[Compound]) -> List[Conformer]:
    """Return the conformers of all given compounds as one flat list.

    The conformers are collected via each compound's `unroll_conformers()`,
    in the order of `compounds`.
    """
    all_conformers = []
    for comp in compounds:
        # extend in place; re-concatenating the list would copy it on every iteration
        all_conformers.extend(comp.unroll_conformers())
    return all_conformers
def unroll_enumerations(compounds: List[Compound]) -> List[Enumeration]:
    """Return the enumerations of all given compounds as one flat list.

    The result is always a new list; the compounds' own enumeration lists
    are not modified or returned directly.
    """
    all_enumerations = []
    for comp in compounds:
        # extend in place; re-concatenating the list would copy it on every iteration
        all_enumerations.extend(comp.get_enumerations())
    return all_enumerations