import random
import rdkit.Chem as rkc
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import SaltRemover
from rdkit.Chem import rdmolops
def _initialiseNeutralisationReactions():
    """
    Builds the substructure replacements used to neutralise charged groups.

    Every entry pairs a SMARTS query matching a charged atom with the SMILES
    of the neutral atom that replaces it. The SMILES is parsed with ``False``
    as the second positional argument (RDKit's ``sanitize`` flag).
    :return: A list of (query Mol, replacement Mol) tuples, in table order.
    """
    charged_to_neutral = (
        ("[n+;H]", "n"),  # Imidazoles
        ("[N+;!H0]", "N"),  # Amines
        ("[$([O-]);!$([O-][#7])]", "O"),  # Carboxylic acids and alcohols
        ("[S-;X1]", "S"),  # Thiols
        ("[$([N-;X2]S(=O)=O)]", "N"),  # Sulfonamides
        ("[$([N-;X2][C,N]=C)]", "N"),  # Enamines
        ("[n-]", "[nH]"),  # Tetrazoles
        ("[$([S-]=O)]", "S"),  # Sulfoxides
        ("[$([N-]C=O)]", "N"),  # Amides
    )
    reactions = []
    for smarts, smiles in charged_to_neutral:
        query = Chem.MolFromSmarts(smarts)
        replacement = Chem.MolFromSmiles(smiles, False)
        reactions.append((query, replacement))
    return reactions
def _neutralise_charges(mol, reactions=None):
if reactions is None:
reactions = _initialiseNeutralisationReactions()
replaced = False
for i, (reactant, product) in enumerate(reactions):
while mol.HasSubstructMatch(reactant):
replaced = True
rms = AllChem.ReplaceSubstructs(mol, reactant, product)
mol = rms[0]
if replaced:
return mol, True
else:
return mol, False
def _get_largest_fragment(mol):
    """
    Picks the fragment of a molecule that has the most heavy atoms.

    :param mol: Mol object, possibly made of several disconnected fragments.
    :return: The fragment (as a sanitized Mol) with the highest heavy atom
        count, or None if there are no fragments. On a tie the fragment that
        comes first in the GetMolFrags output is returned.
    """
    fragments = rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True)
    usable = [fragment for fragment in fragments if fragment is not None]
    # max() returns the first of several equally large items, which matches
    # replacing the current best only on a strictly larger count.
    return max(
        usable,
        key=lambda fragment: fragment.GetNumHeavyAtoms(),
        default=None,
    )
# Module-level SaltRemover instance, constructed once at import time with its
# default arguments.
# NOTE(review): no function visible in this part of the file references it —
# confirm it is still used elsewhere before removing.
_saltremover = SaltRemover.SaltRemover()
def _valid_size(
    mol, min_heavy_atoms, max_heavy_atoms, element_list, remove_long_side_chains
):
    """
    Filters molecules on number of heavy atoms and atom types.

    :param mol: Mol object to check; it is first passed through _rare_filters.
    :param min_heavy_atoms: Exclusive lower bound on the heavy atom count.
    :param max_heavy_atoms: Exclusive upper bound on the heavy atom count.
    :param element_list: Collection of allowed atomic numbers.
    :param remove_long_side_chains: If truthy, molecules with a chain of five
        singly-bonded non-ring carbons are rejected.
    :return: True if the molecule passes every check, False if it is rejected
        only by the long side chain check, and None if it is rejected by
        _rare_filters, the size bounds or the element list.
    """
    candidate = _rare_filters(mol)
    if not candidate:
        return None

    if not (min_heavy_atoms < candidate.GetNumHeavyAtoms() < max_heavy_atoms):
        return None

    for atom in candidate.GetAtoms():
        if atom.GetAtomicNum() not in element_list:
            return None

    if not remove_long_side_chains:
        return True

    # remove aliphatic side chains with at least 5 carbons not in a ring
    long_chain = Chem.MolFromSmarts("[CR0]-[CR0]-[CR0]-[CR0]-[CR0]")
    return not candidate.HasSubstructMatch(long_chain)
def _rare_filters(mol):
if mol:
ciano_filter = "[C-]#[N+]"
oh_filter = "[OH+]"
sulfur_filter = "[SH]"
if (
not mol.HasSubstructMatch(Chem.MolFromSmarts(ciano_filter))
and not mol.HasSubstructMatch(Chem.MolFromSmarts(oh_filter))
and not mol.HasSubstructMatch(Chem.MolFromSmarts(sulfur_filter))
):
return mol
def convert_to_rdkit_smiles(smiles):
    """
    Rewrites a SMILES string as RDKit writes it, without sanitizing the molecule.

    :param smiles: SMILES string.
    :return: The isomeric SMILES produced by RDKit for the unsanitized molecule.
    """
    unsanitized_mol = Chem.MolFromSmiles(smiles, sanitize=False)
    return Chem.MolToSmiles(unsanitized_mol, isomericSmiles=True)
def randomize_smiles(smiles, random_type="restricted"):
    """
    Returns a random SMILES given a SMILES of a molecule.
    :param smiles: SMILES string of the molecule to randomize.
    :param random_type: The type (unrestricted, restricted) of randomization performed.
    :return : A random SMILES string of the same molecule or None if the molecule is invalid.
    :raises ValueError: If random_type is not a supported type (only reached
        when the SMILES could be parsed).
    """
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        return None

    if random_type == "unrestricted":
        return rkc.MolToSmiles(mol, canonical=False, doRandom=True, isomericSmiles=True)
    if random_type == "restricted":
        # RenumberAtoms needs one index per atom in the graph. Explicit
        # hydrogens that survive parsing (e.g. [2H], [H][H], [H+]) are atoms
        # but not heavy atoms, so the permutation is sized with GetNumAtoms().
        new_atom_order = list(range(mol.GetNumAtoms()))
        random.shuffle(new_atom_order)
        random_mol = rkc.RenumberAtoms(mol, newOrder=new_atom_order)
        return rkc.MolToSmiles(random_mol, canonical=False, isomericSmiles=True)
    raise ValueError("Type '{}' is not valid".format(random_type))
def to_mol(smi):
    """
    Parses a SMILES string into a Mol object.
    :param smi: SMILES string.
    :return: The result of parsing the SMILES with RDKit, or None when smi is
        empty or otherwise falsy.
    """
    if not smi:
        return None
    return rkc.MolFromSmiles(smi)
def to_smiles(mol):
    """
    Writes a Mol object out as an isomeric SMILES string.
    :param mol: Mol object.
    :return: The SMILES string RDKit generates for the molecule.
    """
    smiles = rkc.MolToSmiles(mol, isomericSmiles=True)
    return smiles