Source code for bonafide.features.functional_group
"""Functional group match feature based on predefined SMARTS patterns."""
from __future__ import annotations
import os
from collections import defaultdict
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
import pandas as pd
from bonafide.utils.base_featurizer import BaseFeaturizer
from bonafide.utils.io_ import read_smarts
if TYPE_CHECKING:
from rdkit import Chem
[docs]
class Bonafide2DAtomFunctionalGroupMatch(BaseFeaturizer):
"""Feature factory for the 2D atom feature "functional_group_match", implemented within this
package.
The index of this feature is 27 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.functional_group" in the _feature_config.toml file.
"""
_functional_groups_smarts: Dict[str, List[Tuple[str, Chem.rdchem.Mol]]]
custom_groups: List[List[str]]
key_level: str
def __init__(self) -> None:
self.extraction_mode = "multi"
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-functional_group_match`` feature."""
# Read in the functional group SMARTS patterns file
self._read_functional_group_smarts()
if self._err is not None:
return
# Select key level
smarts_list = self._functional_groups_smarts[self.key_level]
# Do substructure matching for all SMARTS
atom_idx_to_key = defaultdict(list)
for fg_name, smarts_mol in smarts_list:
matches = self.mol.GetSubstructMatches(smarts_mol)
for match in matches:
for atom_idx in match:
atom_idx_to_key[atom_idx].append(fg_name)
# Add "no match" for atoms without any functional group matches
for atom_idx in range(self.mol.GetNumAtoms()):
if atom_idx not in atom_idx_to_key:
atom_idx_to_key[atom_idx].append("no match")
# Write the features to the results dictionary
for idx, value in atom_idx_to_key.items():
res = list(set(value))
res.sort()
self.results[idx] = {self.feature_name: ",".join(res)}
[docs]
def _read_functional_group_smarts(self) -> None:
"""Read the functional groups SMARTS patterns and store them in a hierarchical dictionary.
The SMARTS patterns are only read and processed if not already done, i.e. if the
``_functional_groups_smarts`` attribute is not empty.
Returns
-------
None
"""
if self._functional_groups_smarts != {}:
return
# Read the functional groups SMARTS from the library file
_file_path = os.path.join(
os.path.dirname(os.path.dirname(__file__)),
"utils",
"SMARTS_RX.txt",
)
# Check if the file exists
if not os.path.isfile(_file_path):
self._err = f"Functional groups SMARTS file not found at '{_file_path}'"
return
_df = pd.read_csv(
_file_path,
sep=" ",
usecols=[0, 1, 2, 3],
header=1,
names=["l0", "l1", "l2", "smarts"],
)
# Add custom functional groups
_df_new = self._add_custom_functional_groups(df=_df)
if self._err is not None:
return
# Validate and format the SMARTS DataFrame
assert isinstance(_df_new, pd.DataFrame) # for type checker
self._validate_and_format_smarts_df(df=_df_new)
[docs]
def _add_custom_functional_groups(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
"""Add custom functional groups to the functional groups SMARTS DataFrame.
Returns
-------
Optional[pd.DataFrame]
The updated functional groups SMARTS DataFrame, or ``None`` if a custom functional
group name was provided that already exists in the original SMARTS DataFrame.
"""
for custom_fg in self.custom_groups:
_name = custom_fg[0]
_smarts = custom_fg[1]
# Check if the custom functional group name already exists
if any(
[(_name in list(df["l2"])), (_name in list(df["l1"])), (_name in list(df["l0"]))]
):
self._err = f"Custom functional group name '{_name}' already exists"
return None
# Append the custom functional group to the DataFrame
df = pd.concat(
[
df,
pd.DataFrame(
{
"l0": [_name],
"l1": [_name],
"l2": [_name],
"smarts": [_smarts],
}
),
],
ignore_index=True,
)
return df