Source code for bonafide.features.rdkit_bond

"""RDKit features for bonds."""

import numpy as np
from rdkit.Chem.rdMolTransforms import GetBondLength

from bonafide.utils.base_featurizer import BaseFeaturizer
from bonafide.utils.helper_functions_chemistry import get_ring_classification


class _Rdkit2DBondRingInfo(BaseFeaturizer):
    """Parent feature factory for the 2D bond features calculated based on RDKit's
    ``GetRingInfo()``.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def _analyze_rings(self) -> None:
        """Get the RDKit ring information bond features.

        For the bond ``self.atom_bond_idx``, the sizes of the rings containing the bond are
        written to ``self.results[self.atom_bond_idx]`` as comma-separated strings (``"none"``
        if there is no matching ring):

        * ``rdkit2D-bond-ring_info_<ring type>``: one entry for each of the four ring types,
          with the ring sizes in the order of the ring list.
        * ``rdkit2D-bond-ring_info``: the sorted ring sizes of all four ring types combined.

        Returns
        -------
        None
        """
        # Get the RDKit ring info analysis, either from the cache or by calculating it
        # Only cache BondRings() instead of entire GetRingInfo() object to avoid potential memory
        # errors
        _feature_name = "rdkit2d-global-bond_ring_info"
        conformer_cache = self.global_feature_cache[self.conformer_idx]
        if _feature_name not in conformer_cache:
            conformer_cache[_feature_name] = self.mol.GetRingInfo().BondRings()
        ring_info = conformer_cache[_feature_name]

        # The dictionary order defines the order in which the features are written
        sizes_by_type = {
            "aromatic_carbocycle": [],
            "aromatic_heterocycle": [],
            "nonaromatic_carbocycle": [],
            "nonaromatic_heterocycle": [],
        }

        # Classify every ring containing the bond exactly once instead of once per ring type
        for ring in ring_info:
            if self.atom_bond_idx not in ring:
                continue
            ring_type = get_ring_classification(mol=self.mol, ring_indices=ring, idx_type="bond")
            if ring_type in sizes_by_type:
                sizes_by_type[ring_type].append(len(ring))

        # Keep results that were already stored for this bond
        bond_results = self.results.setdefault(self.atom_bond_idx, {})
        for ring_type, sizes in sizes_by_type.items():
            bond_results[f"rdkit2D-bond-ring_info_{ring_type}"] = (
                ",".join(str(size) for size in sizes) if sizes else "none"
            )

        all_sizes = sorted(size for sizes in sizes_by_type.values() for size in sizes)
        bond_results["rdkit2D-bond-ring_info"] = (
            ",".join(str(size) for size in all_sizes) if all_sizes else "none"
        )
class Rdkit2DBondBeginAtomIndex(BaseFeaturizer):
    """Feature factory for the 2D bond feature "begin_atom_index", calculated with rdkit.

    The index of this feature is 536 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-begin_atom_index`` feature.

        The atom index of the begin atom of the bond ``self.atom_bond_idx`` is stored in
        ``self.results`` under ``self.feature_name``.
        """
        begin_atom_idx = self.mol.GetBondWithIdx(self.atom_bond_idx).GetBeginAtomIdx()
        self.results[self.atom_bond_idx] = {self.feature_name: begin_atom_idx}
class Rdkit2DBondBeginAtomMapNumber(BaseFeaturizer):
    """Feature factory for the 2D bond feature "begin_atom_map_number", calculated with rdkit.

    The index of this feature is 537 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-begin_atom_map_number`` feature.

        The atom map number of the begin atom of the bond ``self.atom_bond_idx`` is stored in
        ``self.results`` under ``self.feature_name``.
        """
        begin_atom = self.mol.GetBondWithIdx(self.atom_bond_idx).GetBeginAtom()
        map_number = begin_atom.GetAtomMapNum()
        self.results[self.atom_bond_idx] = {self.feature_name: map_number}
class Rdkit2DBondBondOrder(BaseFeaturizer):
    """Feature factory for the 2D bond feature "bond_order", calculated with rdkit.

    The index of this feature is 538 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-bond_order`` feature.

        The value returned by RDKit's ``GetBondTypeAsDouble()`` for the bond
        ``self.atom_bond_idx`` is stored in ``self.results`` under ``self.feature_name``.
        """
        bond_order = self.mol.GetBondWithIdx(self.atom_bond_idx).GetBondTypeAsDouble()
        self.results[self.atom_bond_idx] = {self.feature_name: bond_order}
class Rdkit2DBondBondType(BaseFeaturizer):
    """Feature factory for the 2D bond feature "bond_type", calculated with rdkit.

    The index of this feature is 539 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-bond_type`` feature.

        The string representation of RDKit's ``GetBondType()`` value for the bond
        ``self.atom_bond_idx`` is stored in ``self.results`` under ``self.feature_name``.
        """
        bond_type = self.mol.GetBondWithIdx(self.atom_bond_idx).GetBondType()
        self.results[self.atom_bond_idx] = {self.feature_name: str(bond_type)}
class Rdkit2DBondEndAtomIndex(BaseFeaturizer):
    """Feature factory for the 2D bond feature "end_atom_index", calculated with rdkit.

    The index of this feature is 540 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-end_atom_index`` feature.

        The atom index of the end atom of the bond ``self.atom_bond_idx`` is stored in
        ``self.results`` under ``self.feature_name``.
        """
        end_atom_idx = self.mol.GetBondWithIdx(self.atom_bond_idx).GetEndAtomIdx()
        self.results[self.atom_bond_idx] = {self.feature_name: end_atom_idx}
class Rdkit2DBondEndAtomMapNumber(BaseFeaturizer):
    """Feature factory for the 2D bond feature "end_atom_map_number", calculated with rdkit.

    The index of this feature is 541 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-end_atom_map_number`` feature.

        The atom map number of the end atom of the bond ``self.atom_bond_idx`` is stored in
        ``self.results`` under ``self.feature_name``.
        """
        end_atom = self.mol.GetBondWithIdx(self.atom_bond_idx).GetEndAtom()
        map_number = end_atom.GetAtomMapNum()
        self.results[self.atom_bond_idx] = {self.feature_name: map_number}
class Rdkit2DBondIsAromatic(BaseFeaturizer):
    """Feature factory for the 2D bond feature "is_aromatic", calculated with rdkit.

    The index of this feature is 542 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-is_aromatic`` feature.

        The value returned by RDKit's ``GetIsAromatic()`` for the bond ``self.atom_bond_idx``
        is stored in ``self.results`` under ``self.feature_name``.
        """
        is_aromatic = self.mol.GetBondWithIdx(self.atom_bond_idx).GetIsAromatic()
        self.results[self.atom_bond_idx] = {self.feature_name: is_aromatic}
class Rdkit2DBondIsConjugated(BaseFeaturizer):
    """Feature factory for the 2D bond feature "is_conjugated", calculated with rdkit.

    The index of this feature is 543 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-is_conjugated`` feature.

        The value returned by RDKit's ``GetIsConjugated()`` for the bond ``self.atom_bond_idx``
        is stored in ``self.results`` under ``self.feature_name``.
        """
        is_conjugated = self.mol.GetBondWithIdx(self.atom_bond_idx).GetIsConjugated()
        self.results[self.atom_bond_idx] = {self.feature_name: is_conjugated}
class Rdkit2DBondNNeighbors(BaseFeaturizer):
    """Feature factory for the 2D bond feature "n_neighbors", calculated with rdkit.

    The index of this feature is 544 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-n_neighbors`` feature.

        The neighbor counts of the begin and the end atom of the bond are added up and
        reduced by two. An atom that is bonded to both bond atoms is therefore counted twice.
        """
        bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        neighbor_count = sum(
            len(atom.GetNeighbors()) for atom in (bond.GetBeginAtom(), bond.GetEndAtom())
        )

        # The two bond atoms are neighbors of each other, so they are excluded from the count
        self.results[self.atom_bond_idx] = {self.feature_name: neighbor_count - 2}
class Rdkit2DBondNeighboringAtomsIndices(BaseFeaturizer):
    """Feature factory for the 2D bond feature "neighboring_atoms_indices", calculated with
    rdkit.

    The index of this feature is 545 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-neighboring_atoms_indices`` feature.

        The result is a comma-separated string of the sorted, unique indices of all atoms
        bonded to the begin or the end atom of the bond, without the two bond atoms. It is an
        empty string if there are no such atoms.
        """
        bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        bond_atoms = (bond.GetBeginAtom(), bond.GetEndAtom())

        # Collect the neighbors of both bond atoms in a set to remove duplicates
        unique_indices = set()
        for bond_atom in bond_atoms:
            unique_indices.update(neighbor.GetIdx() for neighbor in bond_atom.GetNeighbors())

        # The bond atoms are neighbors of each other but do not belong to the feature
        unique_indices -= {bond_atom.GetIdx() for bond_atom in bond_atoms}

        joined_indices = ",".join(str(atom_idx) for atom_idx in sorted(unique_indices))
        self.results[self.atom_bond_idx] = {self.feature_name: joined_indices}
class Rdkit2DBondNeighboringAtomsMapNumbers(BaseFeaturizer):
    """Feature factory for the 2D bond feature "neighboring_atoms_map_numbers", calculated
    with rdkit.

    The index of this feature is 546 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-neighboring_atoms_map_numbers`` feature.

        The result is a comma-separated string of the sorted atom map numbers of all atoms
        bonded to the begin or the end atom of the bond, without the two bond atoms. Each
        neighboring atom contributes once, so identical map numbers of different atoms are
        all kept.
        """
        bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        bond_atoms = (bond.GetBeginAtom(), bond.GetEndAtom())
        bond_atom_indices = tuple(bond_atom.GetIdx() for bond_atom in bond_atoms)

        # Key the map numbers by atom index so that a shared neighbor is only stored once
        map_number_by_atom = {}
        for bond_atom in bond_atoms:
            for neighbor in bond_atom.GetNeighbors():
                neighbor_idx = neighbor.GetIdx()
                if neighbor_idx not in bond_atom_indices:
                    map_number_by_atom[neighbor_idx] = neighbor.GetAtomMapNum()

        sorted_map_numbers = sorted(map_number_by_atom.values())
        joined_map_numbers = ",".join(str(map_number) for map_number in sorted_map_numbers)
        self.results[self.atom_bond_idx] = {self.feature_name: joined_map_numbers}
class Rdkit2DBondNeighboringBondsIndices(BaseFeaturizer):
    """Feature factory for the 2D bond feature "neighboring_bonds_indices", calculated with
    rdkit.

    The index of this feature is 547 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-neighboring_bonds_indices`` feature.

        The result is a comma-separated string of the sorted, unique indices of all bonds of
        the begin and the end atom, without the bond ``self.atom_bond_idx`` itself. It is an
        empty string if there are no such bonds.
        """
        target_bond = self.mol.GetBondWithIdx(self.atom_bond_idx)

        # Collect the bonds of both bond atoms in a set to remove duplicates
        adjacent_bond_indices = set()
        for bond_atom in (target_bond.GetBeginAtom(), target_bond.GetEndAtom()):
            adjacent_bond_indices.update(
                atom_bond.GetIdx() for atom_bond in bond_atom.GetBonds()
            )

        # The bond itself is not one of its neighbors
        adjacent_bond_indices.discard(self.atom_bond_idx)

        joined_indices = ",".join(str(bond_idx) for bond_idx in sorted(adjacent_bond_indices))
        self.results[self.atom_bond_idx] = {self.feature_name: joined_indices}
class Rdkit2DBondRingInfo(_Rdkit2DBondRingInfo):
    """Feature factory for the 2D bond feature "ring_info", calculated with rdkit.

    The index of this feature is 548 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-ring_info`` feature.

        The calculation is delegated to the inherited ``_analyze_rings()`` method.
        """
        self._analyze_rings()
class Rdkit2DBondRingInfoAromaticCarbocycle(_Rdkit2DBondRingInfo):
    """Feature factory for the 2D bond feature "ring_info_aromatic_carbocycle", calculated
    with rdkit.

    The index of this feature is 549 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-ring_info_aromatic_carbocycle`` feature.

        The calculation is delegated to the inherited ``_analyze_rings()`` method.
        """
        self._analyze_rings()
class Rdkit2DBondRingInfoAromaticHeterocycle(_Rdkit2DBondRingInfo):
    """Feature factory for the 2D bond feature "ring_info_aromatic_heterocycle", calculated
    with rdkit.

    The index of this feature is 550 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-ring_info_aromatic_heterocycle`` feature.

        The calculation is delegated to the inherited ``_analyze_rings()`` method.
        """
        self._analyze_rings()
class Rdkit2DBondRingInfoNonaromaticCarbocycle(_Rdkit2DBondRingInfo):
    """Feature factory for the 2D bond feature "ring_info_nonaromatic_carbocycle", calculated
    with rdkit.

    The index of this feature is 551 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-ring_info_nonaromatic_carbocycle`` feature.

        The calculation is delegated to the inherited ``_analyze_rings()`` method.
        """
        self._analyze_rings()
class Rdkit2DBondRingInfoNonaromaticHeterocycle(_Rdkit2DBondRingInfo):
    """Feature factory for the 2D bond feature "ring_info_nonaromatic_heterocycle", calculated
    with rdkit.

    The index of this feature is 552 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-ring_info_nonaromatic_heterocycle`` feature.

        The calculation is delegated to the inherited ``_analyze_rings()`` method.
        """
        self._analyze_rings()
class Rdkit2DBondStereo(BaseFeaturizer):
    """Feature factory for the 2D bond feature "stereo", calculated with rdkit.

    The index of this feature is 553 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-stereo`` feature.

        The string representation of RDKit's ``GetStereo()`` value for the bond
        ``self.atom_bond_idx`` is stored in ``self.results`` under ``self.feature_name``.
        """
        stereo = self.mol.GetBondWithIdx(self.atom_bond_idx).GetStereo()
        self.results[self.atom_bond_idx] = {self.feature_name: str(stereo)}
class Rdkit2DBondValenceContributionBeginAtom(BaseFeaturizer):
    """Feature factory for the 2D bond feature "valence_contribution_begin_atom", calculated
    with rdkit.

    The index of this feature is 554 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-valence_contribution_begin_atom`` feature.

        The value returned by RDKit's ``GetValenceContrib()`` for the begin atom of the bond
        ``self.atom_bond_idx`` is stored in ``self.results`` under ``self.feature_name``.
        """
        target_bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        contribution = target_bond.GetValenceContrib(target_bond.GetBeginAtom())
        self.results[self.atom_bond_idx] = {self.feature_name: contribution}
class Rdkit2DBondValenceContributionEndAtom(BaseFeaturizer):
    """Feature factory for the 2D bond feature "valence_contribution_end_atom", calculated
    with rdkit.

    The index of this feature is 555 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit2D-bond-valence_contribution_end_atom`` feature.

        The value returned by RDKit's ``GetValenceContrib()`` for the end atom of the bond
        ``self.atom_bond_idx`` is stored in ``self.results`` under ``self.feature_name``.
        """
        target_bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        contribution = target_bond.GetValenceContrib(target_bond.GetEndAtom())
        self.results[self.atom_bond_idx] = {self.feature_name: contribution}
class Rdkit3DBondBondLength(BaseFeaturizer):
    """Feature factory for the 3D bond feature "bond_length", calculated with rdkit.

    The index of this feature is 557 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit3D-bond-bond_length`` feature.

        The distance between the begin and the end atom of the bond ``self.atom_bond_idx`` is
        rounded to six decimal places and stored in ``self.results`` under
        ``self.feature_name``.
        """
        bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        begin_atom_idx = bond.GetBeginAtomIdx()
        end_atom_idx = bond.GetEndAtomIdx()

        # Request the conformer without an explicit ID, as the 3D coordinate features do,
        # instead of hard-coding conformer ID 0
        conformer = self.mol.GetConformer()
        length = GetBondLength(conformer, begin_atom_idx, end_atom_idx)
        self.results[self.atom_bond_idx] = {self.feature_name: round(length, 6)}
class Rdkit3DBondCoordinatesBeginAtom(BaseFeaturizer):
    """Feature factory for the 3D bond feature "coordinates_begin_atom", calculated with
    rdkit.

    The index of this feature is 558 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit3D-bond-coordinates_begin_atom`` feature.

        The x, y, and z coordinates of the begin atom of the bond ``self.atom_bond_idx`` are
        rounded to six decimal places and stored as a comma-separated string.
        """
        target_bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        position = self.mol.GetConformer().GetAtomPosition(target_bond.GetBeginAtomIdx())
        formatted_position = ",".join(
            str(round(component, 6)) for component in (position.x, position.y, position.z)
        )
        self.results[self.atom_bond_idx] = {self.feature_name: formatted_position}
class Rdkit3DBondCoordinatesCenter(BaseFeaturizer):
    """Feature factory for the 3D bond feature "coordinates_center", calculated with rdkit.

    The index of this feature is 559 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit3D-bond-coordinates_center`` feature.

        The x, y, and z coordinates of the midpoint between the begin and the end atom of the
        bond ``self.atom_bond_idx`` are rounded to six decimal places and stored as a
        comma-separated string.
        """
        target_bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        conformer = self.mol.GetConformer()

        # Coordinates of the begin atom (first entry) and the end atom (second entry)
        atom_coords = []
        for atom_idx in (target_bond.GetBeginAtomIdx(), target_bond.GetEndAtomIdx()):
            position = conformer.GetAtomPosition(atom_idx)
            atom_coords.append(np.array([position.x, position.y, position.z]))

        midpoint = (atom_coords[0] + atom_coords[1]) / 2
        formatted_midpoint = ",".join(str(round(component, 6)) for component in midpoint)
        self.results[self.atom_bond_idx] = {self.feature_name: formatted_midpoint}
class Rdkit3DBondCoordinatesEndAtom(BaseFeaturizer):
    """Feature factory for the 3D bond feature "coordinates_end_atom", calculated with rdkit.

    The index of this feature is 560 (see the ``list_atom_features()`` and
    ``list_bond_features()`` method).

    The corresponding configuration settings can be found under "rdkit.misc" in the
    _feature_config.toml file.
    """

    def __init__(self) -> None:
        self.extraction_mode = "single"
        super().__init__()

    def calculate(self) -> None:
        """Calculate the ``rdkit3D-bond-coordinates_end_atom`` feature.

        The x, y, and z coordinates of the end atom of the bond ``self.atom_bond_idx`` are
        rounded to six decimal places and stored as a comma-separated string.
        """
        target_bond = self.mol.GetBondWithIdx(self.atom_bond_idx)
        position = self.mol.GetConformer().GetAtomPosition(target_bond.GetEndAtomIdx())
        formatted_position = ",".join(
            str(round(component, 6)) for component in (position.x, position.y, position.z)
        )
        self.results[self.atom_bond_idx] = {self.feature_name: formatted_position}