"""Autocorrelation features for atoms in 2D molecules."""
from __future__ import annotations
import operator
from typing import TYPE_CHECKING, Any, List, Optional
import numpy as np
from rdkit import Chem
from bonafide.utils.base_featurizer import BaseFeaturizer
if TYPE_CHECKING:
from numpy.typing import NDArray
[docs]
class _Bonafide2DAtomAutocorrelation(BaseFeaturizer):
"""Parent feature factory for the 2D atom autocorrelation features."""
depth: int
iterable_option: str
def __init__(self) -> None:
self.extraction_mode = "single"
super().__init__()
[docs]
def run_workflow(self, operant: Any, scale: bool) -> None:
"""Execute the workflow for calculating an autocorrelation feature vector for a give root
atom.
Parameters
----------
operant : Any
The mathematical operation (sum, difference, product, mean, abs difference) to be
applied between the property of the root atom and the respective other atom.
scale : bool
Whether to scale the autocorrelation feature at each depth by the number of atoms at
that depth.
Returns
-------
None
"""
# Get the distance matrix, either from the cache or by calculating it
_feature_name = "rdkit2d-global-bond_distance_matrix"
if _feature_name not in self.global_feature_cache[self.conformer_idx]:
distance_matrix = Chem.GetDistanceMatrix(self.mol)
self.global_feature_cache[self.conformer_idx][_feature_name] = distance_matrix
else:
distance_matrix = self.global_feature_cache[self.conformer_idx][_feature_name]
# Calculate autocorrelation vector
dist_vector = distance_matrix[self.atom_bond_idx]
property_vector = self._get_property_vector(dist_vector=dist_vector)
if property_vector is None:
return
if "_inaccessible" in property_vector:
self._err = (
f"the property vector for the '{self.iterable_option}' feature contains "
f"inaccessible values. Therefore, the requested autocorrelation feature cannot "
"be calculated"
)
return
autocorr_vector = self._get_autocorrelation_vector(
dist_vector=dist_vector, property_vector=property_vector, operant=operant, scale=scale
)
# Modify feature_name to also include the name of the underlying atom feature
f_name = self.iterable_option.split("-")[-1]
self.feature_name = f"{self.feature_name}__{f_name}"
# Safe results to the results dictionary
self.results[self.atom_bond_idx] = {self.feature_name: autocorr_vector}
[docs]
def _get_property_vector(
self, dist_vector: NDArray[np.float64]
) -> Optional[NDArray[np.float64]]:
"""Get the atom property vector for the requested feature to be used in the generation of
the autocorrelation features.
This property must be precomputed for every atom in the molecule, requested by the
``iterable_option`` input available in the configuration settings of the autocorrelation
feature.
Parameters
----------
dist_vector : NDArray[np.float64]
The vector of the shortest distances in bonds from the root atom to all other atoms
in the molecule.
Returns
-------
Optional[NDArray[np.float64]]
The property vector for the requested feature for all atoms in the molecule or ``None``
if an error occurred.
"""
# Check if feature for autocorrelation was precomputed
if self.iterable_option not in self.feature_cache[self.conformer_idx]:
self._err = (
f"the '{self.iterable_option}' feature that was requested to be used to "
"calculate the autocorrelation vector was not precomputed for conformer "
f"with index {self.conformer_idx}. Calculate this feature before requesting "
"the autocorrelation feature"
)
return None
# Get the property vector
_cached_data = self.feature_cache[self.conformer_idx][self.iterable_option]
_n_atoms = self.mol.GetNumAtoms()
property_vector: List[Any] = ["_missing" for _ in range(_n_atoms)]
for idx, value in _cached_data.items():
property_vector[idx] = value
# Check if a required value from the property vector is missing
for idx, dist in enumerate(dist_vector):
if dist > self.depth:
continue
if property_vector[idx] == "_missing":
self._err = (
f"the property vector for the '{self.iterable_option}' feature is missing "
f"a value at atom index {idx}. Therefore, the requested autocorrelation "
"feature cannot be calculated"
)
return None
return np.array(property_vector)
[docs]
def _get_autocorrelation_vector(
self,
dist_vector: NDArray[np.float64],
property_vector: NDArray[np.float64],
operant: Any,
scale: bool,
) -> str:
"""Calculate the autocorrelation vector.
Parameters
----------
dist_vector : NDArray[np.float64]
The vector of the shortest distances in bonds from the root atom to all other atoms
in the molecule.
property_vector : NDArray[np.float64]
The property vector for the requested feature for all atoms in the molecule.
operant : Any
The mathematical operation (sum, difference, product, mean, abs difference) to be
applied between the property of the root atom and the respective other atom.
scale : bool
Whether to scale the autocorrelation feature at each depth by the number of atoms at
that depth.
Returns
-------
str
The autocorrelation vector as a comma-separated string.
"""
root_prop = property_vector[self.atom_bond_idx]
autocorr_vector = []
for d in range(self.depth + 1):
kronecker_vec = self._kronecker_delta(arr=dist_vector, target_value=d)
value = np.sum(operant(root_prop, property_vector) * kronecker_vec)
if scale is True:
value /= np.count_nonzero(a=kronecker_vec)
autocorr_vector.append(value)
autocorr_vector = [float(x) for x in autocorr_vector]
autocorr_vector_str = ",".join(
[str(round(number=val, ndigits=8)) for val in autocorr_vector]
)
return autocorr_vector_str
[docs]
@staticmethod
def _kronecker_delta(arr: NDArray[np.float64], target_value: int) -> NDArray[np.float64]:
"""Calculate the Kronecker delta for a given array and target value.
Parameters
----------
arr : NDArray[np.float64]
The input array (i.e., the distance vector of a given atom to all atoms).
target_value : int
The target value (i.e., the current depth).
Returns
-------
NDArray[np.float64]
The Kronecker delta array, where elements equal to the target value are 1.0, and all
other elements are 0.0.
"""
res: NDArray[np.float64] = (arr == target_value).astype(float)
return res
[docs]
@staticmethod
def _mean(num: float, arr: NDArray[np.float64]) -> NDArray[np.float64]:
"""Calculate the mean between a number and an array.
Parameters
----------
num : float
The number (i.e., the property of the root atom).
arr : NDArray[np.float64]
The array (i.e., the property vector of all atoms in the molecule).
Returns
-------
NDArray[np.float64]
The mean between the number and each element in the array.
"""
return (num + arr) / 2
[docs]
@staticmethod
def _abs_diff(num: float, arr: NDArray[np.float64]) -> NDArray[np.float64]:
"""Calculate the absolute difference between a number and an array.
Parameters
----------
num : float
The number (i.e., the property of the root atom).
arr : NDArray[np.float64]
The array (i.e., the property vector of all atoms in the molecule).
Returns
-------
NDArray[np.float64]
The absolute difference between the number and each element in the array.
"""
return np.abs(num - arr)
[docs]
class Bonafide2DAtomAutocorrelationAbsDifference(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_abs_difference", implemented
within this package.
The index of this feature is 5 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_abs_difference`` feature."""
self.run_workflow(operant=self._abs_diff, scale=False)
[docs]
class Bonafide2DAtomAutocorrelationDifference(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_difference", implemented within
this package.
The index of this feature is 6 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_difference`` feature."""
self.run_workflow(operant=operator.sub, scale=False)
[docs]
class Bonafide2DAtomAutocorrelationMean(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_mean", implemented within this
package.
The index of this feature is 7 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_mean`` feature."""
self.run_workflow(operant=self._mean, scale=False)
[docs]
class Bonafide2DAtomAutocorrelationProduct(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_product", implemented within
this package.
The index of this feature is 8 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_product`` feature."""
self.run_workflow(operant=operator.mul, scale=False)
[docs]
class Bonafide2DAtomAutocorrelationScaledAbsDifference(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_scaled_abs_difference",
implemented within this package.
The index of this feature is 9 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_scaled_abs_difference`` feature."""
self.run_workflow(operant=self._abs_diff, scale=True)
[docs]
class Bonafide2DAtomAutocorrelationScaledDifference(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_scaled_difference", implemented
within this package.
The index of this feature is 10 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_scaled_difference`` feature."""
self.run_workflow(operant=operator.sub, scale=True)
[docs]
class Bonafide2DAtomAutocorrelationScaledMean(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_scaled_mean", implemented within
this package.
The index of this feature is 11 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_scaled_mean`` feature."""
self.run_workflow(operant=self._mean, scale=True)
[docs]
class Bonafide2DAtomAutocorrelationScaledProduct(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_scaled_product", implemented
within this package.
The index of this feature is 12 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_scaled_product`` feature."""
self.run_workflow(operant=operator.mul, scale=True)
[docs]
class Bonafide2DAtomAutocorrelationScaledSum(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_scaled_sum", implemented within
this package.
The index of this feature is 13 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_scaled_sum`` feature."""
self.run_workflow(operant=operator.add, scale=True)
[docs]
class Bonafide2DAtomAutocorrelationSum(_Bonafide2DAtomAutocorrelation):
"""Feature factory for the 2D atom feature "autocorrelation_sum", implemented within this
package.
The index of this feature is 14 (see the ``list_atom_features()`` and
``list_bond_features()`` method). The corresponding configuration settings can be found
under "bonafide.autocorrelation" in the _feature_config.toml file.
"""
def __init__(self) -> None:
super().__init__()
[docs]
def calculate(self) -> None:
"""Calculate the ``bonafide2D-atom-autocorrelation_sum`` feature."""
self.run_workflow(operant=operator.add, scale=False)