Source code for bonafide.features.alfabet_

"""Bond dissociation energy features from ``ALFABET``."""

import logging
from typing import Dict

import pandas as pd

from bonafide.utils.base_featurizer import BaseFeaturizer
from bonafide.utils.driver import external_driver
from bonafide.utils.helper_functions import get_function_or_method_name
from bonafide.utils.helper_functions_chemistry import get_atom_bond_mapping_dicts


[docs] class _Alfabet2DBond(BaseFeaturizer): """Parent feature factory for the 2D atom ALFABET features. For details, please refer to the ALFABET repository (https://github.com/NREL/alfabet, last accessed on 09.09.2025). """ python_interpreter_path: str def __init__(self) -> None: self.extraction_mode = "multi" super().__init__()
[docs] def calculate(self) -> None: """Calculate the ``alfabet2D-bond-bond_dissociation_energy`` and ``alfabet2D-bond-bond_dissociation_free_energy`` feature.""" # Get the canonical SMILES string and the bond mapping dictionary to ensure that ALFABET # is run with the canonical SMILES string to avoid potential issues with different # atom/bond orderings. _, mapping_dict_bonds, canonical_smiles = get_atom_bond_mapping_dicts(self.mol) # ALFABET is run in its separate Python environment through a helper script that is # temporarily created and run with the respective Python interpreter. This was necessary # because ALFABET was not compatible with BONAFIDE's python environment. # Python script for ALFABET alfabet_script = [ "import pandas as pd", "import os", "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'", "from alfabet import model", f"df = model.predict([r'{canonical_smiles}'])", f"df.to_csv('Alfabet2DBond_{self.conformer_name}.csv', index=False)", ] alfabet_script_str = "\n".join(alfabet_script) # Run ALFABET res = external_driver( program_path=self.python_interpreter_path, program_input=alfabet_script_str, input_file_extension=".py", namespace=self.conformer_name[::-1].split("__", 1)[-1][::-1], dependencies=["pandas", "alfabet"], capture_output=True, text=True, check=False, ) # Check for errors stderr = res.stderr returncode = res.returncode if returncode != 0: self._err = f"returncode: {returncode}, stderr: {stderr}" return # Save the results self._read_output_file(mapping_dict=mapping_dict_bonds)
[docs] def _read_output_file(self, mapping_dict: Dict[int, int]) -> None: """Read the ALFABET output pandas DataFrame and write the results to the results dictionary. Only the bonds that can be predicted by ALFABET will have an entry in the DataFrame. If molecules with no hydrogen atoms added are passed to BONAFIDE, the X-H dissociation energies still will be predicted by ALFABET, but the results will not appear in the final BONAFIDE output, as the bonds do not exist in the actual input molecule. Add hydrogen atoms to the molecule before passing it to BONAFIDE to avoid this. Parameters ---------- mapping_dict : Dict[int, int] The mapping dictionary to map the bond indices from the canonical SMILES string to the bond indices of the input molecule. This is included for security to ensure that the bond indices are handled correctly. Returns ------- None """ _loc = f"{self.__class__.__name__}.{get_function_or_method_name()}" # Read the output file df = pd.read_csv(f"Alfabet2DBond_{self.conformer_name}.csv") # Get the data and write it to the results dictionary for _, row_data in df.iterrows(): bond_idx = int(row_data["bond_index"]) bde = row_data["bde_pred"] bdfe = row_data["bdfe_pred"] valid = row_data["is_valid"] if valid is False: _namespace = self.conformer_name[::-1].split("__", 1)[-1][::-1] logging.warning( f"'{_namespace}' | {_loc}()\nPrediction of the bond dissociation (free) " f"energy with ALFABET for bond with index {bond_idx} was labeled as invalid. " "Check your input and the output." ) if bond_idx in mapping_dict: self.results[mapping_dict[bond_idx]] = { "alfabet2D-bond-bond_dissociation_energy": bde, "alfabet2D-bond-bond_dissociation_free_energy": bdfe, }
[docs] class Alfabet2DBondBondDissociationEnergy(_Alfabet2DBond): """Feature factory for the 2D bond feature "bond_dissociation_energy", calculated with alfabet. The index of this feature is 0 (see the ``list_atom_features()`` and ``list_bond_features()`` method). The corresponding configuration settings can be found under "alfabet" in the _feature_config.toml file. """ def __init__(self) -> None: super().__init__()
# This feature is automatically calculated in _Alfabet2DBond
[docs] class Alfabet2DBondBondDissociationFreeEnergy(_Alfabet2DBond): """Feature factory for the 2D bond feature "bond_dissociation_free_energy", calculated with alfabet. The index of this feature is 1 (see the ``list_atom_features()`` and ``list_bond_features()`` method). The corresponding configuration settings can be found under "alfabet" in the _feature_config.toml file. """ def __init__(self) -> None: super().__init__()
# This feature is automatically calculated in _Alfabet2DBond