"""Type and format validation of the configuration settings parameters of the individual
featurizers."""
from __future__ import annotations
import logging
import os
import shutil
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import (
BaseModel,
Field,
StrictBool,
StrictFloat,
StrictInt,
StrictStr,
ValidationError,
ValidationInfo,
field_validator,
model_validator,
)
from pydantic_core import PydanticCustomError
from bonafide.utils.constants import (
ATOMIC_RADII_MULTIWFN_POPULATION,
AVERAGE_METHODS_DSCRIBE_SOAP,
CNTYPE_METHODS_KALLISTO,
DISTAL_VOLUME_METHODS_MORFEUS_BV,
EEM_PARAMETERS_MULTIWFN_POPULATION,
ELECTRONEGATIVITY_EN_SCALES,
ELEMENT_SYMBOLS,
ESP_TYPE_MULTIWFN_POPULATION,
FUNCTIONAL_GROUP_KEY_LEVELS,
GEOMETRY_FUNCTION_METHODS_DSCRIBE_LMBTR,
IBIS_GRID_METHODS_MULTIWFN_BOND_ANALYSIS,
IGM_TYPES_MULTIWFN_BOND_ANALYSIS,
INTEGRATION_GRID_METHODS_MULTIWFN_FUZZY,
ITERABLE_OPTIONS_MULTIWFN_CDFT,
METHOD_METHODS_MENDELEEV,
METHODS_MORFEUS_LOCAL_FORCE,
METHODS_XTB,
NORMALIZATION_METHODS_DSCRIBE_LMBTR,
PARTITION_SCHEME_METHODS_MULTIWFN_FUZZY,
PYRAMIDALIZATION_CALCULATION_METHODS_MORFEUS_PYRAMIDALIZATION,
RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE,
RADII_TYPE_METHODS_MORFEUS_DISPERSION,
RADII_TYPE_METHODS_MORFEUS_SASA,
RADIUS_BECKE_PARTITION_METHODS_MULTIWFN_FUZZY,
RADIUS_BECKE_PARTITION_METHODS_MULTIWFN_POPULATION,
RBF_METHODS_DSCRIBE_SOAP,
REAL_SPACE_FUNCTIONS_MULTIWFN,
SOLVENT_MODEL_SOLVERS_PSI4,
SOLVENT_MODELS_XTB,
SOLVENTS_PSI4,
SOLVENTS_XTB,
VDWTYPE_METHODS_KALLISTO,
WEIGHTING_FUNCTION_METHODS_DSCRIBE_LMBTR,
)
from bonafide.utils.helper_functions import get_function_or_method_name
[docs]
class _StandardizeStrMixin:
    """Standardize string inputs before validation.

    Mixing this class into a pydantic model registers a wildcard ``before`` validator that is
    applied to every field of the model.
    """

    @field_validator("*", mode="before")
    @classmethod
    def standardize_strings(cls, value: Any, info: ValidationInfo) -> Any:
        """Standardize string inputs by stripping whitespace and converting to lowercase.

        Non-string values and the values of fields listed in the internal blacklist are
        returned unchanged.

        Parameters
        ----------
        value : Any
            The raw value to be standardized.
        info : ValidationInfo
            Information about the field being validated; only ``field_name`` is read.

        Returns
        -------
        Any
            The stripped and lowercased string if ``value`` is a string, otherwise the
            original value.
        """
        # Fields whose value must be passed on verbatim
        _black_list = ("XTBHOME",)
        if info.field_name in _black_list:
            return value

        # isinstance (instead of an exact type comparison) also covers str subclasses
        if isinstance(value, str):
            return value.strip().lower()
        return value
[docs]
class _ValidateSpeciesMixin:
    """Validate a list of chemical element symbols."""

    @field_validator("species", mode="before")
    @classmethod
    def validate_species_before(cls, value: Any) -> List[str]:
        """Validate ``species`` before type validation.

        "auto" is the only valid string input; it is wrapped into a one-element list so that
        it passes the subsequent list type validation.

        Parameters
        ----------
        value : Any
            The value to be validated.

        Returns
        -------
        List[str]
            ``["auto"]`` if the input is the string "auto", otherwise the input list itself.

        Raises
        ------
        PydanticCustomError
            If the input is a string other than "auto" or is neither a string nor a list.
        """
        if isinstance(value, str) and value == "auto":
            return [value]
        if isinstance(value, list):
            return value

        _errmsg = "Input must be either 'auto' or a list of element symbols to consider"
        raise PydanticCustomError("", _errmsg)

    @field_validator("species", mode="after")
    @classmethod
    def validate_species_after(cls, value: List[str]) -> Union[str, List[str]]:
        """Validate ``species`` after type validation.

        Parameters
        ----------
        value : List[str]
            The list of element symbols to be validated.

        Returns
        -------
        Union[str, List[str]]
            "auto" if the input is ``["auto"]``, otherwise the validated list of chemical
            element symbols.

        Raises
        ------
        PydanticCustomError
            If any entry of the list is not contained in ``ELEMENT_SYMBOLS``.
        """
        # Undo the wrapping applied by the "before" validator
        if value == ["auto"]:
            return value[0]

        if any(symbol not in ELEMENT_SYMBOLS for symbol in value):
            _errmsg = f"Input must only contain {ELEMENT_SYMBOLS}"
            raise PydanticCustomError("", _errmsg)
        return value
[docs]
class _ValidateIterableIntOptionMixin:
    """Mixin to validate the input of a feature index corresponding to a feature of data type int
    or float.

    Attributes
    ----------
    feature_info : Dict[int, Dict[str, Any]]
        Mapping from feature index to the details of that feature. The keys "name",
        "feature_type" and "data_type" are read from each entry.
    iterable_option : List[Any]
        The feature indices provided by the user. After validation, the indices are replaced
        by the corresponding feature names.
    """

    feature_info: Dict[int, Dict[str, Any]]
    iterable_option: List[Any]

    @field_validator("iterable_option", mode="before")
    @classmethod
    def validate_iterable_option_before(cls, value: Any) -> Any:
        """Validate ``iterable_option`` before type validation.

        Parameters
        ----------
        value : Any
            The value to be validated.

        Returns
        -------
        Any
            The input value. If the input is a single integer, it is wrapped into a list.

        Raises
        ------
        PydanticCustomError
            If the input is ``None``, an empty string, or an empty list.
        """
        if value is None or value == "" or value == []:
            _errmsg = "Input must not be empty"
            raise PydanticCustomError("", _errmsg)

        # bool is a subclass of int but must not be interpreted as a feature index
        if isinstance(value, int) and not isinstance(value, bool):
            return [value]
        return value

    @model_validator(mode="after")
    def check_iterable_option(self) -> _ValidateIterableIntOptionMixin:
        """Validate ``iterable_option`` after type validation.

        Every entry must be a key of ``feature_info`` that refers to an atom feature of data
        type "int" or "float". The indices are then replaced by the feature names.

        Returns
        -------
        _ValidateIterableIntOptionMixin
            The instance with the validated and formatted iterable option.

        Raises
        ------
        PydanticCustomError
            If the option is empty, contains an unknown feature index, or contains an index
            that does not refer to an atom feature of data type "int" or "float".
        """
        # Empty non-list iterables (e.g. an empty tuple) are not caught by the "before"
        # validator, which only compares against an empty list
        if not self.iterable_option:
            _errmsg = "Input must not be empty"
            raise PydanticCustomError("", _errmsg)

        _feature_names = []
        for idx in self.iterable_option:
            # Check if the iterable option is a valid feature index
            if idx not in self.feature_info:
                _errmsg = (
                    f"Input is not a valid feature index, obtained: {idx} "
                    f"(of type '{type(idx).__name__}')"
                )
                raise PydanticCustomError("", _errmsg)

            # Check if the index corresponds to an atom feature of data type float or int
            _feature_name = self.feature_info[idx]["name"]
            _feature_type = self.feature_info[idx]["feature_type"]
            _data_type = self.feature_info[idx]["data_type"]
            if _feature_type != "atom" or _data_type not in ["float", "int"]:
                _errmsg = (
                    "Input is not a feature index corresponding to an atom feature of type "
                    f"'int' or 'float', obtained: {idx} (of type '{type(idx).__name__}')"
                )
                raise PydanticCustomError("", _errmsg)

            _feature_names.append(_feature_name)

        # Replace the feature indices in the iterable options list with the feature names
        self.iterable_option = _feature_names
        return self
[docs]
class ValidateAlfabet(BaseModel):
    """Validate the configuration settings for the alfabet features.

    Attributes
    ----------
    python_interpreter_path : StrictStr
        The path to a Python interpreter. The value is kept exactly as provided (no
        whitespace stripping or lowercasing). Presumably the interpreter of the environment
        in which alfabet is installed (to be confirmed against the caller).
    """

    # Don't standardize the string to avoid changing the path
    python_interpreter_path: StrictStr
[docs]
class ValidateBonafideAutocorrelation(_ValidateIterableIntOptionMixin, BaseModel):
    """Validate the configuration settings for the autocorrelation features.

    The validation of ``iterable_option`` against ``feature_info`` is inherited from
    ``_ValidateIterableIntOptionMixin``.

    Attributes
    ----------
    depth : StrictInt
        The depth of the autocorrelation, must be a positive integer.
    iterable_option : List[StrictInt]
        A list of feature indices to be used for the autocorrelation calculation. A single
        integer is also accepted and is wrapped into a list. After validation, the indices
        are replaced by the corresponding feature names.
    feature_info : Dict[int, Dict[str, Any]]
        A dictionary containing information about the available features, where keys are
        feature indices and values are dictionaries with feature details.
    """

    depth: StrictInt = Field(gt=0)
    iterable_option: List[StrictInt]
    feature_info: Dict[int, Dict[str, Any]]
[docs]
class ValidateBonafideConstant(BaseModel):
    """Validate the configuration settings for the constant atom/bond features.

    Attributes
    ----------
    atom_constant : StrictStr
        The constant value to be assigned to the requested atoms. Kept exactly as provided.
    bond_constant : StrictStr
        The constant value to be assigned to the requested bonds. Kept exactly as provided.
    """

    # Don't standardize strings to avoid overwriting the custom user input
    atom_constant: StrictStr
    bond_constant: StrictStr
[docs]
class ValidateBonafideDistance(BaseModel):
    """Validate the configuration settings for the distance-based features.

    Attributes
    ----------
    n_bonds_cutoff : StrictInt
        The number of bonds to consider for the feature calculation as a distance cutoff,
        must be a positive integer.
    radius_cutoff : StrictFloat
        The radius in Angstrom to consider for the feature calculation as a distance cutoff,
        must be a positive float.
    """

    n_bonds_cutoff: StrictInt = Field(gt=0)
    radius_cutoff: StrictFloat = Field(gt=0)
[docs]
class ValidateBonafideFunctionalGroup(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the functional group features.

    Attributes
    ----------
    key_level : StrictStr
        The key level of the functional group analysis; it controls how fine-grained the
        analysis is. Must be one of ``FUNCTIONAL_GROUP_KEY_LEVELS``.
    custom_groups : List[List[StrictStr]]
        User-defined functional groups. Each group is a two-element list holding the name of
        the functional group followed by the SMARTS pattern that defines it.
    """

    key_level: StrictStr
    custom_groups: List[List[StrictStr]]

    @field_validator("key_level")
    @classmethod
    def validate_key_level(cls, value: str) -> str:
        """Validate ``key_level``.

        Parameters
        ----------
        value : str
            The key level to be checked.

        Returns
        -------
        str
            The unchanged key level if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the key level is not contained in ``FUNCTIONAL_GROUP_KEY_LEVELS``.
        """
        if value in FUNCTIONAL_GROUP_KEY_LEVELS:
            return value
        raise PydanticCustomError("", f"Input must be one of {FUNCTIONAL_GROUP_KEY_LEVELS}")

    @field_validator("custom_groups")
    @classmethod
    def validate_custom_groups(cls, value: List[List[str]]) -> List[List[str]]:
        """Validate ``custom_groups``.

        Parameters
        ----------
        value : List[List[str]]
            The custom functional groups to be checked.

        Returns
        -------
        List[List[str]]
            The unchanged list of custom functional groups.

        Raises
        ------
        PydanticCustomError
            If any of the groups does not consist of exactly two entries.
        """
        has_malformed_group = any(len(group) != 2 for group in value)
        if has_malformed_group:
            message = (
                "Each custom functional group must be a list of length 2, with the first entry "
                "being the name of the functional group and the second entry being the SMARTS "
                "pattern defining the group"
            )
            raise PydanticCustomError("", message)
        return value
[docs]
class ValidateBonafideOxidationState(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the oxidation state feature.

    Attributes
    ----------
    en_scale : StrictStr
        The electronegativity scale used for the oxidation state calculation. Must be one of
        ``ELECTRONEGATIVITY_EN_SCALES``.
    """

    en_scale: StrictStr

    @field_validator("en_scale")
    @classmethod
    def validate_en_scale(cls, value: str) -> str:
        """Validate ``en_scale``.

        Parameters
        ----------
        value : str
            The name of the electronegativity scale to be checked.

        Returns
        -------
        str
            The unchanged name of the electronegativity scale if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the scale is not contained in ``ELECTRONEGATIVITY_EN_SCALES``.
        """
        if value in ELECTRONEGATIVITY_EN_SCALES:
            return value
        raise PydanticCustomError("", f"Input must be one of {ELECTRONEGATIVITY_EN_SCALES}")
[docs]
class ValidateBonafideSymmetry(BaseModel):
    """Validate the configuration settings for the symmetry feature.

    For further details, please refer to the RDKit documentation
    (https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html, last accessed on
    14.10.2025).

    Attributes
    ----------
    reduce_to_canonical : StrictBool
        Whether to calculate features only for the first of the symmetry-equivalent atoms in
        the canonical rank atom list.
    includeChirality : StrictBool
        Whether to include chirality information when calculating the symmetry feature.
    includeIsotopes : StrictBool
        Whether to consider isotopes when calculating the symmetry feature.
    includeAtomMaps : StrictBool
        Whether to include atom mapping numbers when calculating the symmetry feature.
    includeChiralPresence : StrictBool
        Whether to include the presence of chiral centers when calculating the symmetry
        feature.
    """

    reduce_to_canonical: StrictBool
    includeChirality: StrictBool
    includeIsotopes: StrictBool
    includeAtomMaps: StrictBool
    includeChiralPresence: StrictBool
[docs]
class ValidateDbstep(BaseModel):
    """Validate the configuration settings for the dbstep features.

    For further details, please refer to the dbstep repository
    (https://github.com/patonlab/DBSTEP, last accessed on 05.09.2025).

    Attributes
    ----------
    r : StrictFloat
        The cutoff radius, must be a positive float.
    scan : List[StrictFloat]
        Three values defining the scan range and step size, or an empty list. After
        validation, the values are joined with ":" into a string; an empty list becomes
        ``False``.
    exclude : List[StrictInt]
        The atom indices to be excluded from the feature calculation. After validation, the
        indices are joined with "," into a string; an empty list becomes ``False``.
    noH : StrictBool
        Whether to exclude hydrogen atoms from the feature calculation.
    addmetals : StrictBool
        Whether to include metal atoms in the feature calculation.
    grid : StrictFloat
        The grid point spacing, must be a positive float.
    vshell : StrictBool
        Whether to calculate the buried volume of a hollow sphere.
    scalevdw : StrictFloat
        The scaling factor for van-der-Waals radii, must be a positive float.
    """

    r: StrictFloat = Field(gt=0)
    scan: List[StrictFloat]
    exclude: List[StrictInt]
    noH: StrictBool
    addmetals: StrictBool
    grid: StrictFloat = Field(gt=0)
    vshell: StrictBool
    scalevdw: StrictFloat = Field(gt=0)

    @field_validator("scan")
    @classmethod
    def validate_scan(cls, value: List[float]) -> Union[str, bool]:
        """Validate ``scan``.

        Parameters
        ----------
        value : List[float]
            The scan definition to be checked.

        Returns
        -------
        Union[str, bool]
            The three values joined with ":" into a single string, or ``False`` if the input
            is empty.

        Raises
        ------
        PydanticCustomError
            If the input is neither empty nor of length 3.
        """
        n_values = len(value)
        if n_values == 0:
            return False
        if n_values != 3:
            raise PydanticCustomError(
                "", "Input must contain exactly 3 values if not left empty"
            )
        return ":".join(str(entry) for entry in value)

    @field_validator("exclude")
    @classmethod
    def validate_exclude(cls, value: List[int]) -> Union[str, bool]:
        """Validate ``exclude``.

        Parameters
        ----------
        value : List[int]
            The atom indices to be formatted.

        Returns
        -------
        Union[str, bool]
            The atom indices joined with "," into a single string, or ``False`` if the input
            is empty.
        """
        if not value:
            return False
        return ",".join(str(atom_index) for atom_index in value)
[docs]
class ValidateDscribeAcsf(_ValidateSpeciesMixin, BaseModel):
    """Validate the configuration settings for the dscribe atom-centered symmetry functions
    feature.

    For further details, please refer to the dscribe documentation
    (https://singroup.github.io/dscribe/0.3.x/index.html, last accessed on 05.09.2025).

    The ``g*_params`` fields are annotated as ``Any`` because they accept either the string
    "none" or a (nested) list of numbers; their structure is checked by ``validate_params``.

    Attributes
    ----------
    r_cut : StrictFloat
        The smooth cutoff radius, must be a positive float.
    species : List[StrictStr]
        A list of chemical element symbols to be considered in the feature calculation.
    g2_params : Any
        The parameters for the G2 symmetry functions: "none" or a non-empty list of
        two-element lists of numbers.
    g3_params : Any
        The parameters for the G3 symmetry functions: "none" or a non-empty list of numbers.
    g4_params : Any
        The parameters for the G4 symmetry functions: "none" or a non-empty list of
        three-element lists of numbers.
    g5_params : Any
        The parameters for the G5 symmetry functions: "none" or a non-empty list of
        three-element lists of numbers.
    """

    r_cut: StrictFloat = Field(gt=0)
    species: List[StrictStr]
    g2_params: Any
    g3_params: Any
    g4_params: Any
    g5_params: Any

    @field_validator("g2_params", "g3_params", "g4_params", "g5_params")
    @classmethod
    def validate_params(cls, value: Any, info: ValidationInfo) -> Optional[Any]:
        """Validate ``g2_params``, ``g3_params``, ``g4_params``, and ``g5_params``.

        All numbers are converted to ``float``. The validated data is returned as a new
        (nested) list; the object passed in by the user is not modified.

        Parameters
        ----------
        value : Any
            The value to be validated.
        info : ValidationInfo
            Information about the field being validated; ``field_name`` selects the expected
            structure of ``value``.

        Returns
        -------
        Optional[Any]
            ``None`` if the input is the string "none" (case-insensitive, surrounding
            whitespace ignored), otherwise the validated list with all numbers as floats.

        Raises
        ------
        PydanticCustomError
            If the input is neither "none" nor a non-empty list, or if the list does not have
            the structure expected for the respective field.
        """
        _type_errmsg = "Input must be of type list or 'none'"
        if isinstance(value, str):
            if value.strip().lower() == "none":
                return None
            raise PydanticCustomError("", _type_errmsg)
        if not isinstance(value, list):
            raise PydanticCustomError("", _type_errmsg)
        if len(value) == 0:
            raise PydanticCustomError("", "Input must not be an empty list")

        def _to_float(entry: Any, where: str) -> float:
            """Convert a single int or float entry to float or raise a validation error."""
            # bool is a subclass of int but is not a meaningful parameter value
            if isinstance(entry, (int, float)) and not isinstance(entry, bool):
                try:
                    return float(entry)
                except OverflowError:
                    # Integers too large for a float are reported like any other invalid entry
                    pass
            _errmsg = (
                f"Each entry in the {where} must be of type int or float, but obtained "
                f"{type(entry).__name__}"
            )
            raise PydanticCustomError("", _errmsg)

        # G3 parameters are a flat list of numbers
        if info.field_name == "g3_params":
            return [_to_float(el, "list") for el in value]

        # G2 parameters are pairs, G4 and G5 parameters are triples
        _n_entries = 2 if info.field_name == "g2_params" else 3
        _validated = []
        for el in value:
            if not isinstance(el, list):
                _errmsg = (
                    f"Each entry in the list must be of type list, but obtained {type(el).__name__}"
                )
                raise PydanticCustomError("", _errmsg)
            if len(el) != _n_entries:
                _errmsg = f"Each inner list must contain exactly {_n_entries} entries"
                raise PydanticCustomError("", _errmsg)
            _validated.append([_to_float(val, "inner list") for val in el])
        return _validated
[docs]
class ValidateDscribeCoulombMatrix(BaseModel):
    """Validate the configuration settings for the dscribe Coulomb matrix-based feature.

    For further details, please refer to the dscribe documentation
    (https://singroup.github.io/dscribe/0.3.x/index.html, last accessed on 05.09.2025).

    Attributes
    ----------
    scaling_exponent : StrictFloat
        The exponent used for the distance scaling. No range restriction is applied.
    """

    scaling_exponent: StrictFloat
[docs]
class ValidateDscribeLmbtr(_StandardizeStrMixin, _ValidateSpeciesMixin, BaseModel):
    """Validate the configuration settings for the dscribe local many-body tensor representation
    feature.

    For further details, please refer to the dscribe documentation
    (https://singroup.github.io/dscribe/0.3.x/index.html, last accessed on 05.09.2025).

    Attributes
    ----------
    species : List[StrictStr]
        A list of chemical element symbols to be considered in the feature calculation.
    geometry_function : StrictStr
        The name of the geometry function.
    grid_min : StrictFloat
        The minimum value of the grid, must be a float.
    grid_max : StrictFloat
        The maximum value of the grid, must be a float.
    grid_sigma : StrictFloat
        The width of the Gaussian functions, must be a positive float.
    grid_n : StrictFloat
        The number of grid points, must be non-negative.
        NOTE(review): described as an integer count but annotated as ``StrictFloat``;
        confirm the intended type against the code consuming this setting.
    weighting_function : StrictStr
        The name of the weighting function.
    weighting_scale : StrictFloat
        The scaling factor of the weighting function, must be a float.
    weighting_threshold : StrictFloat
        The threshold of the weighting function, must be a positive float.
    normalize_gaussians : StrictBool
        Whether to normalize the Gaussians to an area of 1.
    normalization : StrictStr
        The normalization method.
    """

    species: List[StrictStr]
    geometry_function: StrictStr
    grid_min: StrictFloat
    grid_max: StrictFloat
    # The Gaussian width is documented as positive, so enforce it here
    grid_sigma: StrictFloat = Field(gt=0)
    grid_n: StrictFloat = Field(ge=0)
    weighting_function: StrictStr
    weighting_scale: StrictFloat
    weighting_threshold: StrictFloat = Field(gt=0)
    normalize_gaussians: StrictBool
    normalization: StrictStr

    @field_validator("geometry_function")
    @classmethod
    def validate_geometry_function(cls, value: str) -> str:
        """Validate ``geometry_function``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The unchanged name of the geometry function if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``GEOMETRY_FUNCTION_METHODS_DSCRIBE_LMBTR``.
        """
        if value not in GEOMETRY_FUNCTION_METHODS_DSCRIBE_LMBTR:
            _errmsg = f"Input must be one of {GEOMETRY_FUNCTION_METHODS_DSCRIBE_LMBTR}"
            raise PydanticCustomError("", _errmsg)
        return value

    @field_validator("weighting_function")
    @classmethod
    def validate_weighting_function(cls, value: str) -> str:
        """Validate ``weighting_function``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The unchanged name of the weighting function if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``WEIGHTING_FUNCTION_METHODS_DSCRIBE_LMBTR``.
        """
        if value not in WEIGHTING_FUNCTION_METHODS_DSCRIBE_LMBTR:
            _errmsg = f"Input must be one of {WEIGHTING_FUNCTION_METHODS_DSCRIBE_LMBTR}"
            raise PydanticCustomError("", _errmsg)
        return value

    @field_validator("normalization")
    @classmethod
    def validate_normalization(cls, value: str) -> str:
        """Validate ``normalization``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The unchanged name of the normalization method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``NORMALIZATION_METHODS_DSCRIBE_LMBTR``.
        """
        if value not in NORMALIZATION_METHODS_DSCRIBE_LMBTR:
            _errmsg = f"Input must be one of {NORMALIZATION_METHODS_DSCRIBE_LMBTR}"
            raise PydanticCustomError("", _errmsg)
        return value
[docs]
class ValidateDscribeSoap(_StandardizeStrMixin, _ValidateSpeciesMixin, BaseModel):
    """Validate the configuration settings for the dscribe smooth overlap of atomic positions
    feature.

    For further details, please refer to the dscribe documentation
    (https://singroup.github.io/dscribe/0.3.x/index.html, last accessed on 05.09.2025).

    Attributes
    ----------
    r_cut : StrictFloat
        The cutoff to define the local environment, must be a positive float.
    n_max : StrictInt
        The number of radial basis functions, must be a positive integer.
    l_max : StrictInt
        The maximum degree of spherical harmonics, must be a non-negative integer.
    species : List[StrictStr]
        A list of chemical element symbols to be considered in the feature calculation.
    sigma : StrictFloat
        The width of the Gaussian functions, must be a positive float.
    rbf : StrictStr
        The radial basis function.
    average : StrictStr
        The averaging method.
    """

    # The cutoff is documented as positive, so enforce it here
    r_cut: StrictFloat = Field(gt=0)
    n_max: StrictInt = Field(gt=0)
    l_max: StrictInt = Field(ge=0)
    species: List[StrictStr]
    sigma: StrictFloat = Field(gt=0)
    rbf: StrictStr
    average: StrictStr

    @field_validator("rbf")
    @classmethod
    def validate_rbf(cls, value: str) -> str:
        """Validate ``rbf``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The unchanged name of the radial basis function if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``RBF_METHODS_DSCRIBE_SOAP``.
        """
        if value not in RBF_METHODS_DSCRIBE_SOAP:
            _errmsg = f"Input must be one of {RBF_METHODS_DSCRIBE_SOAP}"
            raise PydanticCustomError("", _errmsg)
        return value

    @field_validator("average")
    @classmethod
    def validate_average(cls, value: str) -> str:
        """Validate ``average``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The unchanged name of the averaging method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``AVERAGE_METHODS_DSCRIBE_SOAP``.
        """
        if value not in AVERAGE_METHODS_DSCRIBE_SOAP:
            _errmsg = f"Input must be one of {AVERAGE_METHODS_DSCRIBE_SOAP}"
            raise PydanticCustomError("", _errmsg)
        return value
[docs]
class ValidateKallisto(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Kallisto features.

    For further details, please refer to the Kallisto documentation
    (https://ehjc.gitbook.io/kallisto/, last accessed on 05.09.2025).

    Attributes
    ----------
    cntype : StrictStr
        The coordination number calculation method, one of ``CNTYPE_METHODS_KALLISTO``.
    size : List[StrictInt]
        The definition of the proximity shell: two integers, the first strictly smaller than
        the second. After validation, the two values are stored as a tuple of strings.
    vdwtype : StrictStr
        The method defining the reference van-der-Waals radii, one of
        ``VDWTYPE_METHODS_KALLISTO``.
    angstrom : StrictBool
        Whether to calculate van-der-Waals radii in Angstrom.
    """

    cntype: StrictStr
    size: List[StrictInt]
    vdwtype: StrictStr
    angstrom: StrictBool

    @field_validator("cntype")
    @classmethod
    def validate_cntype(cls, value: str) -> str:
        """Validate ``cntype``.

        Parameters
        ----------
        value : str
            The name of the coordination number method to be checked.

        Returns
        -------
        str
            The unchanged name of the coordination number method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``CNTYPE_METHODS_KALLISTO``.
        """
        if value in CNTYPE_METHODS_KALLISTO:
            return value
        raise PydanticCustomError("", f"Input must be one of {CNTYPE_METHODS_KALLISTO}")

    @field_validator("size", mode="before")
    @classmethod
    def validate_size_before(cls, value: Any) -> List[int]:
        """Validate ``size`` before type validation.

        Parameters
        ----------
        value : Any
            The raw definition of the proximity shell.

        Returns
        -------
        List[int]
            The unchanged input if it is a list of exactly two integers.

        Raises
        ------
        PydanticCustomError
            If the input is not a list, does not have exactly two entries, or contains an
            entry that is not of type int.
        """
        # Exact type checks on purpose: booleans must not pass as integers
        is_two_int_list = (
            type(value) is list
            and len(value) == 2
            and all(type(entry) is int for entry in value)
        )
        if not is_two_int_list:
            raise PydanticCustomError(
                "", "Input must be a list consisting exactly of two integer numbers"
            )
        return value

    @field_validator("size", mode="after")
    @classmethod
    def validate_size_after(cls, value: List[int]) -> Tuple[str, str]:
        """Validate ``size`` after type validation.

        Parameters
        ----------
        value : List[int]
            The two integers defining the proximity shell.

        Returns
        -------
        Tuple[str, str]
            The two values converted to strings.

        Raises
        ------
        PydanticCustomError
            If the first value is not strictly smaller than the second value.
        """
        lower, upper = value[0], value[1]
        if lower < upper:
            return (str(lower), str(upper))
        raise PydanticCustomError(
            "", "Input value at index 0 must be smaller than input value at index 1"
        )

    @field_validator("vdwtype")
    @classmethod
    def validate_vdwtype(cls, value: str) -> str:
        """Validate ``vdwtype``.

        Parameters
        ----------
        value : str
            The name of the van-der-Waals radius method to be checked.

        Returns
        -------
        str
            The unchanged name of the van-der-Waals radius method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``VDWTYPE_METHODS_KALLISTO``.
        """
        if value in VDWTYPE_METHODS_KALLISTO:
            return value
        raise PydanticCustomError("", f"Input must be one of {VDWTYPE_METHODS_KALLISTO}")
[docs]
class ValidateMendeleev(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Mendeleev features.

    For further details, please refer to the Mendeleev documentation
    (https://mendeleev.readthedocs.io/en/stable/, last accessed on 05.09.2025).

    Attributes
    ----------
    method : StrictStr
        The method used for the effective nuclear charge calculation, one of
        ``METHOD_METHODS_MENDELEEV``.
    alle : StrictBool
        Whether to include all valence electrons in the effective nuclear charge calculation.
    """

    method: StrictStr
    alle: StrictBool

    @field_validator("method")
    @classmethod
    def validate_method(cls, value: str) -> str:
        """Validate ``method``.

        Parameters
        ----------
        value : str
            The name of the method to be checked.

        Returns
        -------
        str
            The unchanged name of the method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``METHOD_METHODS_MENDELEEV``.
        """
        if value in METHOD_METHODS_MENDELEEV:
            return value
        raise PydanticCustomError("", f"Input must be one of {METHOD_METHODS_MENDELEEV}")
[docs]
class ValidateMorfeusBuriedVolume(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Morfeus buried volume features.

    For further details, please refer to the Morfeus documentation
    (https://digital-chemistry-laboratory.github.io/morfeus/index.html, last accessed on
    05.09.2025).

    Attributes
    ----------
    excluded_atoms : List[StrictInt]
        The atom indices to be excluded from the feature calculation.
    radii : List[StrictFloat]
        The atomic radii to be used for the feature calculation.
    include_hs : StrictBool
        Whether to include hydrogen atoms.
    radius : StrictFloat
        The radius of the reference sphere around the specified atom, must be a positive
        float.
    radii_type : StrictStr
        The atomic radius scheme to be used for the feature calculation, one of
        ``RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE``.
    radii_scale : StrictFloat
        The scaling factor for the atomic radii, must be a positive float.
    density : StrictFloat
        The density of the grid points on the molecular surface, must be a positive float.
    z_axis_atoms : List[StrictInt]
        The atom indices defining the z-axis.
    xz_plane_atoms : List[StrictInt]
        The atom indices defining the xz-plane.
    distal_volume_method : StrictStr
        The method to be used for the distal volume calculation, one of
        ``DISTAL_VOLUME_METHODS_MORFEUS_BV``.
    distal_volume_sasa_density : StrictFloat
        The density of the grid points for the distal volume solvent-accessible surface area
        calculation, must be a positive float.
    """

    excluded_atoms: List[StrictInt]
    radii: List[StrictFloat]
    include_hs: StrictBool
    radius: StrictFloat = Field(gt=0)
    radii_type: StrictStr
    radii_scale: StrictFloat = Field(gt=0)
    density: StrictFloat = Field(gt=0)
    z_axis_atoms: List[StrictInt]
    xz_plane_atoms: List[StrictInt]
    distal_volume_method: StrictStr
    distal_volume_sasa_density: StrictFloat = Field(gt=0)

    @field_validator("radii_type")
    @classmethod
    def validate_radii_type(cls, value: str) -> str:
        """Validate ``radii_type``.

        Parameters
        ----------
        value : str
            The name of the radius type to be checked.

        Returns
        -------
        str
            The unchanged name of the radius type if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in
            ``RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE``.
        """
        if value in RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE:
            return value
        raise PydanticCustomError(
            "", f"Input must be one of {RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE}"
        )

    @field_validator("distal_volume_method")
    @classmethod
    def validate_distal_volume_method(cls, value: str) -> str:
        """Validate ``distal_volume_method``.

        Parameters
        ----------
        value : str
            The name of the distal volume method to be checked.

        Returns
        -------
        str
            The unchanged name of the distal volume method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``DISTAL_VOLUME_METHODS_MORFEUS_BV``.
        """
        if value in DISTAL_VOLUME_METHODS_MORFEUS_BV:
            return value
        raise PydanticCustomError(
            "", f"Input must be one of {DISTAL_VOLUME_METHODS_MORFEUS_BV}"
        )
[docs]
class ValidateMorfeusConeAndSolidAngle(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Morfeus cone and solid angle features.

    For further details, please refer to the Morfeus documentation
    (https://digital-chemistry-laboratory.github.io/morfeus/index.html, last accessed on
    05.09.2025).

    Attributes
    ----------
    radii : List[StrictFloat]
        The atomic radii to be used for the feature calculation.
    radii_type : StrictStr
        The atomic radius scheme to be used for the feature calculation, one of
        ``RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE``.
    density : StrictFloat
        The density of the grid points on the molecular surface, must be a positive float.
        Only relevant for the solid angle calculation.
    """

    radii: List[StrictFloat]
    radii_type: StrictStr
    density: StrictFloat = Field(gt=0)

    @field_validator("radii_type")
    @classmethod
    def validate_radii_type(cls, value: str) -> str:
        """Validate ``radii_type``.

        Parameters
        ----------
        value : str
            The name of the radius type to be checked.

        Returns
        -------
        str
            The unchanged name of the radius type if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in
            ``RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE``.
        """
        if value in RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE:
            return value
        raise PydanticCustomError(
            "", f"Input must be one of {RADII_TYPE_METHODS_MORFEUS_BV_CONE_SOLID_ANGLE}"
        )
[docs]
class ValidateMorfeusDispersion(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Morfeus dispersion features.

    For further details, please refer to the Morfeus documentation
    (https://digital-chemistry-laboratory.github.io/morfeus/index.html, last accessed on
    05.09.2025).

    Attributes
    ----------
    radii : List[StrictFloat]
        The atomic radii to be used for the feature calculation.
    radii_type : StrictStr
        The atomic radius scheme to be used for the feature calculation, one of
        ``RADII_TYPE_METHODS_MORFEUS_DISPERSION``.
    density : StrictFloat
        The density of the grid points on the molecular surface, must be a positive float.
    excluded_atoms : List[StrictInt]
        The atom indices to be excluded from the feature calculation.
    included_atoms : List[StrictInt]
        The atom indices to be included in the feature calculation.
    """

    radii: List[StrictFloat]
    radii_type: StrictStr
    density: StrictFloat = Field(gt=0)
    excluded_atoms: List[StrictInt]
    included_atoms: List[StrictInt]

    @field_validator("radii_type")
    @classmethod
    def validate_radii_type(cls, value: str) -> str:
        """Validate ``radii_type``.

        Parameters
        ----------
        value : str
            The name of the radius type to be checked.

        Returns
        -------
        str
            The unchanged name of the radius type if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``RADII_TYPE_METHODS_MORFEUS_DISPERSION``.
        """
        if value in RADII_TYPE_METHODS_MORFEUS_DISPERSION:
            return value
        raise PydanticCustomError(
            "", f"Input must be one of {RADII_TYPE_METHODS_MORFEUS_DISPERSION}"
        )
[docs]
class ValidateMorfeusLocalForce(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Morfeus local force features.

    For further details, please refer to the Morfeus documentation
    (https://digital-chemistry-laboratory.github.io/morfeus/index.html, last accessed on
    05.09.2025).

    Attributes
    ----------
    method : StrictStr
        The name of the calculation method, one of ``METHODS_MORFEUS_LOCAL_FORCE``.
    project_imag : StrictBool
        Boolean switch; presumably controls whether imaginary frequencies are projected out
        (to be confirmed against the Morfeus documentation).
    imag_cutoff : StrictFloat
        Must be a positive float; presumably the cutoff applied to imaginary frequencies (to
        be confirmed against the Morfeus documentation).
    save_hessian : StrictBool
        Boolean switch; presumably controls whether the Hessian is saved (to be confirmed
        against the caller).
    """

    method: StrictStr
    project_imag: StrictBool
    imag_cutoff: StrictFloat = Field(gt=0)
    save_hessian: StrictBool

    @field_validator("method")
    @classmethod
    def validate_method(cls, value: str) -> str:
        """Validate ``method``.

        Parameters
        ----------
        value : str
            The name of the method to be checked.

        Returns
        -------
        str
            The unchanged name of the method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in ``METHODS_MORFEUS_LOCAL_FORCE``.
        """
        if value in METHODS_MORFEUS_LOCAL_FORCE:
            return value
        raise PydanticCustomError("", f"Input must be one of {METHODS_MORFEUS_LOCAL_FORCE}")
[docs]
class ValidateMorfeusPyramidalization(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Morfeus pyramidalization features.

    For further details, please refer to the Morfeus documentation
    (https://digital-chemistry-laboratory.github.io/morfeus/index.html, last accessed on
    05.09.2025).

    Attributes
    ----------
    radii : List[StrictFloat]
        The atomic radii to be used for the feature calculation.
    excluded_atoms : List[StrictInt]
        The atom indices to be excluded from the feature calculation.
    method : StrictStr
        The pyramidalization calculation method, one of
        ``PYRAMIDALIZATION_CALCULATION_METHODS_MORFEUS_PYRAMIDALIZATION``.
    scale_factor : StrictFloat
        The scaling factor for determining connectivity, must be a positive float.
    """

    radii: List[StrictFloat]
    excluded_atoms: List[StrictInt]
    method: StrictStr
    scale_factor: StrictFloat = Field(gt=0)

    @field_validator("method")
    @classmethod
    def validate_method(cls, value: str) -> str:
        """Validate ``method``.

        Parameters
        ----------
        value : str
            The name of the pyramidalization method to be checked.

        Returns
        -------
        str
            The unchanged name of the pyramidalization method if it is an allowed option.

        Raises
        ------
        PydanticCustomError
            If the value is not contained in
            ``PYRAMIDALIZATION_CALCULATION_METHODS_MORFEUS_PYRAMIDALIZATION``.
        """
        # Short alias to keep the lines below within the line length limit
        allowed_methods = PYRAMIDALIZATION_CALCULATION_METHODS_MORFEUS_PYRAMIDALIZATION
        if value in allowed_methods:
            return value
        raise PydanticCustomError("", f"Input must be one of {allowed_methods}")
[docs]
class ValidateMorfeusSasa(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for the Morfeus solvent-accessible surface area
    (SASA) features.

    For further details, please refer to the Morfeus documentation
    (https://digital-chemistry-laboratory.github.io/morfeus/index.html, last accessed on
    05.09.2025).

    Attributes
    ----------
    radii : List[StrictFloat]
        A list of atomic radii to be used for the SASA calculation.
    radii_type : StrictStr
        The name of the atomic radius scheme to be used for the SASA calculation, must be
        contained in ``RADII_TYPE_METHODS_MORFEUS_SASA``.
    probe_radius : StrictFloat
        The radius of the probe sphere, must be a positive float.
    density : StrictFloat
        The density of the grid points on the molecular surface, must be a positive float.
    """

    radii: List[StrictFloat]
    radii_type: StrictStr
    probe_radius: StrictFloat = Field(gt=0)
    density: StrictFloat = Field(gt=0)

    @field_validator("radii_type")
    @classmethod
    def validate_radii_type(cls, value: str) -> str:
        """Ensure that ``radii_type`` is a supported atomic radius scheme.

        Parameters
        ----------
        value : str
            The radius type name to be checked.

        Returns
        -------
        str
            The unchanged radius type name if it is supported.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not contained in ``RADII_TYPE_METHODS_MORFEUS_SASA``.
        """
        if value in RADII_TYPE_METHODS_MORFEUS_SASA:
            return value
        raise PydanticCustomError("", f"Input must be one of {RADII_TYPE_METHODS_MORFEUS_SASA}")
[docs]
class ValidateMultiwfnRootData(BaseModel):
    """Check type and format of the root-level settings for Multiwfn.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    """

    OMP_STACKSIZE: Optional[StrictStr] = None
    NUM_THREADS: Optional[StrictInt] = Field(None, gt=0)
[docs]
class ValidateMultiwfnBondAnalysis(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for the Multiwfn bond analysis features.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    ibis_igm_type : StrictStr
        The name of the IGM type, must be contained in ``IGM_TYPES_MULTIWFN_BOND_ANALYSIS``.
    ibsi_grid : StrictStr
        The quality of the grid for the calculation of the intrinsic bond strength index. After
        validation, the name is replaced by the value it maps to in
        ``IBIS_GRID_METHODS_MULTIWFN_BOND_ANALYSIS``.
    connectivity_index_threshold : StrictFloat
        The threshold for considering atom connectivity, must be a positive float.
    """

    OMP_STACKSIZE: Optional[StrictStr] = Field(default=None)
    NUM_THREADS: Optional[StrictInt] = Field(default=None, gt=0)
    ibis_igm_type: StrictStr
    ibsi_grid: StrictStr
    connectivity_index_threshold: StrictFloat = Field(gt=0)

    @field_validator("ibis_igm_type")
    @classmethod
    def validate_ibis_igm_type(cls, value: str) -> str:
        """Ensure that ``ibis_igm_type`` is a supported IGM type.

        Parameters
        ----------
        value : str
            The IGM type name to be checked.

        Returns
        -------
        str
            The unchanged name of the selected IGM type.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not contained in ``IGM_TYPES_MULTIWFN_BOND_ANALYSIS``.
        """
        if value in IGM_TYPES_MULTIWFN_BOND_ANALYSIS:
            return value
        raise PydanticCustomError("", f"Input must be one of {IGM_TYPES_MULTIWFN_BOND_ANALYSIS}")

    @field_validator("ibsi_grid")
    @classmethod
    def validate_ibsi_grid(cls, value: Any) -> int:
        """Translate ``ibsi_grid`` into the index of the selected grid quality.

        Parameters
        ----------
        value : Any
            The grid quality name to be checked.

        Returns
        -------
        int
            The index stored for the selected grid quality in
            ``IBIS_GRID_METHODS_MULTIWFN_BOND_ANALYSIS``.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``IBIS_GRID_METHODS_MULTIWFN_BOND_ANALYSIS``.
        """
        grid_qualities = IBIS_GRID_METHODS_MULTIWFN_BOND_ANALYSIS
        if value in grid_qualities:
            return grid_qualities[value]
        raise PydanticCustomError("", f"Input must be one of {list(grid_qualities)}")
[docs]
class ValidateMultiwfnCdft(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for the Multiwfn conceptual DFT features.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    iterable_option : List[StrictStr]
        A list of population analysis schemes to be used for the calculation of the conceptual DFT
        features. A single string is accepted as well and is wrapped into a one-element list.
    ow_delta : StrictFloat
        The delta parameter for the calculation of orbital-weighted Fukui indices, must be a
        positive float.
    """

    OMP_STACKSIZE: Optional[StrictStr] = Field(default=None)
    NUM_THREADS: Optional[StrictInt] = Field(default=None, gt=0)
    iterable_option: List[StrictStr]
    ow_delta: StrictFloat = Field(gt=0)

    @field_validator("iterable_option", mode="before")
    @classmethod
    def validate_iterable_option_before(cls, value: Any) -> Any:
        """Validate ``iterable_option`` before type validation.

        Empty inputs are rejected. A single string is stripped, lowercased, and wrapped into a
        list; the entries of a list are converted to stripped, lowercased strings. Any other
        input is passed on unchanged and left to the subsequent type validation.

        Parameters
        ----------
        value : Any
            The value to be validated.

        Returns
        -------
        Any
            The pre-validated iterable options.

        Raises
        ------
        PydanticCustomError
            If ``value`` is ``None``, an empty string, or an empty list.
        """
        if value is None or value == "" or value == []:
            raise PydanticCustomError("", "Input must not be empty")

        if isinstance(value, str):
            return [value.strip().lower()]

        if isinstance(value, list):
            try:
                return [str(entry).strip().lower() for entry in value]
            except Exception:
                # Best effort only: an entry that cannot be converted is left for the type
                # validation to reject. ``Exception`` (instead of a bare ``except``) keeps
                # KeyboardInterrupt and SystemExit propagating.
                return value

        return value

    @field_validator("iterable_option", mode="after")
    @classmethod
    def validate_iterable_option_after(cls, value: List[str]) -> List[str]:
        """Validate ``iterable_option`` after type validation.

        Parameters
        ----------
        value : List[str]
            The value to be validated.

        Returns
        -------
        List[str]
            The validated iterable.

        Raises
        ------
        PydanticCustomError
            If any entry is not contained in ``ITERABLE_OPTIONS_MULTIWFN_CDFT``.
        """
        if any(option not in ITERABLE_OPTIONS_MULTIWFN_CDFT for option in value):
            _errmsg = f"Input must only contain {ITERABLE_OPTIONS_MULTIWFN_CDFT}"
            raise PydanticCustomError("", _errmsg)
        return value
[docs]
class ValidateMultiwfnFuzzy(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for the Multiwfn fuzzy space analysis features.

    The validated names of ``integration_grid``, ``radius_becke_partition``,
    ``partitioning_scheme``, and ``real_space_function`` are replaced by the values they map to in
    the respective lookup tables.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    integration_grid : StrictStr
        The name of the integration grid method.
    exclude_atoms : List[StrictInt]
        A list of atom indices to be excluded from the feature calculation.
    n_iterations_becke_partition : StrictInt
        The number of iterations for the Becke partitioning, must be a positive integer.
    radius_becke_partition : StrictStr
        The name of the method for the radius in Becke partitioning.
    partitioning_scheme : StrictStr
        The name of the partitioning scheme.
    real_space_function : StrictStr
        The name of the real space function to be used.
    """

    OMP_STACKSIZE: Optional[StrictStr] = Field(default=None)
    NUM_THREADS: Optional[StrictInt] = Field(default=None, gt=0)
    integration_grid: StrictStr
    exclude_atoms: List[StrictInt]
    n_iterations_becke_partition: StrictInt = Field(gt=0)
    radius_becke_partition: StrictStr
    partitioning_scheme: StrictStr
    real_space_function: StrictStr

    @field_validator("integration_grid")
    @classmethod
    def validate_integration_grid(cls, value: Any) -> int:
        """Translate ``integration_grid`` into the index of the integration grid method.

        Parameters
        ----------
        value : Any
            The integration grid name to be checked.

        Returns
        -------
        int
            The index stored for the selected integration grid method.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``INTEGRATION_GRID_METHODS_MULTIWFN_FUZZY``.
        """
        grid_methods = INTEGRATION_GRID_METHODS_MULTIWFN_FUZZY
        if value in grid_methods:
            return grid_methods[value]
        raise PydanticCustomError("", f"Input must be one of {list(grid_methods)}")

    @field_validator("radius_becke_partition")
    @classmethod
    def validate_radius_becke_partition(cls, value: Any) -> int:
        """Translate ``radius_becke_partition`` into the index of the radius method.

        Parameters
        ----------
        value : Any
            The radius method name to be checked.

        Returns
        -------
        int
            The index stored for the selected radius method for Becke partitioning.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``RADIUS_BECKE_PARTITION_METHODS_MULTIWFN_FUZZY``.
        """
        radius_methods = RADIUS_BECKE_PARTITION_METHODS_MULTIWFN_FUZZY
        if value in radius_methods:
            return radius_methods[value]
        raise PydanticCustomError("", f"Input must be one of {list(radius_methods)}")

    @field_validator("partitioning_scheme")
    @classmethod
    def validate_partitioning_scheme(cls, value: Any) -> int:
        """Translate ``partitioning_scheme`` into the index of the partitioning scheme.

        Parameters
        ----------
        value : Any
            The partitioning scheme name to be checked.

        Returns
        -------
        int
            The index stored for the selected partitioning scheme.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``PARTITION_SCHEME_METHODS_MULTIWFN_FUZZY``.
        """
        schemes = PARTITION_SCHEME_METHODS_MULTIWFN_FUZZY
        if value in schemes:
            return schemes[value]
        raise PydanticCustomError("", f"Input must be one of {list(schemes)}")

    @field_validator("real_space_function")
    @classmethod
    def validate_real_space_function(cls, value: Any) -> int:
        """Translate ``real_space_function`` into the index of the real space function.

        Parameters
        ----------
        value : Any
            The real space function name to be checked.

        Returns
        -------
        int
            The index stored for the selected real space function.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``REAL_SPACE_FUNCTIONS_MULTIWFN``.
        """
        functions = REAL_SPACE_FUNCTIONS_MULTIWFN
        if value in functions:
            return functions[value]
        raise PydanticCustomError("", f"Input must be one of {list(functions)}")
[docs]
class ValidateMultiwfnMisc(_StandardizeStrMixin, BaseModel):
    """Check type and format of the miscellaneous settings for the Multiwfn features.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    """

    OMP_STACKSIZE: Optional[StrictStr] = None
    NUM_THREADS: Optional[StrictInt] = Field(None, gt=0)
[docs]
class ValidateMultiwfnOrbital(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for the Multiwfn orbital features.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    homo_minus : StrictInt
        The number of orbitals to go below the HOMO, must be greater than or equal to zero.
    lumo_plus : StrictInt
        The number of orbitals to go above the LUMO, must be greater than or equal to zero.
    """

    OMP_STACKSIZE: Optional[StrictStr] = None
    NUM_THREADS: Optional[StrictInt] = Field(None, gt=0)
    homo_minus: StrictInt = Field(ge=0)
    lumo_plus: StrictInt = Field(ge=0)
[docs]
class ValidateMultiwfnPopulation(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for the Multiwfn population analysis features.

    The validated names of ``radius_becke_partition``, ``esp_type``, ``atomic_radii``, and
    ``eem_parameters`` are replaced by the values they map to in the respective lookup tables.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    n_iterations_becke_partition : StrictInt
        The number of iterations for the Becke partitioning, must be a positive integer.
    radius_becke_partition : StrictStr
        The name of the method for the radius in Becke partitioning.
    grid_spacing_chelpg : StrictFloat
        The grid size for CHELPG calculations, must be a positive float.
    box_extension_chelpg : StrictFloat
        The box extension size for CHELPG calculations, must be a positive float.
    esp_type : StrictStr
        The name of the ESP type for various population analysis methods.
    atomic_radii : StrictStr
        The name of the atomic radii definition used in various population analysis methods.
    exclude_atoms : List[StrictInt]
        A list of atom indices to be excluded from the feature calculation.
    fitting_points_settings_merz_kollmann : List[StrictFloat]
        A list with the number and the scale factors required for calculating the Merz-Kollmann
        fitting points; all entries must be greater than zero.
    n_points_angstrom2_merz_kollmann : StrictFloat
        The number of fitting points per square Angstrom for Merz-Kollmann fitting, must be a
        positive float.
    eem_parameters : StrictStr
        The name of the parameter set for calculating EEM charges.
    tightness_resp : StrictFloat
        The tightness parameter for RESP calculations, must be a positive float.
    restraint_one_stage_resp : StrictFloat
        The restraint strength for one-stage RESP calculations, must be a positive float.
    restraint_stage1_resp : StrictFloat
        The restraint strength for stage 1 of two-stage RESP calculations, must be a positive
        float.
    restraint_stage2_resp : StrictFloat
        The restraint strength for stage 2 of two-stage RESP calculations, must be a positive
        float.
    n_iterations_resp : StrictInt
        The maximum number of iterations for RESP calculations, must be a positive integer.
    convergence_threshold_resp : StrictFloat
        The convergence threshold for RESP calculations, must be a positive float.
    ch_equivalence_constraint_resp : StrictBool
        Whether to apply charge equivalence constraints due to chemical equivalence in RESP
        calculation.
    """

    OMP_STACKSIZE: Optional[StrictStr] = Field(default=None)
    NUM_THREADS: Optional[StrictInt] = Field(default=None, gt=0)
    n_iterations_becke_partition: StrictInt = Field(gt=0)
    radius_becke_partition: StrictStr
    grid_spacing_chelpg: StrictFloat = Field(gt=0)
    box_extension_chelpg: StrictFloat = Field(gt=0)
    esp_type: StrictStr
    atomic_radii: StrictStr
    exclude_atoms: List[StrictInt]
    fitting_points_settings_merz_kollmann: List[StrictFloat]
    n_points_angstrom2_merz_kollmann: StrictFloat = Field(gt=0)
    eem_parameters: StrictStr
    tightness_resp: StrictFloat = Field(gt=0)
    restraint_one_stage_resp: StrictFloat = Field(gt=0)
    restraint_stage1_resp: StrictFloat = Field(gt=0)
    restraint_stage2_resp: StrictFloat = Field(gt=0)
    n_iterations_resp: StrictInt = Field(gt=0)
    convergence_threshold_resp: StrictFloat = Field(gt=0)
    ch_equivalence_constraint_resp: StrictBool

    @field_validator("radius_becke_partition")
    @classmethod
    def validate_radius_becke_partition(cls, value: Any) -> int:
        """Translate ``radius_becke_partition`` into the index of the radius method.

        Parameters
        ----------
        value : Any
            The radius method name to be checked.

        Returns
        -------
        int
            The index stored for the selected radius method for Becke partitioning.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``RADIUS_BECKE_PARTITION_METHODS_MULTIWFN_POPULATION``.
        """
        radius_methods = RADIUS_BECKE_PARTITION_METHODS_MULTIWFN_POPULATION
        if value in radius_methods:
            return radius_methods[value]
        raise PydanticCustomError("", f"Input must be one of {list(radius_methods)}")

    @field_validator("esp_type")
    @classmethod
    def validate_esp_type(cls, value: Any) -> int:
        """Translate ``esp_type`` into the index of the ESP type.

        Parameters
        ----------
        value : Any
            The ESP type name to be checked.

        Returns
        -------
        int
            The index stored for the selected ESP type.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``ESP_TYPE_MULTIWFN_POPULATION``.
        """
        esp_types = ESP_TYPE_MULTIWFN_POPULATION
        if value in esp_types:
            return esp_types[value]
        raise PydanticCustomError("", f"Input must be one of {list(esp_types)}")

    @field_validator("atomic_radii")
    @classmethod
    def validate_atomic_radii(cls, value: Any) -> int:
        """Translate ``atomic_radii`` into the index of the radius type.

        Parameters
        ----------
        value : Any
            The atomic radii definition name to be checked.

        Returns
        -------
        int
            The index stored for the selected radius type.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``ATOMIC_RADII_MULTIWFN_POPULATION``.
        """
        radii_definitions = ATOMIC_RADII_MULTIWFN_POPULATION
        if value in radii_definitions:
            return radii_definitions[value]
        raise PydanticCustomError("", f"Input must be one of {list(radii_definitions)}")

    @field_validator("eem_parameters")
    @classmethod
    def validate_eem_parameters(cls, value: Any) -> int:
        """Translate ``eem_parameters`` into the index of the EEM parameter set.

        Parameters
        ----------
        value : Any
            The EEM parameter set name to be checked.

        Returns
        -------
        int
            The index stored for the selected EEM parameter set.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``EEM_PARAMETERS_MULTIWFN_POPULATION``.
        """
        parameter_sets = EEM_PARAMETERS_MULTIWFN_POPULATION
        if value in parameter_sets:
            return parameter_sets[value]
        raise PydanticCustomError("", f"Input must be one of {list(parameter_sets)}")

    @field_validator("fitting_points_settings_merz_kollmann")
    @classmethod
    def validate_fitting_points_settings_merz_kollmann(cls, value: Any) -> List[float]:
        """Ensure that all entries of ``fitting_points_settings_merz_kollmann`` are positive.

        Parameters
        ----------
        value : Any
            The list of numbers to be checked.

        Returns
        -------
        List[float]
            The validated number and scale factors of the layers of MK fitting points, each
            converted to ``float``.

        Raises
        ------
        PydanticCustomError
            If any entry is smaller than or equal to zero.
        """
        if any(entry <= 0 for entry in value):
            raise PydanticCustomError("", "All input values must be greater than 0")
        return list(map(float, value))
[docs]
class ValidateMultiwfnSurface(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for the Multiwfn surface features.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    surface_definition : StrictStr
        The scheme to define the molecular surface. After validation, the name is replaced by the
        value it maps to in ``REAL_SPACE_FUNCTIONS_MULTIWFN``.
    surface_iso_value : StrictFloat
        The iso value for defining the surface, must be a positive float.
    grid_point_spacing : StrictFloat
        The scaling parameter for the grid to generate the surface, must be a positive float.
    length_scale : StrictFloat
        The length scale for surface generation, must be a positive float.
    orbital_overlap_edr_option : List[Any]
        The total number, start, and increment in EDR exponents.
    """

    OMP_STACKSIZE: Optional[StrictStr] = Field(default=None)
    NUM_THREADS: Optional[StrictInt] = Field(default=None, gt=0)
    surface_definition: StrictStr
    surface_iso_value: StrictFloat = Field(gt=0)
    grid_point_spacing: StrictFloat = Field(gt=0)
    length_scale: StrictFloat = Field(gt=0)
    orbital_overlap_edr_option: List[Any]

    @field_validator("surface_definition")
    @classmethod
    def validate_surface_definition(cls, value: Any) -> int:
        """Translate ``surface_definition`` into the index of the surface definition.

        Parameters
        ----------
        value : Any
            The surface definition name to be checked.

        Returns
        -------
        int
            The index stored for the selected surface definition.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not a key of ``REAL_SPACE_FUNCTIONS_MULTIWFN``.
        """
        definitions = REAL_SPACE_FUNCTIONS_MULTIWFN
        if value in definitions:
            return definitions[value]
        raise PydanticCustomError("", f"Input must be one of {list(definitions)}")

    @field_validator("orbital_overlap_edr_option")
    @classmethod
    def validate_orbital_overlap_edr_option(cls, value: List[Any]) -> List[Union[int, float]]:
        """Check the three entries of ``orbital_overlap_edr_option``.

        The list must hold exactly three entries: an integer between 1 and 50 (index 0), a number
        (index 1), and a number greater than 1.01 (index 2).

        Parameters
        ----------
        value : List[Any]
            The list to be checked.

        Returns
        -------
        List[Union[int, float]]
            The unchanged, validated list of the EDR function data.

        Raises
        ------
        PydanticCustomError
            If the list length or any of its entries violates the rules above.
        """
        if len(value) != 3:
            raise PydanticCustomError("", "Input must exactly contain 3 values")

        n_exponents, start, increment = value
        # Exact type comparisons (not isinstance) so that booleans are not accepted as numbers.
        numeric_types = (int, float)

        if type(n_exponents) is not int:
            raise PydanticCustomError("", "Input must contain an integer value at index 0")
        if n_exponents < 1:
            raise PydanticCustomError("", "Input at index 0 must be greater than 0")
        if n_exponents > 50:
            raise PydanticCustomError("", "Input at index 0 must not be greater than 50")

        if type(start) not in numeric_types:
            raise PydanticCustomError("", "Input must contain a number at index 1")

        if type(increment) not in numeric_types:
            raise PydanticCustomError("", "Input must contain a number at index 2")
        if increment <= 1.01:
            raise PydanticCustomError("", "Input at index 2 must be greater than 1.01")

        return value
[docs]
class ValidateMultiwfnTopology(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for the Multiwfn topology features.

    For further details, please refer to the Multiwfn manual (http://sobereva.com/multiwfn/, last
    accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    NUM_THREADS : Optional[StrictInt]
        The number of threads, must be a positive integer if provided; defaults to ``None``.
    step_size : StrictFloat
        The step size, must be a positive float.
    neighbor_distance_cutoff : StrictFloat
        The neighbor distance cutoff, must be a positive float.
    """

    OMP_STACKSIZE: Optional[StrictStr] = None
    NUM_THREADS: Optional[StrictInt] = Field(None, gt=0)
    step_size: StrictFloat = Field(gt=0)
    neighbor_distance_cutoff: StrictFloat = Field(gt=0)
[docs]
class ValidatePsi4(_StandardizeStrMixin, BaseModel):
    """Check type and format of the settings for Psi4.

    For further details, please refer to the Psi4 documentation
    (https://psicode.org/psi4manual/master/index.html, last accessed on 05.09.2025).

    Attributes
    ----------
    method : StrictStr
        The quantum chemistry method.
    basis : StrictStr
        The basis set.
    maxiter : StrictInt
        The maximum number of SCF iterations, must be a positive integer.
    memory : StrictStr
        The amount of memory in the format '<number> <unit>', e.g., "2 gb".
    num_threads : StrictInt
        The number of threads, must be a positive integer.
    solvent : StrictStr
        The name of the solvent, must be contained in ``SOLVENTS_PSI4``.
    solvent_model_solver : StrictStr
        The name of the solver for the solvent model, must be contained in
        ``SOLVENT_MODEL_SOLVERS_PSI4``.
    """

    method: StrictStr
    basis: StrictStr
    maxiter: StrictInt = Field(gt=0)
    memory: StrictStr
    num_threads: StrictInt = Field(gt=0)
    solvent: StrictStr
    solvent_model_solver: StrictStr

    @field_validator("memory")
    @classmethod
    def validate_memory(cls, value: str) -> str:
        """Ensure that ``memory`` consists of an integer and a unit separated by whitespace.

        Only the number of whitespace-separated parts and the integer conversion of the first
        part are checked; the unit itself is not inspected.

        Parameters
        ----------
        value : str
            The memory string to be checked.

        Returns
        -------
        str
            The memory string with leading and trailing whitespace removed.

        Raises
        ------
        PydanticCustomError
            If ``value`` does not consist of exactly two parts or the first part cannot be
            converted to an integer.
        """
        message = "Input must be a string in the format '<number> <unit>', e.g., '2 gb'"

        parts = value.split()
        if len(parts) != 2:
            raise PydanticCustomError("", message)

        amount = parts[0]
        try:
            int(amount)
        except ValueError:
            raise PydanticCustomError("", message)

        return value.strip()

    @field_validator("solvent")
    @classmethod
    def validate_solvent(cls, value: str) -> str:
        """Ensure that ``solvent`` is a supported solvent.

        Parameters
        ----------
        value : str
            The solvent name to be checked.

        Returns
        -------
        str
            The unchanged solvent name if it is supported.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not contained in ``SOLVENTS_PSI4``.
        """
        if value in SOLVENTS_PSI4:
            return value
        raise PydanticCustomError("", f"Input must be one of {SOLVENTS_PSI4}")

    @field_validator("solvent_model_solver")
    @classmethod
    def validate_solvent_model_solver(cls, value: str) -> str:
        """Ensure that ``solvent_model_solver`` is a supported solver.

        Parameters
        ----------
        value : str
            The solver name to be checked.

        Returns
        -------
        str
            The unchanged solver name if it is supported.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not contained in ``SOLVENT_MODEL_SOLVERS_PSI4``.
        """
        if value in SOLVENT_MODEL_SOLVERS_PSI4:
            return value
        raise PydanticCustomError("", f"Input must be one of {SOLVENT_MODEL_SOLVERS_PSI4}")
[docs]
class ValidateRdkitFingerprint(BaseModel):
    """Check type and format of the settings for the RDKit fingerprint features.

    For further details, please refer to the RDKit documentation
    (https://www.rdkit.org/docs/source/rdkit.Chem.rdFingerprintGenerator.html, last accessed on
    05.09.2025).

    Attributes
    ----------
    radius : StrictInt
        The radius of the fingerprint, must be a non-negative integer.
    countSimulation : StrictBool
        Whether to use count simulation during fingerprint generation.
    includeChirality : StrictBool
        Whether to include chirality information in the fingerprint.
    useBondTypes : StrictBool
        Whether to consider bond types in the fingerprint.
    countBounds : Any
        The boundaries for count simulation; the string "none" (in any casing) is turned into
        ``None``.
    fpSize : StrictInt
        The size of the fingerprint, must be a positive integer.
    torsionAtomCount : StrictInt
        The number of atoms to include in the torsions, must be a non-negative integer.
    minDistance : StrictInt
        The minimum distance between two atoms, must be a non-negative integer.
    maxDistance : StrictInt
        The maximum distance between two atoms, must be a non-negative integer.
    use2D : StrictBool
        Whether to use the 2D distance matrix during fingerprint generation.
    minPath : StrictInt
        The minimum path length as number of bonds, must be a non-negative integer.
    maxPath : StrictInt
        The maximum path length as number of bonds, must be a non-negative integer.
    useHs : StrictBool
        Whether to include hydrogen atoms in the fingerprint.
    branchedPaths : StrictBool
        Whether to consider branched paths in the fingerprint.
    useBondOrder : StrictBool
        Whether to consider bond order in the fingerprint.
    numBitsPerFeature : StrictInt
        The number of bits to use per feature, must be a positive integer.
    """

    radius: StrictInt = Field(ge=0)
    countSimulation: StrictBool
    includeChirality: StrictBool
    useBondTypes: StrictBool
    countBounds: Any
    fpSize: StrictInt = Field(gt=0)
    torsionAtomCount: StrictInt = Field(ge=0)
    minDistance: StrictInt = Field(ge=0)
    maxDistance: StrictInt = Field(ge=0)
    use2D: StrictBool
    minPath: StrictInt = Field(ge=0)
    maxPath: StrictInt = Field(ge=0)
    useHs: StrictBool
    branchedPaths: StrictBool
    useBondOrder: StrictBool
    numBitsPerFeature: StrictInt = Field(gt=0)

    @field_validator("countBounds")
    @classmethod
    def validate_count_bounds(cls, value: Any) -> Any:
        """Map the string "none" of ``countBounds`` to ``None``.

        Parameters
        ----------
        value : Any
            The value to be checked.

        Returns
        -------
        Any
            ``None`` if ``value`` is a string that reads "none" after stripping whitespace and
            lowercasing, otherwise the original value specified by the user.
        """
        reads_none = type(value) is str and value.strip().lower() == "none"
        return None if reads_none else value
[docs]
class ValidateXtb(_StandardizeStrMixin, BaseModel):
    """Validate the configuration settings for xtb.

    For further details, please refer to the xtb documentation
    (https://xtb-docs.readthedocs.io/en/latest/, last accessed on 05.09.2025).

    Attributes
    ----------
    OMP_STACKSIZE : Optional[StrictStr]
        The size of the OpenMP stack; defaults to ``None`` if not provided.
    OMP_NUM_THREADS : Optional[StrictInt]
        The number of OpenMP threads, must be a positive integer if provided.
    OMP_MAX_ACTIVE_LEVELS : Optional[StrictInt]
        The maximum number of nested active parallel regions, must be a positive integer if
        provided.
    MKL_NUM_THREADS : Optional[StrictInt]
        The number of threads for the Intel Math Kernel Library, must be a positive integer if
        provided.
    XTBHOME : Optional[StrictStr]
        The path to the xtb home directory. If set to "auto", the path is determined automatically.
        ``None`` is passed through unchanged.
    method : StrictStr
        The semi-empirical method to be used, must be contained in ``METHODS_XTB``.
    iterations : StrictInt
        The maximum number of SCF iterations, must be a positive integer.
    acc : StrictFloat
        The accuracy level for the xtb calculation, must be between 0.0001 and 1000.
    etemp : StrictInt
        The electronic temperature, must be a non-negative integer.
    etemp_native : StrictInt
        The electronic temperature used for the direct calculation xtb features, must be a
        non-negative integer.
    solvent_model : StrictStr
        The name of the solvent model, must be contained in ``SOLVENT_MODELS_XTB``.
    solvent : StrictStr
        The name of the solvent, must be contained in ``SOLVENTS_XTB``.
    """

    OMP_STACKSIZE: Optional[StrictStr] = Field(default=None)
    OMP_NUM_THREADS: Optional[StrictInt] = Field(default=None, gt=0)
    OMP_MAX_ACTIVE_LEVELS: Optional[StrictInt] = Field(default=None, gt=0)
    MKL_NUM_THREADS: Optional[StrictInt] = Field(default=None, gt=0)
    XTBHOME: Optional[StrictStr] = Field(default=None)
    method: StrictStr
    iterations: StrictInt = Field(gt=0)
    acc: StrictFloat = Field(ge=0.0001, le=1000)
    etemp: StrictInt = Field(ge=0)
    etemp_native: StrictInt = Field(ge=0)
    solvent_model: StrictStr
    solvent: StrictStr

    @field_validator("XTBHOME")
    @classmethod
    def validate_xtb_home(cls, value: Optional[str]) -> Optional[str]:
        """Validate ``XTBHOME``.

        If set to "auto", the path is determined automatically by pointing to /share/xtb
        in the xtb installation directory. If the user-provided path does not exist, the
        automatically generated path is used. An explicitly passed ``None`` is returned
        unchanged because the field is optional.

        Parameters
        ----------
        value : Optional[str]
            The value to be validated.

        Returns
        -------
        Optional[str]
            The validated XTB home path, either the user-provided path or the automatically
            generated one; ``None`` if ``None`` was passed.
        """
        # The field is Optional: an explicit None must not reach the string methods below.
        if value is None:
            return None

        # An existing user-provided path is kept as is; no executable lookup is needed then.
        if value.strip().lower() != "auto" and os.path.exists(value):
            return value

        # NOTE(review): shutil.which returns None if no xtb executable is found on PATH; the
        # str() conversion then yields the relative path "share/xtb". This fallback is kept
        # unchanged for backward compatibility - confirm whether an error would be preferable.
        xtb_executable = str(shutil.which("xtb"))
        installation_root = os.path.dirname(os.path.dirname(xtb_executable))
        return os.path.join(installation_root, "share", "xtb")

    @field_validator("method")
    @classmethod
    def validate_method(cls, value: str) -> str:
        """Validate ``method``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The formatted and validated method string.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not contained in ``METHODS_XTB``.
        """
        if value not in METHODS_XTB:
            _errmsg = f"Input must be one of {METHODS_XTB}"
            raise PydanticCustomError("", _errmsg)
        return value

    @field_validator("solvent_model")
    @classmethod
    def validate_solvent_model(cls, value: str) -> str:
        """Validate ``solvent_model``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The formatted and validated solvent model string.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not contained in ``SOLVENT_MODELS_XTB``.
        """
        if value not in SOLVENT_MODELS_XTB:
            _errmsg = f"Input must be one of {SOLVENT_MODELS_XTB}"
            raise PydanticCustomError("", _errmsg)
        return value

    @field_validator("solvent")
    @classmethod
    def validate_solvent(cls, value: str) -> str:
        """Validate ``solvent``.

        Parameters
        ----------
        value : str
            The value to be validated.

        Returns
        -------
        str
            The formatted and validated solvent string.

        Raises
        ------
        PydanticCustomError
            If ``value`` is not contained in ``SOLVENTS_XTB``.
        """
        if value not in SOLVENTS_XTB:
            _errmsg = f"Input must be one of {SOLVENTS_XTB}"
            raise PydanticCustomError("", _errmsg)
        return value
[docs]
class ValidateDummy(BaseModel):
    """Placeholder validator without any fields or checks.

    The class declares no attributes and no validators of its own, so it does not perform any
    validation.
    """
[docs]
def config_data_validator(
    config_path: List[str], params: Dict[str, Any], _namespace: Optional[str]
) -> Dict[str, Any]:
    """Validate the configuration settings of a featurizer.

    The respective validation class is selected based on the provided configuration path
    (its elements joined with "."). In case no validation class is registered for the path, a
    warning is logged and the settings are returned without validation. In every case, the
    ``feature_info`` entry is removed from the returned settings.

    Parameters
    ----------
    config_path : List[str]
        A list of strings representing the path to the configuration settings in the internal
        configuration settings tree.
    params : Dict[str, Any]
        A dictionary containing the configuration settings to be validated. The keys should match
        the attributes of the respective validation data class.
    _namespace : Optional[str]
        The namespace of the currently handled molecule for logging purposes; ``None`` if no
        molecule was read in yet.

    Returns
    -------
    Dict[str, Any]
        The validated and formatted configuration settings.

    Raises
    ------
    ValueError
        If the validation class rejects the settings. The message lists the problems grouped
        by parameter name, and the original ``ValidationError`` is attached as the cause.
    """
    _loc = get_function_or_method_name()

    _validators = {
        "alfabet": ValidateAlfabet,
        "bonafide.autocorrelation": ValidateBonafideAutocorrelation,
        "bonafide.constant": ValidateBonafideConstant,
        "bonafide.distance": ValidateBonafideDistance,
        "bonafide.functional_group": ValidateBonafideFunctionalGroup,
        "bonafide.misc": ValidateDummy,
        "bonafide.oxidation_state": ValidateBonafideOxidationState,
        "bonafide.symmetry": ValidateBonafideSymmetry,
        "dbstep": ValidateDbstep,
        "dscribe.acsf": ValidateDscribeAcsf,
        "dscribe.coulomb_matrix": ValidateDscribeCoulombMatrix,
        "dscribe.lmbtr": ValidateDscribeLmbtr,
        "dscribe.soap": ValidateDscribeSoap,
        "kallisto": ValidateKallisto,
        "mendeleev": ValidateMendeleev,
        "morfeus.buried_volume": ValidateMorfeusBuriedVolume,
        "morfeus.cone_and_solid_angle": ValidateMorfeusConeAndSolidAngle,
        "morfeus.dispersion": ValidateMorfeusDispersion,
        "morfeus.local_force": ValidateMorfeusLocalForce,
        "morfeus.pyramidalization": ValidateMorfeusPyramidalization,
        "morfeus.sasa": ValidateMorfeusSasa,
        "multiwfn": ValidateMultiwfnRootData,
        "multiwfn.bond_analysis": ValidateMultiwfnBondAnalysis,
        "multiwfn.cdft": ValidateMultiwfnCdft,
        "multiwfn.fuzzy": ValidateMultiwfnFuzzy,
        "multiwfn.misc": ValidateMultiwfnMisc,
        "multiwfn.orbital": ValidateMultiwfnOrbital,
        "multiwfn.population": ValidateMultiwfnPopulation,
        "multiwfn.surface": ValidateMultiwfnSurface,
        "multiwfn.topology": ValidateMultiwfnTopology,
        "psi4": ValidatePsi4,
        "qmdesc": ValidateDummy,
        "rdkit.fingerprint": ValidateRdkitFingerprint,
        "rdkit.misc": ValidateDummy,
        "xtb": ValidateXtb,
    }

    config_path_str = ".".join(config_path)
    logging.info(
        f"'{_namespace}' | {_loc}()\nValidating configuration settings from '{config_path_str}'."
    )

    # In case no validator is implemented
    if config_path_str not in _validators:
        logging.warning(
            f"'{_namespace}' | {_loc}()\nNo configuration settings validation class implemented "
            f"for '{config_path_str}'. This is probably due to using a custom featurization "
            "method. Ensure that its setting have the correct data type and format. No data "
            "validation is performed."
        )
        params = {key: value for key, value in params.items() if key != "feature_info"}
        logging.info(f"'{_namespace}' | {_loc}()\nConfiguration settings: {params}.")
        return params

    # Try to validate the set of parameters
    try:
        v = _validators[config_path_str](**params)
    except ValidationError as e:
        error_dict = defaultdict(list)
        for error in e.errors():
            _p_loc = error["loc"]

            # check_iterable_option (mode=after) raises errors with empty loc tuple as it is not a
            # classmethod. This is only the case for one method, which checks iterable options.
            # Therefore, the location is set manually here. The error input is deliberately not
            # included in the message for this case.
            if not _p_loc:
                error_dict["iterable_option"].append(f"{error['msg']}.")
                continue

            _inp = error["input"]
            error_dict[str(_p_loc[0])].append(
                f"{error['msg']}, obtained: {_inp} (of type '{type(_inp).__name__}')."
            )

        _errmsg = f"Incorrect data encountered in '{config_path_str}': {dict(error_dict)}"
        logging.error(f"'{_namespace}' | {_loc}()\n{_errmsg}")
        # Chain explicitly so the traceback marks the pydantic error as the direct cause.
        raise ValueError(f"{_loc}(): {_errmsg}") from e

    # Remove the feature_info parameter (is now irrelevant)
    params = {key: value for key, value in v.__dict__.items() if key != "feature_info"}
    logging.info(f"'{_namespace}' | {_loc}()\nValidated configuration settings: {params}.")
    return params