optunaz.config package

Submodules

optunaz.config.build_from_opt module

optunaz.config.build_from_opt.set_build_cache(study, optconfig)[source]: Set the cache to the preexisting one from Optimisation, when the number of cores supports this

optunaz.config.build_from_opt.remove_algo_hash(trial)[source]: Remove the hash from an Optuna algo param set

optunaz.config.build_from_opt.buildconfig_from_trial(study, trial)[source]

optunaz.config.build_from_opt.encode_name(CEname, hash=<built-in function hash>)[source]: Encode the parameter names with a hash to enable multi-parameter optimisation

optunaz.config.build_from_opt.suggest_alg_params(trial, alg)[source]

optunaz.config.build_from_opt.suggest_aux_params(trial, desc)[source]

optunaz.config.build_from_opt.check_invalid_descriptor_param(alg)[source]

optunaz.config.buildconfig module

class optunaz.config.buildconfig.Algorithm[source]

Bases: Algorithm

abstract estimator()[source]

class optunaz.config.buildconfig.AdaBoostClassifier(name: Literal['AdaBoostClassifier'], parameters: AdaBoostClassifierParameters)[source]

Bases: Algorithm

class AdaBoostClassifierParameters(n_estimators: int = 1, learning_rate: float = 0.1)[source]

Bases: object

n_estimators = 1

learning_rate = 0.1

name

parameters

estimator()[source]

class optunaz.config.buildconfig.Lasso(name: Literal['Lasso'], parameters: LassoParameters)[source]

Bases: Algorithm

class LassoParameters(alpha: float = 1.0)[source]

Bases: object

alpha = 1.0

name

parameters

estimator()[source]

class optunaz.config.buildconfig.KNeighborsClassifier(name: Literal['KNeighborsClassifier'], parameters: KNeighborsClassifierParameters)[source]

Bases: Algorithm

class KNeighborsClassifierParameters(metric: str, weights: str, n_neighbors: int = 5)[source]

Bases: object

metric

weights

n_neighbors = 5

name

parameters

estimator()[source]

class optunaz.config.buildconfig.KNeighborsRegressor(name: Literal['KNeighborsRegressor'], parameters: KNeighborsRegressorParameters)[source]

Bases: Algorithm

class KNeighborsRegressorParameters(metric: str, weights: str, n_neighbors: int = 5)[source]

Bases: object

metric

weights

n_neighbors = 5

name

parameters

estimator()[source]

class optunaz.config.buildconfig.LogisticRegression(name: Literal['LogisticRegression'], parameters: LogisticRegressionParameters)[source]

Bases: Algorithm

class LogisticRegressionParameters(solver: str, C: float = 1.0)[source]

Bases: object

solver

C = 1.0

name

parameters

estimator()[source]

class optunaz.config.buildconfig.PLSRegression(name: Literal['PLSRegression'], parameters: PLSParameters)[source]

Bases: Algorithm

class PLSParameters(n_components: int = 2)[source]

Bases: object

n_components = 2

name

parameters

estimator()[source]

class optunaz.config.buildconfig.RandomForestClassifier(name: Literal['RandomForestClassifier'], parameters: RandomForestParameters)[source]

Bases: Algorithm

class RandomForestParameters(max_features: str, max_depth: int = None, n_estimators: int = 100)[source]

Bases: object

max_features

max_depth = None

n_estimators = 100

name

parameters

estimator()[source]

class optunaz.config.buildconfig.RandomForestRegressor(name: Literal['RandomForestRegressor'], parameters: RandomForestParameters)[source]

Bases: Algorithm

class RandomForestParameters(max_depth: int, n_estimators: int, max_features: str)[source]

Bases: object

max_depth

n_estimators

max_features

name

parameters

estimator()[source]

class optunaz.config.buildconfig.Ridge(name: Literal['Ridge'], parameters: RidgeParameters)[source]

Bases: Algorithm

class RidgeParameters(alpha: float)[source]

Bases: object

alpha

name

parameters

estimator()[source]

class optunaz.config.buildconfig.SVC(name: Literal['SVC'], parameters: SVCParameters)[source]

Bases: Algorithm

class SVCParameters(C: float = 1.0, gamma: float = 0.0001)[source]

Bases: object

C = 1.0

gamma = 0.0001

name

parameters

estimator()[source]

class optunaz.config.buildconfig.SVR(name: Literal['SVR'], parameters: SVRParameters)[source]

Bases: Algorithm

class SVRParameters(C: float, gamma: float)[source]

Bases: object

C

gamma

name

parameters

estimator()[source]

class optunaz.config.buildconfig.XGBRegressor(name: Literal['XGBRegressor'], parameters: XGBRegressorParameters)[source]

Bases: Algorithm

class XGBRegressorParameters(max_depth: int, n_estimators: int, learning_rate: float)[source]

Bases: object

max_depth

n_estimators

learning_rate

name

parameters

estimator()[source]

class optunaz.config.buildconfig.PRFClassifier(name: Literal['PRFClassifier'], parameters: PRFClassifierParameters)[source]

Bases: Algorithm

class PRFClassifierParameters(max_depth: int, n_estimators: int, max_features: str, use_py_gini: int, use_py_leafs: int, bootstrap: int = 1, new_syn_data_frac: float = 0.0, min_py_sum_leaf: int = 1)[source]

Bases: object

max_depth

n_estimators

max_features

use_py_gini

use_py_leafs

bootstrap = 1

new_syn_data_frac = 0.0

min_py_sum_leaf = 1

name

parameters

estimator()[source]

class optunaz.config.buildconfig.ChemPropRegressor(name: Literal['ChemPropRegressor'], parameters: ChemPropRegressorParameters)[source]

Bases: Algorithm

class ChemPropRegressorParameters(activation: str, aggregation: str, aggregation_norm: float, batch_size: float, depth: float, dropout: float, ensemble_size: int, epochs: int, features_generator: str, ffn_hidden_size: float, ffn_num_layers: float, final_lr_ratio_exp: int, hidden_size: float, init_lr_ratio_exp: int, max_lr_exp: int, warmup_epochs_ratio: float = 0.1, aux_weight_pc: int = 100)[source]

Bases: object

activation

aggregation

aggregation_norm

batch_size

depth

dropout

ensemble_size

epochs

features_generator

ffn_hidden_size

ffn_num_layers

final_lr_ratio_exp

hidden_size

init_lr_ratio_exp

max_lr_exp

warmup_epochs_ratio = 0.1

aux_weight_pc = 100

name

parameters

estimator()[source]

class optunaz.config.buildconfig.ChemPropClassifier(name: Literal['ChemPropClassifier'], parameters: ChemPropClassifierParameters)[source]

Bases: Algorithm

class ChemPropClassifierParameters(activation: str, aggregation: str, aggregation_norm: float, batch_size: float, depth: float, dropout: float, ensemble_size: int, epochs: int, features_generator: str, ffn_hidden_size: float, ffn_num_layers: float, final_lr_ratio_exp: int, hidden_size: float, init_lr_ratio_exp: int, max_lr_exp: int, warmup_epochs_ratio: float = 0.1, aux_weight_pc: int = 100)[source]

Bases: object

activation

aggregation

aggregation_norm

batch_size

depth

dropout

ensemble_size

epochs

features_generator

ffn_hidden_size

ffn_num_layers

final_lr_ratio_exp

hidden_size

init_lr_ratio_exp

max_lr_exp

warmup_epochs_ratio = 0.1

aux_weight_pc = 100

name

parameters

estimator()[source]

class optunaz.config.buildconfig.ChemPropRegressorPretrained(name: Literal['ChemPropRegressorPretrained'], parameters: ChemPropRegressorPretrainedParameters)[source]

Bases: Algorithm

class ChemPropRegressorPretrainedParameters(epochs: int, frzn: str, pretrained_model: str)[source]

Bases: object

epochs

frzn

pretrained_model

name

parameters

estimator()[source]

class optunaz.config.buildconfig.ChemPropHyperoptClassifier(name: Literal['ChemPropHyperoptClassifier'], parameters: ChemPropHyperoptClassifierParameters)[source]

Bases: Algorithm

class ChemPropHyperoptClassifierParameters(ensemble_size: int, epochs: int, features_generator: str, num_iters: int, search_parameter_level: str, aux_weight_pc: int = 100)[source]

Bases: object

ensemble_size

epochs

features_generator

num_iters

search_parameter_level

aux_weight_pc = 100

name

parameters

estimator()[source]

class optunaz.config.buildconfig.ChemPropHyperoptRegressor(name: Literal['ChemPropHyperoptRegressor'], parameters: ChemPropHyperoptRegressorParameters)[source]

Bases: Algorithm

class ChemPropHyperoptRegressorParameters(ensemble_size: int, epochs: int, features_generator: str, num_iters: int, search_parameter_level: str, aux_weight_pc: int = 100)[source]

Bases: object

ensemble_size

epochs

features_generator

num_iters

search_parameter_level

aux_weight_pc = 100

name

parameters

estimator()[source]

class optunaz.config.buildconfig.CustomClassificationModel(name: Literal['CustomClassificationModel'], parameters: CustomClassificationModelParameters)[source]

Bases: Algorithm

class CustomClassificationModelParameters(preexisting_model: str, refit_model: int)[source]

Bases: object

preexisting_model

refit_model

class CustomClassificationEstimator(preexisting_model, refit_model)[source]

Bases: ClassifierMixin, BaseEstimator

fit(X, y)[source]

predict(y)[source]

predict_proba(y)[source]

set_score_request(*, sample_weight='$UNCHANGED$')

Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config()). Please see User Guide on how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to score.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

New in version 1.3.

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a Pipeline. Otherwise it has no effect.

Parameters:: sample_weight (str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED) – Metadata routing for sample_weight parameter in score.
Returns:: self – The updated object.
Return type:: object

name

parameters

estimator()[source]

class optunaz.config.buildconfig.CustomRegressionModel(name: Literal['CustomRegressionModel'], parameters: CustomRegressionModelParameters)[source]

Bases: Algorithm

class CustomRegressionModelParameters(preexisting_model: str, refit_model: int)[source]

Bases: object

preexisting_model

refit_model

class CustomRegressionEstimator(preexisting_model, refit_model)[source]

Bases: RegressorMixin, BaseEstimator

fit(X, y)[source]

predict(y)[source]

set_score_request(*, sample_weight='$UNCHANGED$')

Request metadata passed to the score method.

Note that this method is only relevant if enable_metadata_routing=True (see sklearn.set_config()). Please see User Guide on how the routing mechanism works.

The options for each parameter are:

True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.
False: metadata is not requested and the meta-estimator will not pass it to score.
None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.
str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the existing request. This allows you to change the request for some parameters and not others.

New in version 1.3.

Note

This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a Pipeline. Otherwise it has no effect.

Parameters:: sample_weight (str, True, False, or None, default=sklearn.utils.metadata_routing.UNCHANGED) – Metadata routing for sample_weight parameter in score.
Returns:: self – The updated object.
Return type:: object

name

parameters

estimator()[source]

class optunaz.config.buildconfig.CalibratedClassifierCVWithVA(name: Literal['CalibratedClassifierCVWithVA'], parameters: CalibratedClassifierCVParameters)[source]

Bases: Algorithm

class CalibratedClassifierCVParameters(n_folds: int, ensemble: str, method: str, estimator: Union[AdaBoostClassifier, KNeighborsClassifier, LogisticRegression, RandomForestClassifier, SVC, ChemPropClassifier, ChemPropRegressor, ChemPropRegressorPretrained, ChemPropHyperoptClassifier, ChemPropHyperoptRegressor, CustomClassificationModel])[source]

Bases: object

n_folds

ensemble

method

estimator

name

parameters

estimator()[source]

class optunaz.config.buildconfig.Mapie(name: Literal['Mapie'], parameters: MapieParameters)[source]

Bases: Algorithm

class MapieParameters(mapie_alpha: float, estimator: Union[Lasso, PLSRegression, RandomForestRegressor, KNeighborsRegressor, Ridge, SVR, XGBRegressor, PRFClassifier, CustomRegressionModel])[source]

Bases: object

mapie_alpha

estimator

name

parameters

estimator()[source]

class optunaz.config.buildconfig.BuildConfig(data, metadata, descriptor, settings, algorithm, task='building')[source]

Bases: object

Build configuration.

This is the configuration to train a model, i.e. optimize parameters of a model, given fixed hyperparameters. It roughly corresponds to Optuna Trial.

class Metadata(name: Optional[str] = None, cross_validation: Optional[int] = None, shuffle: Optional[bool] = None, best_trial: Optional[int] = None, best_value: Optional[float] = None, n_trials: Optional[int] = None, visualization: Optional[Visualization] = None)[source]

Bases: object

name = None

cross_validation = None

shuffle = None

best_trial = None

best_value = None

n_trials = None

visualization = None

class Settings(mode: Optional[ModelMode] = None, scoring: Union[RegressionScore, ClassificationScore, str, NoneType] = None, direction: Optional[OptimizationDirection] = None, n_trials: Optional[int] = None, tracking_rest_endpoint: Optional[str] = None)[source]

Bases: object

mode = None

scoring = None

direction = None

n_trials = None

tracking_rest_endpoint = None

data

metadata

descriptor

settings

algorithm

task = 'building'

optunaz.config.optconfig module

class optunaz.config.optconfig.ClassificationScore(value)[source]

Bases: str, Enum

An enumeration.

ACCURACY = 'accuracy'

AVERAGE_PRECISION = 'average_precision'

AUC_PR_CAL = 'auc_pr_cal'

BALANCED_ACCURACY = 'balanced_accuracy'

BEDROC = 'bedroc_score'

CONCORDANCE_INDEX = 'concordance_index'

F1 = 'f1'

F1_MACRO = 'f1_macro'

F1_MICRO = 'f1_micro'

F1_WEIGHTED = 'f1_weighted'

JACCARD = 'jaccard'

JACCARD_MACRO = 'jaccard_macro'

JACCARD_MICRO = 'jaccard_micro'

JACCARD_WEIGHTED = 'jaccard_weighted'

NEG_BRIER_SCORE = 'neg_brier_score'

PRECISION = 'precision'

PRECISION_MACRO = 'precision_macro'

PRECISION_MICRO = 'precision_micro'

PRECISION_WEIGHTED = 'precision_weighted'

RECALL = 'recall'

RECALL_MACRO = 'recall_macro'

RECALL_MICRO = 'recall_micro'

RECALL_WEIGHTED = 'recall_weighted'

ROC_AUC = 'roc_auc'

class optunaz.config.optconfig.RegressionScore(value)[source]

Bases: str, Enum

An enumeration.

EXPLAINED_VARIANCE = 'explained_variance'

MAX_ERROR = 'max_error'

NEG_MEAN_ABSOLUTE_ERROR = 'neg_mean_absolute_error'

NEG_MEAN_SQUARED_ERROR = 'neg_mean_squared_error'

NEG_MEDIAN_ABSOLUTE_ERROR = 'neg_median_absolute_error'

R2 = 'r2'

class optunaz.config.optconfig.Algorithm[source]: Bases: Algorithm

class optunaz.config.optconfig.AdaBoostClassifier(name, parameters)[source]

Bases: Algorithm

AdaBoost Classifier.

An AdaBoost classifier is a meta-estimator that begins by fitting a classifier on the original dataset and then fits additional copies of the classifier on the same dataset but where the weights of incorrectly classified instances are adjusted such that subsequent classifiers focus more on difficult cases.

class Parameters(n_estimators=AdaBoostClassifier.Parameters.AdaBoostClassifierParametersNEstimators(low=3, high=100), learning_rate=AdaBoostClassifier.Parameters.AdaBoostClassifierParametersLearningRate(low=1.0, high=1.0))[source]

Bases: object

Parameters:

n_estimators (AdaBoostClassifierParametersNEstimators) – The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early. - title: n_estimators
learning_rate (AdaBoostClassifierParametersLearningRate) – Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution of each classifier. There is a trade-off between the learning_rate and n_estimators parameters. - title: learning_rate

class AdaBoostClassifierParametersNEstimators(low: int = 3, high: int = 100)[source]

Bases: object

low = 3

high = 100

class AdaBoostClassifierParametersLearningRate(low: float = 1.0, high: float = 1.0)[source]

Bases: object

low = 1.0

high = 1.0

n_estimators = AdaBoostClassifier.Parameters.AdaBoostClassifierParametersNEstimators(low=3, high=100)

learning_rate = AdaBoostClassifier.Parameters.AdaBoostClassifierParametersLearningRate(low=1.0, high=1.0)

name

parameters

class optunaz.config.optconfig.Lasso(name, parameters)[source]

Bases: Algorithm

Lasso regression.

Lasso is a Linear Model trained with L1 prior as regularizer.

The Lasso is a linear model that estimates sparse coefficients. It tends to prefer solutions with fewer non-zero coefficients, effectively reducing the number of features upon which the given solution is dependent.

class Parameters(alpha=Lasso.Parameters.LassoParametersAlpha(low=0.0, high=2.0))[source]

Bases: object

Parameters:: alpha (LassoParametersAlpha) – Constant that multiplies the L1 term, controlling regularization strength. alpha must be a non-negative float i.e. in [0, inf). When alpha = 0, the objective is equivalent to ordinary least squares, solved by the LinearRegression object. For numerical reasons, using alpha = 0 with the Lasso object is not advised. Instead, you should use the LinearRegression object. - title: Alpha

class LassoParametersAlpha(low: float = 0.0, high: float = 2.0)[source]

Bases: object

low = 0.0

high = 2.0

alpha = Lasso.Parameters.LassoParametersAlpha(low=0.0, high=2.0)

name

parameters

class optunaz.config.optconfig.KNeighborsWeights(value)[source]

Bases: str, Enum

Method used to define the weights for a K-Neighbors Classifier

UNIFORM = 'uniform': uniform weights. All points in each neighborhood are weighted equally.

DISTANCE = 'distance': weight points by the inverse of their distance so closer neighbors for a query will have greater influence than further neighbors

class optunaz.config.optconfig.KNeighborsMetric(value)[source]

Bases: str, Enum

Metric used to compute distances between points for a K-Neighbors Classifier

MINKOWSKI = 'minkowski'

EUCLIDEAN = 'euclidean'

MANHATTAN = 'manhattan'

class optunaz.config.optconfig.KNeighborsClassifier(name, parameters)[source]

Bases: Algorithm

KNeighborsClassifier.

Classifier implementing the k-nearest neighbors vote.

The principle behind nearest neighbor methods is to find a predefined number of training samples closest in distance to the new point, and predict the label from these. The number of samples is a user-defined constant for k-nearest neighbor learning. Despite its simplicity, nearest neighbors is successful in a large number of classification problems.

class Parameters(n_neighbors=KNeighborsClassifier.Parameters.KNeighborsClassifierParametersN_Neighbors(low=1, high=10), weights, metric)[source]

Bases: object

Parameters:

n_neighbors (KNeighborsClassifierParametersN_Neighbors) – Number of neighbors to use by default for kneighbors queries. - title: N Neighbors
weights (List) – Weight function used in prediction - title: Weights
metric (List) – Metric to use for distance computation. The default of “minkowski” results in the standard Euclidean distance - title: Metric

class KNeighborsClassifierParametersN_Neighbors(low: float = 1, high: float = 10)[source]

Bases: object

low = 1

high = 10

n_neighbors = KNeighborsClassifier.Parameters.KNeighborsClassifierParametersN_Neighbors(low=1, high=10)

weights

metric

name

parameters

class optunaz.config.optconfig.KNeighborsRegressor(name, parameters)[source]

Bases: Algorithm

KNeighborsRegressor.

Regressor implementing the k-nearest neighbors vote.

The principle behind nearest neighbor methods is to find a predefined number of training samples closest in distance to the new point, and predict the label from these. The number of samples is a user-defined constant for k-nearest neighbor learning. Despite its simplicity, nearest neighbors is successful in a large number of classification and regression problems.

class Parameters(n_neighbors=KNeighborsRegressor.Parameters.KNeighborsRegressorParametersN_Neighbors(low=1, high=10), weights, metric)[source]

Bases: object

Parameters:

n_neighbors (KNeighborsRegressorParametersN_Neighbors) – Number of neighbors to use by default for kneighbors queries. - title: N Neighbors
weights (List) – Weight function used in prediction - title: Weights
metric (List) – Metric to use for distance computation. The default of “minkowski” results in the standard Euclidean distance - title: Metric

class KNeighborsRegressorParametersN_Neighbors(low: float = 1, high: float = 10)[source]

Bases: object

low = 1

high = 10

n_neighbors = KNeighborsRegressor.Parameters.KNeighborsRegressorParametersN_Neighbors(low=1, high=10)

weights

metric

name

parameters

class optunaz.config.optconfig.LogisticRegression(name, parameters)[source]

Bases: Algorithm

Logistic Regression classifier.

Logistic regression, despite its name, is a linear model for classification rather than regression. Logistic regression is also known in the literature as logit regression, maximum-entropy classification (MaxEnt) or the log-linear classifier. In this model, the probabilities describing the possible outcomes of a single trial are modeled using a logistic function.

class Parameters(solver, C=LogisticRegression.Parameters.LogisticRegressionParametersParameterC(low=1.0, high=1.0))[source]

Bases: object

Parameters:

solver (List) – List of solvers to try. Note that fast convergence of ‘sag’ and ‘saga’ is only guaranteed on features with approximately the same scale. You can preprocess the data with a scaler. - title: Solver
C (LogisticRegressionParametersParameterC) – Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization. - title: C

class LogisticRegressionParametersParameterC(low: float = 1.0, high: float = 1.0)[source]

Bases: object

low = 1.0

high = 1.0

solver

C = LogisticRegression.Parameters.LogisticRegressionParametersParameterC(low=1.0, high=1.0)

name

parameters

class optunaz.config.optconfig.PLSRegression(name, parameters)[source]

Bases: Algorithm

PLS regression (Cross decomposition using partial least squares).

PLS is a form of regularized linear regression where the number of components controls the strength of the regularization.

Cross decomposition algorithms find the fundamental relations between two matrices (X and Y). They are latent variable approaches to modeling the covariance structures in these two spaces. They will try to find the multidimensional direction in the X space that explains the maximum multidimensional variance direction in the Y space. In other words, PLS projects both X and Y into a lower-dimensional subspace such that the covariance between transformed(X) and transformed(Y) is maximal.

class Parameters(n_components=PLSRegression.Parameters.NComponents(low=2, high=5))[source]

Bases: object

Parameters:: n_components (NComponents) – Number of components to keep. Should be in [1, min(n_samples, n_features, n_targets)]. - title: n_components

class NComponents(low: int = 2, high: int = 5)[source]

Bases: object

low = 2

high = 5

n_components = PLSRegression.Parameters.NComponents(low=2, high=5)

name

parameters

class optunaz.config.optconfig.RandomForestMaxFeatures(value)[source]

Bases: str, Enum

Method used to define the maximum number of features in a Random Forest

AUTO = 'auto': Auto sets max_features=sqrt(n_features).

SQRT = 'sqrt': Square root sets max_features=sqrt(n_features).

LOG2 = 'log2': Log2 sets max_features=log2(n_features).

class optunaz.config.optconfig.RandomForestClassifier(name, parameters)[source]

Bases: Algorithm

Random Forest classifier.

A random forest is a meta estimator that fits a number of decision tree classifiers on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting.

class Parameters(max_depth=RandomForestClassifier.Parameters.RandomForestClassifierParametersMaxDepth(low=2, high=32), n_estimators=RandomForestClassifier.Parameters.RandomForestClassifierParametersNEstimators(low=10, high=250), max_features)[source]

Bases: object

Parameters:

max_depth (RandomForestClassifierParametersMaxDepth) – The maximum depth of the tree. - title: max_depth
n_estimators (RandomForestClassifierParametersNEstimators) – The number of trees in the forest. - title: n_estimators
max_features (List) – The number of features to consider when looking for the best split: If auto, then consider max_features features at each split. - If “auto”, then max_features=n_features. - If “sqrt”, then max_features=sqrt(n_features). - If “log2”, then max_features=log2(n_features). - title: max_features

class RandomForestClassifierParametersMaxDepth(low: int = 2, high: int = 32)[source]

Bases: object

low = 2

high = 32

class RandomForestClassifierParametersNEstimators(low: int = 10, high: int = 250)[source]

Bases: object

low = 10

high = 250

max_depth = RandomForestClassifier.Parameters.RandomForestClassifierParametersMaxDepth(low=2, high=32)

n_estimators = RandomForestClassifier.Parameters.RandomForestClassifierParametersNEstimators(low=10, high=250)

max_features

name

parameters

class optunaz.config.optconfig.RandomForestRegressor(name, parameters)[source]

Bases: Algorithm

Random Forest regression.

A random forest is a meta estimator that fits a number of classifying decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting.

class Parameters(max_depth=RandomForestRegressor.Parameters.RandomForestRegressorParametersMaxDepth(low=2, high=32), n_estimators=RandomForestRegressor.Parameters.RandomForestRegressorParametersNEstimators(low=10, high=250), max_features)[source]

Bases: object

Parameters:

max_depth (RandomForestRegressorParametersMaxDepth) – The maximum depth of the tree. - title: max_depth
n_estimators (RandomForestRegressorParametersNEstimators) – The number of trees in the forest. - title: n_estimators
max_features (List) – The number of features to consider when looking for the best split: If auto, then consider max_features features at each split. - If “auto”, then max_features=n_features. - If “sqrt”, then max_features=sqrt(n_features). - If “log2”, then max_features=log2(n_features). - title: max_features

class RandomForestRegressorParametersMaxDepth(low: int = 2, high: int = 32)[source]

Bases: object

low = 2

high = 32

class RandomForestRegressorParametersNEstimators(low: int = 10, high: int = 250)[source]

Bases: object

low = 10

high = 250

max_depth = RandomForestRegressor.Parameters.RandomForestRegressorParametersMaxDepth(low=2, high=32)

n_estimators = RandomForestRegressor.Parameters.RandomForestRegressorParametersNEstimators(low=10, high=250)

max_features

name

parameters

class optunaz.config.optconfig.Ridge(name, parameters)[source]

Bases: Algorithm

Ridge Regression (Linear least squares with l2 regularization).

This model solves a regression model where the loss function is the linear least squares function and regularization is given by the l2-norm. Also known as Ridge Regression or Tikhonov regularization.

class Parameters(alpha=Ridge.Parameters.Alpha(low=0.0, high=2.0))[source]

Bases: object

Parameters:: alpha (Alpha) – Constant that multiplies the L2 term, controlling regularization strength - title: alpha

class Alpha(low: float = 0.0, high: float = 2.0)[source]

Bases: object

low = 0.0

high = 2.0

alpha = Ridge.Parameters.Alpha(low=0.0, high=2.0)

name

parameters

class optunaz.config.optconfig.SVC(name, parameters)[source]

Bases: Algorithm

SVC classifier (C-Support Vector Classification).

The implementation is based on libsvm. The fit time scales at least quadratically with the number of samples and may be impractical beyond tens of thousands of samples.

class Parameters(C=SVC.Parameters.SVCParametersParameterC(low=1e-10, high=100.0), gamma=SVC.Parameters.Gamma(low=0.0001, high=100.0))[source]

Bases: object

Parameters:

C (SVCParametersParameterC) – Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. - title: C
gamma (Gamma) – Kernel coefficient - title: gamma

class SVCParametersParameterC(low: float = 1e-10, high: float = 100.0)[source]

Bases: object

low = 1e-10

high = 100.0

class Gamma(low: float = 0.0001, high: float = 100.0)[source]

Bases: object

low = 0.0001

high = 100.0

C = SVC.Parameters.SVCParametersParameterC(low=1e-10, high=100.0)

gamma = SVC.Parameters.Gamma(low=0.0001, high=100.0)

name

parameters

class optunaz.config.optconfig.SVR(name, parameters)[source]

Bases: Algorithm

SVR regression (Epsilon-Support Vector Regression).

The implementation is based on libsvm. The fit time complexity is more than quadratic with the number of samples which makes it hard to scale to datasets with more than a couple of 10000 samples.

class Parameters(C=SVR.Parameters.SVRParametersParameterC(low=1e-10, high=100.0), gamma=SVR.Parameters.SVRParametersGamma(low=0.0001, high=100.0))[source]

Bases: object

Parameters:

C (SVRParametersParameterC) – Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. - title: C
gamma (SVRParametersGamma) – Kernel coefficient - title: gamma

class SVRParametersParameterC(low: float = 1e-10, high: float = 100.0)[source]

Bases: object

low = 1e-10

high = 100.0

class SVRParametersGamma(low: float = 0.0001, high: float = 100.0)[source]

Bases: object

low = 0.0001

high = 100.0

C = SVR.Parameters.SVRParametersParameterC(low=1e-10, high=100.0)

gamma = SVR.Parameters.SVRParametersGamma(low=0.0001, high=100.0)

name

parameters

class optunaz.config.optconfig.XGBRegressor(parameters=XGBRegressor.Parameters(max_depth=XGBRegressor.Parameters.MaxDepth(low=2, high=32), n_estimators=XGBRegressor.Parameters.NEstimators(low=10, high=250), learning_rate=XGBRegressor.Parameters.LearningRate(low=0.1, high=0.1)), name='XGBRegressor')[source]

Bases: Algorithm

XGBoost regression (gradient boosting trees algorithm).

XGBoost stands for “Extreme Gradient Boosting”, where the term “Gradient Boosting” originates from the paper Greedy Function Approximation: A Gradient Boosting Machine, by Friedman.

class Parameters(max_depth=XGBRegressor.Parameters.MaxDepth(low=2, high=32), n_estimators=XGBRegressor.Parameters.NEstimators(low=10, high=250), learning_rate=XGBRegressor.Parameters.LearningRate(low=0.1, high=0.1))[source]

Bases: object

Parameters:

max_depth (MaxDepth) – Maximum tree depth for base learners. - title: max_depth
n_estimators (NEstimators) – Number of gradient boosted trees. Equivalent to number of boosting rounds. - title: n_estimators
learning_rate (LearningRate) – Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution of each classifier. There is a trade-off between the learning_rate and n_estimators parameters. - title: learning_rate

class MaxDepth(low: int = 2, high: int = 32)[source]

Bases: object

low = 2

high = 32

class NEstimators(low: int = 10, high: int = 250)[source]

Bases: object

low = 10

high = 250

class LearningRate(low: float = 0.1, high: float = 0.1)[source]

Bases: object

low = 0.1

high = 0.1

max_depth = XGBRegressor.Parameters.MaxDepth(low=2, high=32)

n_estimators = XGBRegressor.Parameters.NEstimators(low=10, high=250)

learning_rate = XGBRegressor.Parameters.LearningRate(low=0.1, high=0.1)

parameters = XGBRegressor.Parameters(max_depth=XGBRegressor.Parameters.MaxDepth(low=2, high=32), n_estimators=XGBRegressor.Parameters.NEstimators(low=10, high=250), learning_rate=XGBRegressor.Parameters.LearningRate(low=0.1, high=0.1))

name = 'XGBRegressor'

class optunaz.config.optconfig.PRFClassifierMaxFeatures(value)[source]

Bases: str, Enum

Method used to define the maximum number of features in a Probabilistic Random Forest

AUTO = 'auto': Auto sets max_features=sqrt(n_features).

SQRT = 'sqrt': Square root sets max_features=sqrt(n_features).

LOG2 = 'log2': Log2 sets max_features=log2(n_features).

class optunaz.config.optconfig.PRFClassifier(name, parameters)[source]

Bases: Algorithm

PRF (Probabilistic Random Forest).

PRF can be seen as a hybrid between regression and classification algorithms. Similar to regression algorithms, PRF takes as input real-valued probabilities, usually from Probabilistic Threshold Representation (PTR). However, similar to classification algorithms, it predicts probability of belonging to active or inactive class.

class Parameters(use_py_gini=1, use_py_leafs=1, max_depth=PRFClassifier.Parameters.PRFClassifierParametersMaxDepth(low=2, high=32), n_estimators=PRFClassifier.Parameters.PRFClassifierParametersNEstimators(low=10, high=250), max_features, min_py_sum_leaf=PRFClassifier.Parameters.PRFClassifierParametersMinPySumLeaf(low=1, high=5))[source]

Bases: object

Parameters:

use_py_gini (int) – The probability of y is used in GINI when this is True - minimum: 0, maximum: 1, title: Use pY GINI
use_py_leafs (int) – The probability of y is used in leaves when this is True - minimum: 0, maximum: 1, title: Use pY leafs
max_depth (PRFClassifierParametersMaxDepth) – The maximum depth of the tree. - title: max_depth
n_estimators (PRFClassifierParametersNEstimators) – The number of trees in the forest. - title: n_estimators
max_features (List) – The number of features to consider when looking for the best split: - If “auto”, then max_features=sqrt(n_features). - If “sqrt”, then max_features=sqrt(n_features). - If “log2”, then max_features=log2(n_features). - title: max_features
min_py_sum_leaf (PRFClassifierParametersMinPySumLeaf) – This parameter allows tree pruning when the propagation probability is small, thus reducing computation time. This value defines the probability threshold, pth as described in the Selective propagation scheme in the original publication Probabilistic Random Forest: A machine learning algorithm for noisy datasets - title: n_estimators

class PRFClassifierParametersNEstimators(low: int = 10, high: int = 250)[source]

Bases: object

low = 10

high = 250

class PRFClassifierParametersMaxDepth(low: int = 2, high: int = 32)[source]

Bases: object

low = 2

high = 32

class PRFClassifierParametersMinPySumLeaf(low: int = 1, high: int = 5)[source]

Bases: object

low = 1

high = 5

use_py_gini = 1

use_py_leafs = 1

max_depth = PRFClassifier.Parameters.PRFClassifierParametersMaxDepth(low=2, high=32)

n_estimators = PRFClassifier.Parameters.PRFClassifierParametersNEstimators(low=10, high=250)

max_features

min_py_sum_leaf = PRFClassifier.Parameters.PRFClassifierParametersMinPySumLeaf(low=1, high=5)

name

parameters

class optunaz.config.optconfig.ChemPropActivation(value)[source]

Bases: str, Enum

The activation function to use within the network. See https://chemprop.readthedocs.io/en/latest/args.html#chemprop.args.TrainArgs.activation for details

RELU = 'ReLU'

TANH = 'tanh'

LEAKYRELU = 'LeakyReLU'

PRELU = 'PReLU'

SELU = 'SELU'

ELU = 'ELU'

class optunaz.config.optconfig.ChemPropFeatures_Generator(value)[source]

Bases: str, Enum

Features generators are used for computing additional molecule-level features that are appended after message passing. See https://chemprop.readthedocs.io/en/latest/features.html#features-generators for details.

NONE = 'none': Turns off the features generator function.

MORGAN = 'morgan': Generates a binary Morgan fingerprint for a molecule.

MORGAN_COUNT = 'morgan_count': Generates a counts-based Morgan fingerprint for a molecule.

RDKIT_2D = 'rdkit_2d': Generates RDKit 2D features for a molecule.

RDKIT_2D_NORMALIZED = 'rdkit_2d_normalized': Generates RDKit 2D normalized features for a molecule.

class optunaz.config.optconfig.ChemPropAggregation(value)[source]

Bases: str, Enum

Atom-level representations from the MPNN

MEAN = 'mean': Representations averaged over all atoms of a molecule

SUM = 'sum': Representations summed over all atoms of a molecule

NORM = 'norm': Representations summed up and divided by a constant (default=100)

class optunaz.config.optconfig.ChemPropClassifier(name, parameters)[source]

Bases: Algorithm

Chemprop Classifier without hyperopt

Chemprop is an open-source package for training deep learning models for molecular property prediction. ChemProp trains two networks; a Directed Message Passing Neural Network (D-MPNN) to encode a graph representation of molecules, and a Feed Forward Neural Network (FFNN); a standard multi-layer perceptron trained to predict the target property using D-MPNN encoding. It was first presented in the paper “Analyzing Learned Molecular Representations for Property Prediction”. This implementation will use Optuna to optimise parameters instead of Hyperopt (as in the original implementation of ChemProp).

class Parameters(ensemble_size=1, epochs=30, activation, aggregation, aggregation_norm=ChemPropClassifier.Parameters.ChemPropParametersAggregation_Norm(low=1, high=200, q=1), batch_size=ChemPropClassifier.Parameters.ChemPropParametersBatch_Size(low=5, high=200, q=5), depth=ChemPropClassifier.Parameters.ChemPropParametersDepth(low=2, high=6, q=1), dropout=ChemPropClassifier.Parameters.ChemPropParametersDropout(low=0.0, high=0.4, q=0.04), features_generator, ffn_hidden_size=ChemPropClassifier.Parameters.ChemPropParametersFFN_Hidden_Size(low=300, high=2400, q=100), ffn_num_layers=ChemPropClassifier.Parameters.ChemPropParametersFFN_Num_Layers(low=1, high=3, q=1), final_lr_ratio_exp=ChemPropClassifier.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp(low=-4, high=0), hidden_size=ChemPropClassifier.Parameters.ChemPropParametersHidden_Size(low=300, high=2400, q=100), init_lr_ratio_exp=ChemPropClassifier.Parameters.ChemPropParametersInit_Lr_Ratio_Exp(low=-4, high=0), max_lr_exp=ChemPropClassifier.Parameters.ChemPropParametersMax_Lr_Exp(low=-6, high=-2), warmup_epochs_ratio=ChemPropClassifier.Parameters.ChemPropParametersWarmup_Epochs_Ratio(low=0.1, high=0.1, q=0.1))[source]

Bases: object

Parameters:

ensemble_size (int) – Number of ensembles with different weight initialisation (provides uncertainty) - minimum: 1, maximum: 5, title: Ensemble size
epochs (int) – Number of epochs to run (increasing this will increase run time) - minimum: 4, maximum: 400, title: Epochs
activation (List) – Activation function applied to the output of the weighted sum of inputs - title: activation
aggregation (List) – Aggregation scheme for atomic vectors into molecular vectors. - title: aggregation
aggregation_norm (ChemPropParametersAggregation_Norm) – For norm aggregation, number by which to divide summed up atomic features. - title: aggregation_norm
batch_size (ChemPropParametersBatch_Size) – How many samples per batch to load. - title: batch_size
depth (ChemPropParametersDepth) – Number of message passing steps (distance of neighboring atoms visible when modelling). - title: depth
dropout (ChemPropParametersDropout) – Dropout probability. During training, randomly zeroes some of the elements of the input tensor with probability p using samples from a Bernoulli distribution. Each channel will be zeroed out independently on every forward call. This has proven to be an effective technique for regularization and preventing the co-adaptation of neurons - title: dropout
features_generator (List) – Method of generating additional features. - title: features_generator
ffn_hidden_size (ChemPropParametersFFN_Hidden_Size) – Dimensionality of hidden layers in the FFN. - title: ffn_hidden_size
ffn_num_layers (ChemPropParametersFFN_Num_Layers) – Number of layers in the FFN after D-MPNN encoding. - title: ffn_num_layers
final_lr_ratio_exp (ChemPropParametersFinal_Lr_Ratio_Exp) – The exponential for the final learning rate. - title: final_lr_ratio_exp
hidden_size (ChemPropParametersHidden_Size) – Size of the hidden bond message vectors in the D-MPNN - title: hidden_size
init_lr_ratio_exp (ChemPropParametersInit_Lr_Ratio_Exp) – The exponential for the learning rate ratio. - title: init_lr_ratio_exp
max_lr_exp (ChemPropParametersMax_Lr_Exp) – The exponential for the maximum learning rate. - title: max_lr_exp
warmup_epochs_ratio (ChemPropParametersWarmup_Epochs_Ratio) – Ratio for the number of epochs during which learning rate increases linearly from init_lr to max_lr. Afterwards, learning rate decreases exponentially from max_lr to final_lr. - title: warmup_epochs_ratio

class ChemPropParametersAggregation_Norm(low: int = 1, high: int = 200, q: int = 1)[source]

Bases: object

low = 1

high = 200

q = 1

class ChemPropParametersBatch_Size(low: int = 5, high: int = 200, q: int = 5)[source]

Bases: object

low = 5

high = 200

q = 5

class ChemPropParametersDepth(low: int = 2, high: int = 6, q: int = 1)[source]

Bases: object

low = 2

high = 6

q = 1

class ChemPropParametersDropout(low: float = 0.0, high: float = 0.4, q: float = 0.04)[source]

Bases: object

low = 0.0

high = 0.4

q = 0.04

class ChemPropParametersFFN_Hidden_Size(low: int = 300, high: int = 2400, q: int = 100)[source]

Bases: object

low = 300

high = 2400

q = 100

class ChemPropParametersFFN_Num_Layers(low: int = 1, high: int = 3, q: int = 1)[source]

Bases: object

low = 1

high = 3

q = 1

class ChemPropParametersFinal_Lr_Ratio_Exp(low: int = - 4, high: int = 0)[source]

Bases: object

low = -4

high = 0

class ChemPropParametersHidden_Size(low: int = 300, high: int = 2400, q: int = 100)[source]

Bases: object

low = 300

high = 2400

q = 100

class ChemPropParametersInit_Lr_Ratio_Exp(low: int = - 4, high: int = 0)[source]

Bases: object

low = -4

high = 0

class ChemPropParametersMax_Lr_Exp(low: int = - 6, high: int = - 2)[source]

Bases: object

low = -6

high = -2

class ChemPropParametersWarmup_Epochs_Ratio(low: float = 0.1, high: float = 0.1, q: float = 0.1)[source]

Bases: object

low = 0.1

high = 0.1

q = 0.1

ensemble_size = 1

epochs = 30

activation

aggregation

aggregation_norm = ChemPropClassifier.Parameters.ChemPropParametersAggregation_Norm(low=1, high=200, q=1)

batch_size = ChemPropClassifier.Parameters.ChemPropParametersBatch_Size(low=5, high=200, q=5)

depth = ChemPropClassifier.Parameters.ChemPropParametersDepth(low=2, high=6, q=1)

dropout = ChemPropClassifier.Parameters.ChemPropParametersDropout(low=0.0, high=0.4, q=0.04)

features_generator

ffn_hidden_size = ChemPropClassifier.Parameters.ChemPropParametersFFN_Hidden_Size(low=300, high=2400, q=100)

ffn_num_layers = ChemPropClassifier.Parameters.ChemPropParametersFFN_Num_Layers(low=1, high=3, q=1)

final_lr_ratio_exp = ChemPropClassifier.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp(low=-4, high=0)

hidden_size = ChemPropClassifier.Parameters.ChemPropParametersHidden_Size(low=300, high=2400, q=100)

init_lr_ratio_exp = ChemPropClassifier.Parameters.ChemPropParametersInit_Lr_Ratio_Exp(low=-4, high=0)

max_lr_exp = ChemPropClassifier.Parameters.ChemPropParametersMax_Lr_Exp(low=-6, high=-2)

warmup_epochs_ratio = ChemPropClassifier.Parameters.ChemPropParametersWarmup_Epochs_Ratio(low=0.1, high=0.1, q=0.1)

name

parameters

class optunaz.config.optconfig.ChemPropRegressor(name, parameters)[source]

Bases: Algorithm

Chemprop Regressor

Chemprop is an open-source package for training deep learning models for molecular property prediction. ChemProp trains two networks; a Directed Message Passing Neural Network (D-MPNN) to encode a graph representation of molecules, and a Feed Forward Neural Network (FFNN); a standard multi-layer perceptron trained to predict the target property using D-MPNN encoding. It was first presented in the paper “Analyzing Learned Molecular Representations for Property Prediction”. This implementation will use Optuna to optimise parameters instead of Hyperopt (as in the original implementation of ChemProp).

class Parameters(ensemble_size=1, epochs=30, activation, aggregation, aggregation_norm=ChemPropRegressor.Parameters.ChemPropParametersAggregation_Norm(low=1, high=200, q=1), batch_size=ChemPropRegressor.Parameters.ChemPropParametersBatch_Size(low=5, high=200, q=5), depth=ChemPropRegressor.Parameters.ChemPropParametersDepth(low=2, high=6, q=1), dropout=ChemPropRegressor.Parameters.ChemPropParametersDropout(low=0.0, high=0.4, q=0.04), features_generator, ffn_hidden_size=ChemPropRegressor.Parameters.ChemPropParametersFFN_Hidden_Size(low=300, high=2400, q=100), ffn_num_layers=ChemPropRegressor.Parameters.ChemPropParametersFFN_Num_Layers(low=1, high=3, q=1), final_lr_ratio_exp=ChemPropRegressor.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp(low=-4, high=0), hidden_size=ChemPropRegressor.Parameters.ChemPropParametersHidden_Size(low=300, high=2400, q=100), init_lr_ratio_exp=ChemPropRegressor.Parameters.ChemPropParametersInit_Lr_Ratio_Exp(low=-4, high=0), max_lr_exp=ChemPropRegressor.Parameters.ChemPropParametersMax_Lr_Exp(low=-6, high=-2), warmup_epochs_ratio=ChemPropRegressor.Parameters.ChemPropParametersWarmup_Epochs_Ratio(low=0.1, high=0.1, q=0.1))[source]

Bases: object

Parameters:

ensemble_size (int) – Number of ensembles with different weight initialisation (provides uncertainty) - minimum: 1, maximum: 5, title: Ensemble size
epochs (int) – Number of epochs to run (increasing this will increase run time) - minimum: 4, maximum: 400, title: Epochs
activation (List) – Activation function applied to the output of the weighted sum of inputs - title: activation
aggregation (List) – Aggregation scheme for atomic vectors into molecular vectors. - title: aggregation
aggregation_norm (ChemPropParametersAggregation_Norm) – For norm aggregation, number by which to divide summed up atomic features. - title: aggregation_norm
batch_size (ChemPropParametersBatch_Size) – How many samples per batch to load. - title: batch_size
depth (ChemPropParametersDepth) – Number of message passing steps (distance of neighboring atoms visible when modelling). - title: depth
dropout (ChemPropParametersDropout) – Dropout probability. During training, randomly zeroes some of the elements of the input tensor with probability p using samples from a Bernoulli distribution. Each channel will be zeroed out independently on every forward call. This has proven to be an effective technique for regularization and preventing the co-adaptation of neurons - title: dropout
features_generator (List) – Method of generating additional features. - title: features_generator
ffn_hidden_size (ChemPropParametersFFN_Hidden_Size) – Dimensionality of hidden layers in the FFN. - title: ffn_hidden_size
ffn_num_layers (ChemPropParametersFFN_Num_Layers) – Number of layers in the FFN after D-MPNN encoding. - title: ffn_num_layers
final_lr_ratio_exp (ChemPropParametersFinal_Lr_Ratio_Exp) – The exponential for the final learning rate. - title: final_lr_ratio_exp
hidden_size (ChemPropParametersHidden_Size) – Size of the hidden bond message vectors in the D-MPNN - title: hidden_size
init_lr_ratio_exp (ChemPropParametersInit_Lr_Ratio_Exp) – The exponential for the learning rate ratio. - title: init_lr_ratio_exp
max_lr_exp (ChemPropParametersMax_Lr_Exp) – The exponential for the maximum learning rate. - title: max_lr_exp
warmup_epochs_ratio (ChemPropParametersWarmup_Epochs_Ratio) – Ratio for the number of epochs during which learning rate increases linearly from init_lr to max_lr. Afterwards, learning rate decreases exponentially from max_lr to final_lr. - title: warmup_epochs_ratio

class ChemPropParametersAggregation_Norm(low: int = 1, high: int = 200, q: int = 1)[source]

Bases: object

low = 1

high = 200

q = 1

class ChemPropParametersBatch_Size(low: int = 5, high: int = 200, q: int = 5)[source]

Bases: object

low = 5

high = 200

q = 5

class ChemPropParametersDepth(low: int = 2, high: int = 6, q: int = 1)[source]

Bases: object

low = 2

high = 6

q = 1

class ChemPropParametersDropout(low: float = 0.0, high: float = 0.4, q: float = 0.04)[source]

Bases: object

low = 0.0

high = 0.4

q = 0.04

class ChemPropParametersFFN_Hidden_Size(low: int = 300, high: int = 2400, q: int = 100)[source]

Bases: object

low = 300

high = 2400

q = 100

class ChemPropParametersFFN_Num_Layers(low: int = 1, high: int = 3, q: int = 1)[source]

Bases: object

low = 1

high = 3

q = 1

class ChemPropParametersFinal_Lr_Ratio_Exp(low: int = - 4, high: int = 0)[source]

Bases: object

low = -4

high = 0

class ChemPropParametersHidden_Size(low: int = 300, high: int = 2400, q: int = 100)[source]

Bases: object

low = 300

high = 2400

q = 100

class ChemPropParametersInit_Lr_Ratio_Exp(low: int = - 4, high: int = 0)[source]

Bases: object

low = -4

high = 0

class ChemPropParametersMax_Lr_Exp(low: int = - 6, high: int = - 2)[source]

Bases: object

low = -6

high = -2

class ChemPropParametersWarmup_Epochs_Ratio(low: float = 0.1, high: float = 0.1, q: float = 0.1)[source]

Bases: object

low = 0.1

high = 0.1

q = 0.1

ensemble_size = 1

epochs = 30

activation

aggregation

aggregation_norm = ChemPropRegressor.Parameters.ChemPropParametersAggregation_Norm(low=1, high=200, q=1)

batch_size = ChemPropRegressor.Parameters.ChemPropParametersBatch_Size(low=5, high=200, q=5)

depth = ChemPropRegressor.Parameters.ChemPropParametersDepth(low=2, high=6, q=1)

dropout = ChemPropRegressor.Parameters.ChemPropParametersDropout(low=0.0, high=0.4, q=0.04)

features_generator

ffn_hidden_size = ChemPropRegressor.Parameters.ChemPropParametersFFN_Hidden_Size(low=300, high=2400, q=100)

ffn_num_layers = ChemPropRegressor.Parameters.ChemPropParametersFFN_Num_Layers(low=1, high=3, q=1)

final_lr_ratio_exp = ChemPropRegressor.Parameters.ChemPropParametersFinal_Lr_Ratio_Exp(low=-4, high=0)

hidden_size = ChemPropRegressor.Parameters.ChemPropParametersHidden_Size(low=300, high=2400, q=100)

init_lr_ratio_exp = ChemPropRegressor.Parameters.ChemPropParametersInit_Lr_Ratio_Exp(low=-4, high=0)

max_lr_exp = ChemPropRegressor.Parameters.ChemPropParametersMax_Lr_Exp(low=-6, high=-2)

warmup_epochs_ratio = ChemPropRegressor.Parameters.ChemPropParametersWarmup_Epochs_Ratio(low=0.1, high=0.1, q=0.1)

name

parameters

class optunaz.config.optconfig.ChemPropFrzn(value)[source]

Bases: str, Enum

Defines which layers of the ChemProp network (the MPNN and/or layers of the FFN) have their weights frozen during transfer learning.

NONE = 'none': No weights are frozen

MPNN = 'mpnn': Freeze the weights in only the MPNN during transfer learning

MPNN_FIRST_FFN = 'mpnn_first_ffn': Freeze the MPNN and first layer of the FFN during transfer learning

MPNN_LAST_FFN = 'mpnn_last_ffn': Freeze the MPNN and until the penultimate layer of the FFN during transfer learning

class optunaz.config.optconfig.ChemPropRegressorPretrained(name, parameters)[source]

Bases: Algorithm

Chemprop Regressor from a pretrained model

Pretraining can be carried out by supplying previously trained QSARtuna ChemProp PKL model.

class Parameters(epochs=ChemPropRegressorPretrained.Parameters.ChemPropParametersEpochs(low=4, high=30, q=1), frzn, pretrained_model=None)[source]

Bases: object

Parameters:

epochs (ChemPropParametersEpochs) – Number of epochs to fine-tune the pretrained model on new data - title: epochs
frzn (List) – Decide which layers of the MPNN or FFN to freeze during transfer learning. - title: Frozen layers
pretrained_model (str) – Path to a pretrained QSARtuna pkl model - title: Pretrained Model

class ChemPropParametersEpochs(low: int = 4, high: int = 30, q: int = 1)[source]

Bases: object

low = 4

high = 30

q = 1

epochs = ChemPropRegressorPretrained.Parameters.ChemPropParametersEpochs(low=4, high=30, q=1)

frzn

pretrained_model = None

name

parameters

class optunaz.config.optconfig.ChemPropSearch_Parameter_Level(value)[source]

Bases: str, Enum

QSARtuna implements a hyperparameter search space level for ChemProp in order to define the Hyperopt search space to optimise. Increasing levels correspond to increasing the search space.

AUTO = 'auto': Alter the space depending on training data, i.e training set size, no. of hyperparameter trial configurations (num_iters) & no. epochs. This ensures search spaces are not too large for limited data/epochs, and vice-versa, an extensive search space is trailed when applicable.

L1 = '1': Search only the basic set of hyperparameters: depth, ffn_num_layers, dropout, and linked_hidden_size.

L2 = '2': Search basic and linked_hidden_size (search hidden_size and ffn_hidden_size constrained to have the same value.

L3 = '3': Search for basic, “hidden_size”, “ffn_hidden_size` and learning_rate (which uses max_lr_exp, init_lr_exp, final_lr_exp, and warmup_epochs parameters).

L4 = '4': Search for basic, hidden_size, ffn_hidden_size (hidden sizes now independent) and learning_rate.

L5 = '5': Search for basic, hidden_size, ffn_hidden_size, learning_rate and activation.

L6 = '6': Search for basic, hidden_size, ffn_hidden_size, learning_rate, activation and batch_size.

L7 = '7': Search for basic, hidden_size, ffn_hidden_size, learning_rate, activation, batch_size and aggregation_norm.

L8 = '8': Search all possible network hyper-parameters

class optunaz.config.optconfig.ChemPropHyperoptClassifier(name, parameters)[source]

Bases: Algorithm

Chemprop classifier

Chemprop is an open-source package for training deep learning models for molecular property prediction. ChemProp trains two networks; a Directed Message Passing Neural Network (D-MPNN) to encode a graph representation of molecules, and a Feed Forward Neural Network (FFNN); a standard multi-layer perceptron trained to predict the target property using D-MPNN encoding. It was first presented in the paper “Analyzing Learned Molecular Representations for Property Prediction”. This implementation will use Hyperopt to optimise network parameters within each trial, allowing for Optuna to trial more complex hyperparameters, such as feature generation and side information weighting. NB: This implementation can also be used to implement quick/simple ChemProp models by using sensible defaults from the authors; to do this run ChemProp with Num_Iters=’1’.

class Parameters(ensemble_size=1, epochs=30, num_iters=1, features_generator, search_parameter_level)[source]

Bases: object

Parameters:

ensemble_size (int) – Number of ensembles with different weight initialisation (provides uncertainty) - minimum: 1, maximum: 5, title: Ensemble size
epochs (int) – Number of epochs to run (increasing this will increase run time) - minimum: 4, maximum: 400, title: Epochs
num_iters (int) – Dictates the number of (Hyperopt) trials ChemProp will run - minimum: 1, maximum: 50, title: Number of HyperOpt iterations
features_generator (List) – Method of generating additional features. - title: features_generator
search_parameter_level (List) – Defines the complexity of the search space used by Hyperopt (larger=more complex). - title: search_parameter_level

ensemble_size = 1

epochs = 30

num_iters = 1

features_generator

search_parameter_level

name

parameters

class optunaz.config.optconfig.ChemPropHyperoptRegressor(name, parameters)[source]

Bases: Algorithm

Chemprop regressor

Chemprop is an open-source package for training deep learning models for molecular property prediction. ChemProp trains two networks; a Directed Message Passing Neural Network (D-MPNN) to encode a graph representation of molecules, and a Feed Forward Neural Network (FFNN); a standard multi-layer perceptron trained to predict the target property using D-MPNN encoding. It was first presented in the paper “Analyzing Learned Molecular Representations for Property Prediction”. This implementation will use Hyperopt to optimise network parameters within each trial, allowing for Optuna to trial more complex hyperparameters, such as feature generation and side information weighting. NB: This implementation can also be used to implement quick/simple ChemProp models by using sensible defaults from the authors; to do this run ChemProp with Num_Iters=’1’.

class Parameters(ensemble_size=1, epochs=30, num_iters=1, features_generator, search_parameter_level)[source]

Bases: object

Parameters:

ensemble_size (int) – Number of ensembles with different weight initialisation (provides uncertainty) - minimum: 1, maximum: 5, title: Ensemble size
epochs (int) – Number of epochs to run (increasing this will increase run time) - minimum: 4, maximum: 400, title: Epochs
num_iters (int) – Dictates the number of (Hyperopt) trials ChemProp will run - minimum: 1, maximum: 50, title: Number of HyperOpt iterations
features_generator (List) – Method of generating additional features. - title: features_generator
search_parameter_level (List) – Defines the complexity of the search space used by Hyperopt (larger=more complex). - title: search_parameter_level

ensemble_size = 1

epochs = 30

num_iters = 1

features_generator

search_parameter_level

name

parameters

class optunaz.config.optconfig.CustomClassificationModel(name, parameters)[source]

Bases: Algorithm

Classifier from a preexisting pkl model

class Parameters(preexisting_model=None, refit_model=0)[source]

Bases: object

Parameters:

preexisting_model (str) – Path to a preexisting pkl model - title: Preexisting Model
refit_model (int) – Whether fit should be called during the trial of the custom model - minimum: 0, maximum: 1, title: Refit Model

preexisting_model = None

refit_model = 0

name

parameters

class optunaz.config.optconfig.CustomRegressionModel(name, parameters)[source]

Bases: Algorithm

Regressor from a preexisting pkl model

class Parameters(preexisting_model=None, refit_model=0)[source]

Bases: object

Parameters:

preexisting_model (str) – Path to a preexisting pkl model - title: Preexisting Model
refit_model (int) – Whether fit should be called during the trial of the custom model - minimum: 0, maximum: 1, title: Refit Model

preexisting_model = None

refit_model = 0

name

parameters

class optunaz.config.optconfig.CalibratedClassifierCVEnsemble(value)[source]

Bases: str, Enum

An enumeration.

TRUE = 'True'

FALSE = 'False'

class optunaz.config.optconfig.CalibratedClassifierCVMethod(value)[source]

Bases: str, Enum

An enumeration.

SIGMOID = 'sigmoid'

ISOTONIC = 'isotonic'

VENNABERS = 'vennabers'

class optunaz.config.optconfig.CalibratedClassifierCVWithVA(name, parameters)[source]

Bases: Algorithm

Calibrated Classifier.

Probability calibration with isotonic regression, logistic regression, or VennABERS.

This class uses cross-validation (cv) to both estimate the parameters of a classifier and subsequently calibrate a classifier. With default ensemble=True, for each cv split it fits a copy of the base estimator to the training subset, and calibrates it using the testing subset. For prediction, predicted probabilities are averaged across these individual calibrated classifiers. When ensemble=False, cv is used to obtain unbiased predictions which are then used for calibration. For prediction, the base estimator, trained using all the data, is used. VennABERS offers uncertainty prediction based on p0 vs. p1 discordance.

class Parameters(estimator=typing.Union[optunaz.config.optconfig.AdaBoostClassifier, optunaz.config.optconfig.KNeighborsClassifier, optunaz.config.optconfig.LogisticRegression, optunaz.config.optconfig.RandomForestClassifier, optunaz.config.optconfig.SVC, optunaz.config.optconfig.ChemPropClassifier, optunaz.config.optconfig.ChemPropHyperoptClassifier, optunaz.config.optconfig.CustomClassificationModel], ensemble=<CalibratedClassifierCVEnsemble.TRUE: 'True'>, method=<CalibratedClassifierCVMethod.ISOTONIC: 'isotonic'>, n_folds=2)[source]

Bases: object

Parameters:

estimator (Union) – Base estimator to use for calibration - title: Estimator
ensemble (Union) – Whether a copy of the base estimator is fitted for each cv split, vs. cv being used to obtain unbiased predictions for calibration - title: ensemble
method (Union) – Calibration method used to obtain calibrated predictions - title: method
n_folds (int) – Number of cv folds to obtain calibration data - minimum: 2, maximum: 5, title: Number of Cross validation folds (splits)

estimator: alias of Union[AdaBoostClassifier, KNeighborsClassifier, LogisticRegression, RandomForestClassifier, SVC, ChemPropClassifier, ChemPropHyperoptClassifier, CustomClassificationModel]

ensemble = 'True'

method = 'isotonic'

n_folds = 2

name

parameters

class optunaz.config.optconfig.Mapie(name, parameters)[source]

Bases: Algorithm

MAPIE - Model Agnostic Prediction Interval Estimator

MAPIE allows you to estimate prediction intervals for regression models. Prediction intervals output by MAPIE encompass both aleatoric and epistemic uncertainties and are backed by strong theoretical guarantees thanks to conformal prediction methods.

class Parameters(estimator=typing.Union[optunaz.config.optconfig.Lasso, optunaz.config.optconfig.PLSRegression, optunaz.config.optconfig.RandomForestRegressor, optunaz.config.optconfig.KNeighborsRegressor, optunaz.config.optconfig.Ridge, optunaz.config.optconfig.SVR, optunaz.config.optconfig.XGBRegressor, optunaz.config.optconfig.PRFClassifier], mapie_alpha=0.05)[source]

Bases: object

Parameters:

estimator (Union) – Base estimator to use - title: Estimator
mapie_alpha (float) – Alpha used to generate uncertainty estimates - minimum: 0.01, maximum: 0.99, title: Uncertainty alpha

estimator: alias of Union[Lasso, PLSRegression, RandomForestRegressor, KNeighborsRegressor, Ridge, SVR, XGBRegressor, PRFClassifier]

mapie_alpha = 0.05

name

parameters

optunaz.config.optconfig.isanyof(obj, classes)[source]

optunaz.config.optconfig.detect_mode_from_algs(algs)[source]

optunaz.config.optconfig.copy_path_for_scaled_descriptor(descriptors, dataset, cache)[source]

class optunaz.config.optconfig.OptimizationConfig(name='', description='', data=None, mode=None, algorithms=None, descriptors=None, settings=None, visualization=None, task='optimization')[source]

Bases: object

Optimization configuration.

This is configuration for hyperparameter optimization. It roughly corresponds to Optuna Study.

class Settings(mode=None, cross_validation=5, cv_split_strategy=<factory>, shuffle=False, direction=OptimizationDirection.MAXIMIZATION, scoring=None, minimise_std_dev=False, use_cache=True, n_trials=300, n_jobs=-1, n_startup_trials=50, random_seed=None, optuna_storage=None, track_to_mlflow=False, tracking_rest_endpoint=None, split_chemprop=True, n_chemprop_trials=1)[source]

Bases: object

Optimization settings.

mode = None

cross_validation = 5

cv_split_strategy

shuffle = False

direction = 'maximize'

scoring = None

minimise_std_dev = False

use_cache = True

n_trials = 300

n_jobs = -1

n_startup_trials = 50

random_seed = None

optuna_storage = None

track_to_mlflow = False

tracking_rest_endpoint = None

split_chemprop = True

n_chemprop_trials = 1

name = ''

description = ''

data = None

mode = None

algorithms = None

descriptors = None

settings = None

visualization = None

task = 'optimization'

set_cache()[source]: Set the cache for descriptor generation when the number of cores supports this

set_algo_hashes()[source]

Set hashes for the algorithms

This facilitates tracking duplicate algorithm types with distinct param setups

Module contents

class optunaz.config.ModelMode(value)[source]

Bases: str, Enum

Model mode, either regression or classification.

REGRESSION = 'regression'

CLASSIFICATION = 'classification'

class optunaz.config.OptimizationDirection(value)[source]

Bases: str, Enum

Optimization direction, either minimization or maximization.

MINIMIZATION = 'minimize'

MAXIMIZATION = 'maximize'

class optunaz.config.Task(value)[source]

Bases: str, Enum

Task: optimization, building, or prediction.

OPTIMIZATION = 'optimization'

PREDICTION = 'prediction'

BUILDING = 'building'

class optunaz.config.NameParameterDataclass[source]

Bases: ABC

A base class for (data-)classes that follow “name”-“parameter” structure.

Here is an example of a name-parameter class:

>>> class ECFP(NameParameterDataclass):
>>>     name: str
>>>     parameters: Dict

This name-parameter structure is used for parsing Json.

Normally, this abstract class should declare two abstract properties:

>>> @property
>>> @abstractmethod
>>> def name(self) -> str:
>>>     pass
>>>
>>> @property
>>> @abstractmethod
>>> def parameters(self) -> Any:
>>>     pass

However, Pydantic does not allow overriding properties, thus we don’t declare them.

classmethod new(**kwargs)[source]

Convenience method to initialize objects, instead of __init__.

For example, the following is a full version that calls __init__:

>>> descriptor = Avalon(name='Avalon', parameters=Avalon.Parameters(nBits=1024))

This method allows a shorter version:

>>> descriptor = Avalon.new(nBits=1024)

class optunaz.config.Algorithm[source]

Bases: NameParameterDataclass

Abstract class for ML algorithms.

class optunaz.config.Visualization(output_folder, file_format, plots, use_xvfb=False)[source]

Bases: object

Visualization configuration.

class ImageFileFormat(value)[source]

Bases: str, Enum

Enumeration of supported image file formats.

PNG = 'png'

JPEG = 'jpeg'

JPG = 'jpg'

PDF = 'pdf'

SVG = 'svg'

class Plots(plot_history: bool = False, plot_contour: bool = False, plot_parallel_coordinate: bool = False, plot_slice: bool = False)[source]

Bases: object

plot_history = False

plot_contour = False

plot_parallel_coordinate = False

plot_slice = False

output_folder

file_format

plots

use_xvfb = False