Source code for optunaz.config.build_from_opt

import json
from joblib import Memory, effective_n_jobs
from typing import Union
from functools import partial

from apischema import deserialize, serialize
from optuna import Study
from optuna.trial import FrozenTrial

from optunaz.config.buildconfig import BuildConfig
from optunaz.config.optconfig import OptimizationConfig
from optunaz.descriptors import MolDescriptor
from optunaz.utils import mkdict
from optunaz.utils.enums import StudyUserAttrs, TrialParams

import optunaz.config.optconfig as opt
import optunaz.config.buildconfig as build
import optunaz.descriptors as descriptors
from optunaz.utils.enums.configuration_enum import ConfigurationEnum

_CE = ConfigurationEnum()


def set_build_cache(study: Study, optconfig: OptimizationConfig) -> Memory | None:
    """Set the cache to a preexisting one from optimisation, when the number of cores supports this."""
    if effective_n_jobs(optconfig.settings.n_jobs) > 1 and "cache" in study.user_attrs:
        return Memory(study.user_attrs["cache"], verbose=0)
    else:
        return None

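The returned joblib ``Memory`` points at the cache directory recorded on the study, so expensive computations cached during optimisation can be reused at build time. A minimal sketch of that joblib caching behaviour follows; the cache directory and the toy function are illustrative and not part of this module:

    from joblib import Memory

    memory = Memory("/tmp/qsartuna_cache", verbose=0)  # illustrative directory

    @memory.cache
    def square(x):
        return x * x

    square(3)  # computed and written to the on-disk cache
    square(3)  # second call is read back from the cache
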
def remove_algo_hash(trial: FrozenTrial) -> FrozenTrial:
    """Remove the hash from an Optuna algo param set."""
    trial.params = {
        param_name.split("__")[0]: param_value
        for param_name, param_value in trial.params.items()
    }
    return trial

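For illustration, this simply strips the ``__<hash>`` suffix that ``encode_name`` (below) appends to parameter names. A self-contained sketch, using a made-up hash suffix:

    params = {"n_estimators__3667aa1c": 100, "learning_rate__3667aa1c": 0.05}
    stripped = {name.split("__")[0]: value for name, value in params.items()}
    assert stripped == {"n_estimators": 100, "learning_rate": 0.05}
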
def buildconfig_from_trial(study: Study, trial: FrozenTrial) -> BuildConfig:
    optconfig_json = study.user_attrs.get(StudyUserAttrs.OPTCONFIG, None)
    if optconfig_json is None:
        raise ValueError(
            "Study does not have a user attribute with Optimization Configuration."
        )
    optconfig = deserialize(OptimizationConfig, optconfig_json)

    trial = remove_algo_hash(trial)
    descriptor_json = trial.params[TrialParams.DESCRIPTOR]
    descriptor_dict = json.loads(descriptor_json)
    descriptor = deserialize(MolDescriptor, descriptor_dict)

    # Aux weight for side information is prepared here
    aux_weight_pc = trial.params.get(_CE.DESCRIPTORS_SMILES_AND_SI_AUX_WEIGHT_PC, 100)
    # The base estimator for calibrated methods is prepared here
    base_estimator = trial.user_attrs.get(
        _CE.ALGORITHMS_CALIBRATEDCLASSIFIERCV_ESTIMATOR
    )
    # The pretrained model for pretrained ChemProp methods is prepared here
    pretrained_model = trial.user_attrs.get(
        _CE.ALGORITHMS_CHEMPROP_PRETRAINED_MODEL, {}
    )
    # The parameter dictionary for calibrated CV methods is prepared here
    calibrated_params = trial.user_attrs.get(
        _CE.ALGORITHMS_CALIBRATEDCLASSIFIERCV_PARAMS, {}
    )
    if base_estimator:
        base_estimator[_CE.GENERAL_PARAMETERS][
            _CE.DESCRIPTORS_SMILES_AND_SI_AUX_WEIGHT_PC
        ] = aux_weight_pc

    algorithm_dict = {
        _CE.GENERAL_NAME: trial.params.get(_CE.GENERAL_ALGORITHM_NAME),
        _CE.GENERAL_PARAMETERS: mkdict(
            {
                **trial.params,
                **calibrated_params,
                **{
                    _CE.ALGORITHMS_ESTIMATOR: base_estimator,
                    _CE.DESCRIPTORS_SMILES_AND_SI_AUX_WEIGHT_PC: aux_weight_pc,
                    _CE.ALGORITHMS_CHEMPROP_PRETRAINED_MODEL: pretrained_model,
                },
            }
        ),
    }
    algorithm = deserialize(
        build.AnyAlgorithm, algorithm_dict, additional_properties=True
    )

    if optconfig.settings.minimise_std_dev:
        best_trial = study.best_trials[0].number
        best_value = study.best_trials[0].values[0]
    else:
        best_trial = study.best_trial.number
        best_value = study.best_value

    return BuildConfig(
        data=optconfig.data,
        descriptor=descriptor,
        algorithm=algorithm,
        metadata=BuildConfig.Metadata(
            name=optconfig.name,
            cross_validation=optconfig.settings.cross_validation,
            shuffle=optconfig.settings.shuffle,
            best_trial=best_trial,
            best_value=best_value,
            n_trials=optconfig.settings.n_trials,
        ),
        settings=BuildConfig.Settings(
            mode=optconfig.settings.mode,
            scoring=optconfig.settings.scoring,
            direction=optconfig.settings.direction,
            n_trials=optconfig.settings.n_trials,
        ),
    )

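A hedged usage sketch (not part of the module): given an existing optimisation study whose ``optconfig`` user attribute was set by optunaz, the build configuration of the best trial could be recovered and serialised roughly as follows. The study name and storage URL are placeholders:

    import optuna
    from apischema import serialize
    from optunaz.config.build_from_opt import buildconfig_from_trial

    study = optuna.load_study(study_name="my_study", storage="sqlite:///optuna.db")
    buildconfig = buildconfig_from_trial(study, study.best_trial)
    print(serialize(buildconfig))
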
def encode_name(CEname, hash=hash):
    """Encode the parameter names with a hash to enable multi-parameter optimisation."""
    return f"{CEname}__{hash}"

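A one-line illustration of the encoding (the hash value is made up): suffixing the parameter name with the algorithm hash keeps parameters of different candidate algorithms from colliding within one study.

    assert encode_name("max_depth", hash="3667aa1c") == "max_depth__3667aa1c"
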
def suggest_alg_params(trial: FrozenTrial, alg: opt.AnyAlgorithm) -> build.AnyAlgorithm:
    para = alg.parameters
    _encode_name = partial(encode_name, hash=alg.hash)

    if isinstance(alg, opt.AdaBoostClassifier):
        n_estimators = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_ADABOOSTCLASSIFIER_N_ESTIMATORS),
            low=para.n_estimators.low, high=para.n_estimators.high,
        )
        learning_rate = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_ADABOOSTCLASSIFIER_LEARNING_RATE),
            low=para.learning_rate.low, high=para.learning_rate.high,
        )
        return build.AdaBoostClassifier.new(
            n_estimators=n_estimators, learning_rate=learning_rate
        )
    elif isinstance(alg, opt.Lasso):
        alpha = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_LASSO_ALPHA),
            low=para.alpha.low, high=para.alpha.high,
        )
        return build.Lasso.new(alpha=alpha)
    elif isinstance(alg, opt.KNeighborsClassifier):
        metric = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_KNEIGHBORS_METRIC), choices=para.metric
        )
        n_neighbors = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_KNEIGHBORS_N_NEIGHBORS),
            low=para.n_neighbors.low, high=para.n_neighbors.high,
        )
        weights = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_KNEIGHBORS_WEIGHTS), choices=para.weights
        )
        return build.KNeighborsClassifier.new(
            metric=metric, n_neighbors=n_neighbors, weights=weights
        )
    elif isinstance(alg, opt.KNeighborsRegressor):
        metric = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_KNEIGHBORS_METRIC), choices=para.metric
        )
        n_neighbors = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_KNEIGHBORS_N_NEIGHBORS),
            low=para.n_neighbors.low, high=para.n_neighbors.high,
        )
        weights = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_KNEIGHBORS_WEIGHTS), choices=para.weights
        )
        return build.KNeighborsRegressor.new(
            metric=metric, n_neighbors=n_neighbors, weights=weights
        )
    elif isinstance(alg, opt.LogisticRegression):
        solver = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_LOGISTICREGRESSION_SOLVER),
            choices=para.solver,
        )
        lg_c = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_LOGISTICREGRESSION_C),
            low=para.C.low, high=para.C.high, log=True,
        )
        return build.LogisticRegression.new(solver=solver, C=lg_c)
    elif isinstance(alg, opt.PLSRegression):
        n_components = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_PLSREGRESSION_N_COMPONENTS),
            low=para.n_components.low, high=para.n_components.high,
        )
        return build.PLSRegression.new(n_components=n_components)
    elif isinstance(alg, opt.RandomForestClassifier):
        max_depth = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_RF_MAX_DEPTH),
            low=para.max_depth.low, high=para.max_depth.high,
        )
        n_estimators = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_RF_N_ESTIMATORS),
            low=para.n_estimators.low, high=para.n_estimators.high,
        )
        max_features = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_RF_MAX_FEATURES),
            choices=para.max_features,
        )
        return build.RandomForestClassifier.new(
            max_depth=max_depth, n_estimators=n_estimators, max_features=max_features
        )
    elif isinstance(alg, opt.RandomForestRegressor):
        max_depth = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_RF_MAX_DEPTH),
            low=para.max_depth.low, high=para.max_depth.high,
        )
        n_estimators = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_RF_N_ESTIMATORS),
            low=para.n_estimators.low, high=para.n_estimators.high,
        )
        max_features = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_RF_MAX_FEATURES),
            choices=para.max_features,
        )
        return build.RandomForestRegressor.new(
            max_depth=max_depth, n_estimators=n_estimators, max_features=max_features
        )
    elif isinstance(alg, opt.Ridge):
        alpha = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_RIDGE_ALPHA),
            low=para.alpha.low, high=para.alpha.high,
        )
        return build.Ridge.new(alpha=alpha)
    elif isinstance(alg, opt.SVC):
        gamma = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_SVC_GAMMA),
            low=para.gamma.low, high=para.gamma.high, log=True,
        )
        svc_c = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_SVC_C),
            low=para.C.low, high=para.C.high, log=True,
        )
        return build.SVC.new(gamma=gamma, C=svc_c)
    elif isinstance(alg, opt.SVR):
        gamma = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_SVR_GAMMA),
            low=para.gamma.low, high=para.gamma.high, log=True,
        )
        svr_c = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_SVR_C),
            low=para.C.low, high=para.C.high, log=True,
        )
        return build.SVR.new(C=svr_c, gamma=gamma)
    elif isinstance(alg, opt.XGBRegressor):
        max_depth = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_XGBREGRESSOR_MAX_DEPTH),
            low=para.max_depth.low, high=para.max_depth.high,
        )
        n_estimators = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_XGBREGRESSOR_N_ESTIMATORS),
            low=para.n_estimators.low, high=para.n_estimators.high,
        )
        learning_rate = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_XGBREGRESSOR_LEARNING_RATE),
            low=para.learning_rate.low, high=para.learning_rate.high,
        )
        return build.XGBRegressor.new(
            max_depth=max_depth, n_estimators=n_estimators, learning_rate=learning_rate
        )
    elif isinstance(alg, opt.PRFClassifier):
        max_depth = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_PRF_MAX_DEPTH),
            low=para.max_depth.low, high=para.max_depth.high,
        )
        n_estimators = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_PRF_N_ESTIMATORS),
            low=para.n_estimators.low, high=para.n_estimators.high,
        )
        max_features = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_PRF_MAX_FEATURES),
            choices=para.max_features,
        )
        min_py_sum_leaf = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_PRF_MINPYSUMLEAF),
            low=para.min_py_sum_leaf.low, high=para.min_py_sum_leaf.high,
        )
        use_py_gini = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_PRF_USE_PY_GINI),
            low=para.use_py_gini, high=para.use_py_gini,
        )
        use_py_leafs = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_PRF_USE_PY_LEAFS),
            low=para.use_py_leafs, high=para.use_py_leafs,
        )
        return build.PRFClassifier.new(
            max_depth=max_depth, n_estimators=n_estimators,
            max_features=max_features, min_py_sum_leaf=min_py_sum_leaf,
            use_py_gini=use_py_gini, use_py_leafs=use_py_leafs,
        )
    elif isinstance(alg, opt.ChemPropRegressor):
        activation = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_ACTIVATION),
            choices=para.activation,
        )
        aggregation = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_AGGREGATION),
            choices=para.aggregation,
        )
        aggregation_norm = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_AGGREGATION_NORM),
            low=para.aggregation_norm.low, high=para.aggregation_norm.high,
            step=para.aggregation_norm.q,
        )
        batch_size = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_BATCH_SIZE),
            low=para.batch_size.low, high=para.batch_size.high,
            step=para.batch_size.q,
        )
        depth = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_DEPTH),
            low=para.depth.low, high=para.depth.high, step=para.depth.q,
        )
        dropout = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_DROPOUT),
            low=para.dropout.low, high=para.dropout.high, step=para.dropout.q,
        )
        ensemble_size = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_ENSEMBLE_SIZE),
            low=para.ensemble_size, high=para.ensemble_size,
        )
        epochs = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_EPOCHS),
            low=para.epochs, high=para.epochs,
        )
        features_generator = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FEATURES_GENERATOR),
            choices=para.features_generator,
        )
        ffn_hidden_size = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FFN_HIDDEN_SIZE),
            low=para.ffn_hidden_size.low, high=para.ffn_hidden_size.high,
            step=para.ffn_hidden_size.q,
        )
        ffn_num_layers = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FFN_NUM_LAYERS),
            low=para.ffn_num_layers.low, high=para.ffn_num_layers.high,
            step=para.ffn_num_layers.q,
        )
        final_lr_ratio_exp = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FINAL_LR_RATIO_EXP),
            low=para.final_lr_ratio_exp.low, high=para.final_lr_ratio_exp.high,
        )
        hidden_size = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_HIDDEN_SIZE),
            low=para.hidden_size.low, high=para.hidden_size.high,
            step=para.hidden_size.q,
        )
        init_lr_ratio_exp = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_INIT_LR_RATIO_EXP),
            low=para.init_lr_ratio_exp.low, high=para.init_lr_ratio_exp.high,
        )
        max_lr_exp = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_MAX_LR_EXP),
            low=para.max_lr_exp.low, high=para.max_lr_exp.high,
        )
        warmup_epochs_ratio = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_WARMUP_EPOCHS_RATIO),
            low=para.warmup_epochs_ratio.low, high=para.warmup_epochs_ratio.high,
            step=para.warmup_epochs_ratio.q,
        )
        return build.ChemPropRegressor.new(
            activation=activation, aggregation=aggregation,
            aggregation_norm=aggregation_norm, batch_size=batch_size,
            depth=depth, dropout=dropout, ensemble_size=ensemble_size,
            epochs=epochs, features_generator=features_generator,
            ffn_hidden_size=ffn_hidden_size, ffn_num_layers=ffn_num_layers,
            final_lr_ratio_exp=final_lr_ratio_exp, hidden_size=hidden_size,
            init_lr_ratio_exp=init_lr_ratio_exp, max_lr_exp=max_lr_exp,
            warmup_epochs_ratio=warmup_epochs_ratio,
        )
    elif isinstance(alg, opt.ChemPropClassifier):
        activation = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_ACTIVATION),
            choices=para.activation,
        )
        aggregation = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_AGGREGATION),
            choices=para.aggregation,
        )
        aggregation_norm = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_AGGREGATION_NORM),
            low=para.aggregation_norm.low, high=para.aggregation_norm.high,
            step=para.aggregation_norm.q,
        )
        batch_size = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_BATCH_SIZE),
            low=para.batch_size.low, high=para.batch_size.high,
            step=para.batch_size.q,
        )
        depth = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_DEPTH),
            low=para.depth.low, high=para.depth.high, step=para.depth.q,
        )
        dropout = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_DROPOUT),
            low=para.dropout.low, high=para.dropout.high, step=para.dropout.q,
        )
        ensemble_size = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_ENSEMBLE_SIZE),
            low=para.ensemble_size, high=para.ensemble_size,
        )
        epochs = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_EPOCHS),
            low=para.epochs, high=para.epochs,
        )
        features_generator = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FEATURES_GENERATOR),
            choices=para.features_generator,
        )
        ffn_hidden_size = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FFN_HIDDEN_SIZE),
            low=para.ffn_hidden_size.low, high=para.ffn_hidden_size.high,
            step=para.ffn_hidden_size.q,
        )
        ffn_num_layers = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FFN_NUM_LAYERS),
            low=para.ffn_num_layers.low, high=para.ffn_num_layers.high,
            step=para.ffn_num_layers.q,
        )
        final_lr_ratio_exp = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FINAL_LR_RATIO_EXP),
            low=para.final_lr_ratio_exp.low, high=para.final_lr_ratio_exp.high,
        )
        hidden_size = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_HIDDEN_SIZE),
            low=para.hidden_size.low, high=para.hidden_size.high,
            step=para.hidden_size.q,
        )
        init_lr_ratio_exp = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_INIT_LR_RATIO_EXP),
            low=para.init_lr_ratio_exp.low, high=para.init_lr_ratio_exp.high,
        )
        max_lr_exp = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_MAX_LR_EXP),
            low=para.max_lr_exp.low, high=para.max_lr_exp.high,
        )
        warmup_epochs_ratio = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_WARMUP_EPOCHS_RATIO),
            low=para.warmup_epochs_ratio.low, high=para.warmup_epochs_ratio.high,
            step=para.warmup_epochs_ratio.q,
        )
        return build.ChemPropClassifier.new(
            activation=activation, aggregation=aggregation,
            aggregation_norm=aggregation_norm, batch_size=batch_size,
            depth=depth, dropout=dropout, ensemble_size=ensemble_size,
            epochs=epochs, features_generator=features_generator,
            ffn_hidden_size=ffn_hidden_size, ffn_num_layers=ffn_num_layers,
            final_lr_ratio_exp=final_lr_ratio_exp, hidden_size=hidden_size,
            init_lr_ratio_exp=init_lr_ratio_exp, max_lr_exp=max_lr_exp,
            warmup_epochs_ratio=warmup_epochs_ratio,
        )
    elif isinstance(alg, opt.ChemPropHyperoptRegressor):
        ensemble_size = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_ENSEMBLE_SIZE),
            low=para.ensemble_size, high=para.ensemble_size,
        )
        epochs = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_EPOCHS),
            low=para.epochs, high=para.epochs,
        )
        features_generator = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FEATURES_GENERATOR),
            choices=para.features_generator,
        )
        num_iters = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_NUM_ITERS),
            low=para.num_iters, high=para.num_iters,
        )
        search_parameter_level = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_SEARCH_PARAMETER_LEVEL),
            choices=para.search_parameter_level,
        )
        return build.ChemPropHyperoptRegressor.new(
            ensemble_size=ensemble_size, epochs=epochs,
            features_generator=features_generator, num_iters=num_iters,
            search_parameter_level=search_parameter_level,
        )
    elif isinstance(alg, opt.ChemPropHyperoptClassifier):
        ensemble_size = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_ENSEMBLE_SIZE),
            low=para.ensemble_size, high=para.ensemble_size,
        )
        epochs = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_EPOCHS),
            low=para.epochs, high=para.epochs,
        )
        features_generator = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FEATURES_GENERATOR),
            choices=para.features_generator,
        )
        num_iters = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_NUM_ITERS),
            low=para.num_iters, high=para.num_iters,
        )
        search_parameter_level = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_SEARCH_PARAMETER_LEVEL),
            choices=para.search_parameter_level,
        )
        return build.ChemPropHyperoptClassifier.new(
            ensemble_size=ensemble_size, epochs=epochs,
            features_generator=features_generator, num_iters=num_iters,
            search_parameter_level=search_parameter_level,
        )
    elif isinstance(alg, opt.ChemPropRegressorPretrained):
        frzn = trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_FRZN), choices=para.frzn
        )
        epochs = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CHEMPROP_EPOCHS),
            low=para.epochs.low, high=para.epochs.high,
        )
        trial.set_user_attr(
            key=_CE.ALGORITHMS_CHEMPROP_PRETRAINED_MODEL, value=para.pretrained_model
        )
        return build.ChemPropRegressorPretrained.new(
            epochs=epochs, frzn=frzn, pretrained_model=para.pretrained_model
        )
    elif isinstance(alg, opt.CalibratedClassifierCVWithVA):
        n_folds = trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CALIBRATEDCLASSIFIERCV_N_FOLDS),
            low=para.n_folds, high=para.n_folds,
        )
        estimator = suggest_alg_params(trial, para.estimator)
        trial.set_user_attr(
            key=_CE.ALGORITHMS_CALIBRATEDCLASSIFIERCV_ESTIMATOR,
            value=serialize(estimator),
        )
        calibrated_params = {
            _CE.ALGORITHMS_CALIBRATEDCLASSIFIERCV_ENSEMBLE: para.ensemble,
            _CE.ALGORITHMS_CALIBRATEDCLASSIFIERCV_METHOD: para.method,
        }
        trial.set_user_attr(
            key=_CE.ALGORITHMS_CALIBRATEDCLASSIFIERCV_PARAMS, value=calibrated_params
        )
        return build.CalibratedClassifierCVWithVA.new(
            ensemble=para.ensemble, estimator=estimator,
            method=para.method, n_folds=n_folds,
        )
    elif isinstance(alg, opt.Mapie):
        mapie_alpha = trial.suggest_float(
            name=_encode_name(_CE.ALGORITHMS_MAPIE_ALPHA),
            low=para.mapie_alpha, high=para.mapie_alpha,
        )
        estimator = suggest_alg_params(trial, para.estimator)
        trial.set_user_attr(
            key=_CE.ALGORITHMS_MAPIE_ESTIMATOR, value=serialize(estimator)
        )
        return build.Mapie.new(estimator=estimator, mapie_alpha=mapie_alpha)
    elif isinstance(alg, opt.CustomRegressionModel):
        trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CUSTOM_FILE),
            choices=[para.preexisting_model],
        )
        trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CUSTOM_REFIT_MODEL),
            low=para.refit_model, high=para.refit_model,
        )
        return build.CustomRegressionModel.new(
            preexisting_model=para.preexisting_model, refit_model=para.refit_model
        )
    elif isinstance(alg, opt.CustomClassificationModel):
        trial.suggest_categorical(
            name=_encode_name(_CE.ALGORITHMS_CUSTOM_FILE),
            choices=[para.preexisting_model],
        )
        trial.suggest_int(
            name=_encode_name(_CE.ALGORITHMS_CUSTOM_REFIT_MODEL),
            low=para.refit_model, high=para.refit_model,
        )
        return build.CustomClassificationModel.new(
            preexisting_model=para.preexisting_model, refit_model=para.refit_model
        )
    else:
        raise ValueError(f"Unrecognized algorithm: {alg.__class__}")

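Several of the suggestions above call ``suggest_int`` or ``suggest_float`` with ``low == high`` (for example ``ensemble_size``, ``epochs``, ``num_iters`` and ``mapie_alpha``): the value is effectively fixed, but routing it through the trial records it in ``trial.params`` so that ``buildconfig_from_trial`` can recover it later. A minimal, self-contained illustration of that Optuna behaviour; the study and the parameter name are illustrative only:

    import optuna

    study = optuna.create_study()
    trial = study.ask()
    assert trial.suggest_int("epochs__demo", low=30, high=30) == 30
    assert trial.params["epochs__demo"] == 30
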
def suggest_aux_params(trial: FrozenTrial, desc: descriptors.AnyDescriptor):
    para = desc.parameters
    _encode_name = partial(encode_name, hash=trial.user_attrs["alg_hash"])
    # SmilesAndSideInfoFromFile is the only descriptor currently supporting aux params
    if isinstance(desc, descriptors.SmilesAndSideInfoFromFile):
        return trial.suggest_int(
            name=_encode_name(_CE.DESCRIPTORS_SMILES_AND_SI_AUX_WEIGHT_PC),
            low=para.aux_weight_pc.low, high=para.aux_weight_pc.high,
            step=para.aux_weight_pc.q,
        )
    # All other descriptors currently pass through

def check_invalid_descriptor_param(alg: build.AnyAlgorithm) -> list:
    # If calibration is performed, the base estimator should be the compatible algorithm
    if isinstance(alg, Union[build.Mapie, build.CalibratedClassifierCVWithVA]):
        alg = alg.parameters.estimator
    # ChemProp algorithms should have only ChemProp descriptors
    if opt.isanyof(alg, build.AnyChemPropAlgorithm):
        return descriptors.SmilesBasedDescriptor.__args__
    # All other algorithms should have non-ChemProp descriptors
    else:
        return descriptors.AnyChemPropIncompatible.__args__
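
A hedged usage sketch (not part of the module): judging by the comments above, the returned tuple holds the descriptor classes compatible with the given algorithm, so it can back a validation check along these lines; the variable names and the placeholder values are illustrative.

    algorithm = ...  # a concrete build algorithm, e.g. returned by suggest_alg_params
    descriptor = ...  # the chosen MolDescriptor instance
    compatible_classes = check_invalid_descriptor_param(algorithm)
    if not isinstance(descriptor, compatible_classes):
        raise ValueError(f"{descriptor} is not compatible with {algorithm}")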