import os
import optuna
import warnings
from optuna import storages
from copy import deepcopy
from optuna.trial import TrialState, FrozenTrial
from optunaz.config.optconfig import OptimizationConfig
from optunaz.utils import mkdict
from optunaz.utils.enums.optimization_configuration_enum import (
OptimizationConfigurationEnum,
)
from optunaz.utils.enums.visualization_enum import VisualizationEnum
class Visualizer:
    """Class to visualize various aspects of the optimization / building process.

    Plots are rendered with the (private) figure factories of
    ``optuna.visualization`` and written to disk through the figure's
    ``write_image()`` method. All plotting is best-effort: a failure while
    rendering or exporting is reported as a warning instead of being raised.
    """

    def __init__(self):
        # initialize Enums
        self._OE = OptimizationConfigurationEnum()
        self._VE = VisualizationEnum()

    def plot_by_configuration(self, conf: "OptimizationConfig", study: "optuna.Study"):
        """Generate every plot that is switched on in the configuration.

        Args:
            conf: Optimization configuration; it is converted with ``mkdict``
                and its visualization section is read.
            study: The study whose trials are plotted.

        The output folder is created if necessary. The history plot is
        written directly into it; contour, parallel-coordinate and slice
        plots each get their own sub-folder.
        """
        vis_dict = mkdict(conf)[self._VE.VISUALIZATION]
        output_folder = vis_dict[self._VE.VISUALIZATION_OUTPUT_FOLDER]
        self._make_folder(output_folder)
        plots = vis_dict[self._VE.VISUALIZATION_PLOTS]

        if self._is_plot_enabled(plots, self._VE.VISUALIZATION_PLOTS_HISTORY):
            file_path = os.path.join(
                output_folder,
                ".".join(["history", vis_dict[self._VE.VISUALIZATION_FILE_FORMAT]]),
            )
            self.plot_history(file_path=file_path, study=study)

        # (configuration key, sub-folder name, plotting method), in the
        # order in which the plots are generated
        per_algorithm_plots = (
            (self._VE.VISUALIZATION_PLOTS_CONTOUR, "contour", self.plot_contour),
            (
                self._VE.VISUALIZATION_PLOTS_PARALLEL_COORDINATE,
                "parallel_coordinates",
                self.plot_parallel_coordinate,
            ),
            (self._VE.VISUALIZATION_PLOTS_SLICE, "slice", self.plot_slice),
        )
        for plot_key, folder_name, plot_method in per_algorithm_plots:
            if not self._is_plot_enabled(plots, plot_key):
                continue
            plot_folder = os.path.join(output_folder, folder_name)
            self._make_folder(plot_folder)
            plot_method(
                folder_path=plot_folder,
                study=study,
                # the file format is only looked up when a plot is requested
                file_format=vis_dict[self._VE.VISUALIZATION_FILE_FORMAT],
            )

    def plot_slice(self, folder_path: str, study: "optuna.Study", file_format="png"):
        """Write one slice plot per algorithm into ``folder_path``.

        Args:
            folder_path: Existing folder that receives the image files.
            study: The study to plot; it is split by algorithm first.
            file_format: File extension of the images, e.g. "png" or "jpeg".
        """
        # formats "png" and "jpeg" are handled inside the "write_image()" function of "plotly" / "orca"
        self._plot_per_algorithm(
            folder_path=folder_path,
            study=study,
            file_format=file_format,
            # resolved lazily so that a missing factory is reported as a warning
            make_figure=lambda sub_study: optuna.visualization._get_slice_plot(
                study=sub_study
            ),
            scale=3.25,
        )

    def plot_parallel_coordinate(
        self, folder_path: str, study: "optuna.Study", file_format="png"
    ):
        """Write one parallel-coordinate plot per algorithm into ``folder_path``.

        Args:
            folder_path: Existing folder that receives the image files.
            study: The study to plot; it is split by algorithm first.
            file_format: File extension of the images, e.g. "png" or "jpeg".
        """
        self._plot_per_algorithm(
            folder_path=folder_path,
            study=study,
            file_format=file_format,
            make_figure=lambda sub_study: (
                optuna.visualization._get_parallel_coordinate_plot(study=sub_study)
            ),
            scale=6.75,
        )

    def plot_contour(self, folder_path: str, study: "optuna.Study", file_format="png"):
        """Write one contour plot per algorithm into ``folder_path``.

        Args:
            folder_path: Existing folder that receives the image files.
            study: The study to plot; it is split by algorithm first.
            file_format: File extension of the images, e.g. "png" or "jpeg".
        """
        # as this is a two-dimensional plot, disable it for all algorithms that have less than 2 hyperparameters
        # note, that "study_type" has been removed by "_split_study_by_algorithm()", so only 'real' hyperparameters
        # remain at this stage
        self._plot_per_algorithm(
            folder_path=folder_path,
            study=study,
            file_format=file_format,
            make_figure=lambda sub_study: optuna.visualization._get_contour_plot(
                study=sub_study
            ),
            scale=6.75,
            min_params=2,
        )

    @staticmethod
    def plot_history(file_path: str, study: "optuna.Study"):
        """Write the optimization history plot of ``study`` to ``file_path``.

        Args:
            file_path: Full path (including extension) of the image file.
            study: The study whose optimization history is plotted.
        """
        try:
            fig = optuna.visualization._get_optimization_history_plot(study=study)
            fig.write_image(file_path, scale=3.25, width=None, height=None)
        except Exception as exc:
            # best-effort plotting: report the problem, but do not abort the run
            Visualizer._warn_plotting_failed(exc)

    def _plot_per_algorithm(
        self, folder_path, study, file_format, make_figure, scale, min_params=0
    ):
        """Render and save one figure per algorithm-specific sub-study.

        Args:
            folder_path: Folder that receives "<algorithm>.<file_format>" files.
            study: The study to split with ``_split_study_by_algorithm()``.
            file_format: File extension of the images.
            make_figure: Callable that takes a sub-study and returns a figure.
            scale: Scale factor handed to ``write_image()``.
            min_params: Sub-studies whose first trial has fewer parameters
                than this are skipped.
        """
        try:
            for sub_study in self._split_study_by_algorithm(study=study):
                if len(sub_study.trials[0].params) < min_params:
                    continue
                file_path = os.path.join(
                    folder_path, ".".join([sub_study.study_name, file_format])
                )
                fig = make_figure(sub_study)
                fig.update_layout(title_text=sub_study.study_name)
                fig.write_image(file_path, scale=scale, width=None, height=None)
        except Exception as exc:
            # best-effort plotting: a failure stops the remaining plots of
            # this kind, but must not abort the surrounding run
            self._warn_plotting_failed(exc)

    @staticmethod
    def _warn_plotting_failed(exc):
        """Emit the plotting warning, including the actual cause ``exc``."""
        warnings.warn(
            "Plotting disabled ({}: {}); a common cause is that Orca could not "
            "find an X11 interface.".format(type(exc).__name__, exc)
        )

    @staticmethod
    def _is_plot_enabled(plots: dict, plot_key) -> bool:
        """Return True only if ``plot_key`` is present and set to exactly ``True``."""
        return plots.get(plot_key) is True

    def _split_study_by_algorithm(self, study: "optuna.Study") -> list:
        """Split ``study`` into one in-memory study per algorithm.

        Returns:
            A list of ``optuna.Study`` objects, one for each algorithm that
            has at least one completed trial. Each is named after its
            algorithm and holds only that algorithm's completed trials,
            renumbered from 0 and without the algorithm pseudo-hyperparameter.
        """
        # the general idea is to make a copy of the "Study" object and remove all trials that do not belong to a given
        # algorithm, i.e. return a list of "Study" objects, one for each algorithm used
        # note, that internally "optuna" only uses the trials and optimization direction to do the plots
        studies_list = []

        # 1) get whether it is a regression or classification and, since algorithms are just another hyperparameter in
        #    "Optuna_AZ", build a list of the algorithms used (in order of first appearance); trials that never got an
        #    algorithm assigned (e.g. because they failed early) are ignored
        study_type = self._get_study_type(study=study)
        all_trials = study.trials
        names_algorithms = list(
            dict.fromkeys(
                trial.params[study_type]
                for trial in all_trials
                if study_type in trial.params
            )
        )

        # 2) loop over algorithms and keep only the completed trials that use the current algorithm
        for algorithm in names_algorithms:
            # copy only the selected trials, so the trials of the original study stay untouched
            selected = [
                deepcopy(trial)
                for trial in all_trials
                if trial.state == TrialState.COMPLETE
                and study_type in trial.params
                and trial.params[study_type] == algorithm
            ]
            if not selected:
                continue

            # remove the algorithm as "hyperparameter" and renumber the trials to make sure they are plotted properly
            trials = []
            for number, trial in enumerate(selected):
                params = {
                    name: value
                    for name, value in trial.params.items()
                    if name != study_type
                }
                distributions = {
                    name: distribution
                    for name, distribution in trial.distributions.items()
                    if name != study_type
                }
                trials.append(
                    FrozenTrial(
                        number=number,
                        state=TrialState.COMPLETE,
                        value=trial.value,
                        datetime_start=trial.datetime_start,
                        datetime_complete=trial.datetime_complete,
                        params=params,
                        distributions=distributions,
                        user_attrs=trial.user_attrs,
                        system_attrs=trial.system_attrs,
                        intermediate_values=trial.intermediate_values,
                        trial_id=number,
                    )
                )

            # a unique study name is necessary for internal reasons
            # NOTE(review): assigning "trials" / "study_name" on the storage and building "Study" directly looks tied
            # to the internals of the optuna version this project uses - confirm before upgrading optuna
            storage = storages.InMemoryStorage()
            storage.trials = trials
            storage.study_name = algorithm
            sub_study = optuna.Study(study_name=algorithm, storage=storage)
            studies_list.append(sub_study)
        return studies_list

    def _get_study_type(self, study: "optuna.Study") -> str:
        """Return the name of the algorithm hyperparameter used by ``study``.

        The distributions of the study's best trial are inspected for the
        regressor key first, then for the classifier key.

        Raises:
            AttributeError: If neither key is among the distributions.
        """
        distributions = study.best_trial.distributions
        if self._VE.VISUALIZATION_REGRESSOR in distributions:
            return self._VE.VISUALIZATION_REGRESSOR
        if self._VE.VISUALIZATION_CLASSIFIER in distributions:
            return self._VE.VISUALIZATION_CLASSIFIER
        raise AttributeError("Study must be either classification or regression.")

    @staticmethod
    def _make_folder(path):
        """Make sure the output folder ``path`` for the plots exists."""
        # missing parent directories are created as well; an already existing
        # folder is not an error
        os.makedirs(path, exist_ok=True)