Source code for optunaz.utils

import dataclasses
import json
from typing import Any, Dict
import numpy as np
from pathlib import Path
import pandas as pd
import hashlib


def mkdict(obj: Any) -> Dict:
    """Convert ``obj`` to plain JSON-compatible data via a JSON round trip.

    ``obj`` is serialised with ``json.dumps`` and immediately parsed back with
    ``json.loads``. Dataclasses encountered during serialisation are expanded
    with ``dataclasses.asdict``, so nested dataclasses end up as nested
    dictionaries.

    Args:
        obj: Object to convert; anything ``json.dumps`` can serialise, plus
            dataclasses.

    Returns:
        The value parsed back from the JSON text.

    Raises:
        TypeError: Raised by the base JSON encoder when ``obj`` contains a
            value that is neither JSON-serialisable nor a dataclass.
    """

    # Reuse the json machinery to walk nested structures for us.
    # https://stackoverflow.com/a/51286749
    class _DataclassAwareEncoder(json.JSONEncoder):
        """JSON encoder that expands dataclasses with ``dataclasses.asdict``."""

        def default(self, o):
            if not dataclasses.is_dataclass(o):
                # Not ours to handle: defer to the base encoder.
                return super().default(o)
            return dataclasses.asdict(o)

    return json.loads(json.dumps(obj, cls=_DataclassAwareEncoder))
def load_df_from_file(filename: str, smiles_col: str):
    """Load a data set from a CSV or SDF file.

    The format is chosen from the file extension, compared
    case-insensitively (``data.csv`` and ``data.CSV`` are both read as CSV).

    Args:
        filename: Path of the file to read.
        smiles_col: Passed to RDKit's ``PandasTools.LoadSDF`` as
            ``smilesName`` when reading an SDF file; not used for CSV files.

    Returns:
        Whatever ``pandas.read_csv`` (for ``.csv``) or
        ``PandasTools.LoadSDF`` (for ``.sdf``) returns.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.sdf``.
    """
    file_format = Path(filename).suffix
    # Extensions are matched case-insensitively; the original spelling is
    # kept for the error message below.
    normalized = file_format.lower()

    if normalized == ".csv":
        csv_options = {"skipinitialspace": True, "low_memory": False}
        try:
            return pd.read_csv(filename, **csv_options)
        except UnicodeDecodeError:
            # The default encoding could not decode the file: retry as Latin-1.
            return pd.read_csv(filename, encoding="latin", **csv_options)

    if normalized == ".sdf":
        # Import here so as not to "spill" dependencies into pickled files
        from rdkit.Chem import PandasTools

        return PandasTools.LoadSDF(
            filename, smilesName=smiles_col, isomericSmiles=True
        )

    raise ValueError(f"Unsupported format for data: {file_format}.")
def remove_failed_idx(failed_idx, y_, smis, auxs) -> tuple[Any, Any, Any]:
    """Drop the entries at the given positions from parallel sequences.

    Args:
        failed_idx: Iterable of positions to remove. It is materialised into
            a set once, so one-shot iterables are applied consistently to
            every sequence.
        y_: Sequence of values; entries at failed positions are removed.
        smis: Sequence filtered with the same positions as ``y_``.
        auxs: Optional sequence filtered the same way, or ``None``.

    Returns:
        A tuple ``(y_, smis, auxs)``. ``y_`` and ``smis`` are NumPy arrays of
        the kept entries. ``auxs`` is ``None`` when ``None`` was passed in;
        otherwise it is a float array, reshaped to a single column when the
        filtered result is one-dimensional.
    """
    # Build the lookup once: constant-time membership tests, and the same
    # set of positions is used for all three sequences.
    failed = set(failed_idx)

    def _keep(values):
        """Return the items of ``values`` whose position is not failed."""
        return [item for pos, item in enumerate(values) if pos not in failed]

    y_ = np.array(_keep(y_))
    smis = np.array(_keep(smis))
    if auxs is None:
        return y_, smis, None

    auxs = np.array(_keep(auxs), dtype=float)
    if auxs.ndim == 1:
        # Present a flat vector as a single column.
        auxs = auxs.reshape(len(auxs), 1)
    return y_, smis, auxs
def md5_hash(dictionary: Dict[str, Any]) -> str:
    """MD5 hash of an optimisation algorithm or a model metadata dictionary.

    The dictionary is serialised to JSON with sorted keys, so two
    dictionaries with the same content hash identically regardless of key
    insertion order.

    Args:
        dictionary: JSON-serialisable dictionary to fingerprint.

    Returns:
        The hexadecimal MD5 digest of the serialised dictionary.

    Raises:
        TypeError: Raised by ``json.dumps`` when the dictionary contains a
            value that is not JSON-serialisable.
    """
    encoded = json.dumps(dictionary, sort_keys=True).encode()
    # The digest is only an identifier, not a security measure; saying so
    # keeps MD5 usable on builds that restrict it for security purposes.
    return hashlib.md5(encoded, usedforsecurity=False).hexdigest()