# Source code for hypertunity.reports.base

import abc
import datetime
import os
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import tinydb

from hypertunity.domain import Domain, Sample
from hypertunity.optimisation.base import EvaluationScore, HistoryPoint

# Names exported by `from <this module> import *`.
__all__ = [
    "Reporter"
]

# Type of a single loggable history entry. It is either a ready-made
# `HistoryPoint`, or a `(Sample, metrics)` tuple in which `metrics` is a
# single float score, a dict of metric name -> float, or a dict of
# metric name -> `EvaluationScore`.
HistoryEntryType = Union[
    HistoryPoint,
    Tuple[Sample, Union[float, Dict[str, float], Dict[str, EvaluationScore]]]
]


class Reporter:
    """Abstract class :class:`Reporter` for result visualisation.

    Every logged entry is stored as a document in a :obj:`tinydb.TinyDB`
    table and then handed over to :meth:`_log_history_point`, which
    subclasses must override. The base implementation of that method raises
    :class:`NotImplementedError`.
    """

    def __init__(self,
                 domain: Domain,
                 metrics: List[str],
                 primary_metric: str = "",
                 database_path: Optional[str] = None):
        """Initialise the base reporter with domain and metrics.

        Args:
            domain: A :class:`Domain` from which all evaluated samples
                are drawn.
            metrics: :obj:`List[str]` with names of the metrics used during
                evaluation. If empty, `["score"]` is used.
            primary_metric: (optional) :obj:`str` primary metric from
                `metrics`. This is used to determine the best sample.
                Defaults to the first one.
            database_path: (optional) :obj:`str` path to the database for
                storing experiment history on disk. It is treated as a
                directory in which a `db.json` file is kept.
                Defaults to in-memory storage.
        """
        self.domain = domain
        self.metrics = metrics if metrics else ["score"]
        self.primary_metric = (
            primary_metric if primary_metric else self.metrics[0]
        )
        # Every reporter instance writes into its own, timestamped table.
        self._default_table_name = (
            f"trial_{datetime.datetime.now().isoformat()}"
        )
        if database_path is not None:
            # `exist_ok=True` avoids the check-then-create race of
            # `os.path.exists` followed by `os.makedirs`.
            os.makedirs(database_path, exist_ok=True)
            db_path = os.path.join(database_path, "db.json")
            self._db = tinydb.TinyDB(
                db_path, sort_keys=True, indent=4, separators=(',', ': ')
            )
        else:
            from tinydb.storages import MemoryStorage
            self._db = tinydb.TinyDB(
                storage=MemoryStorage,
                default_table=self._default_table_name
            )
        self._db_default_table = self._db.table(self._default_table_name)

    @property
    def database(self):
        """Return the logging database."""
        return self._db

    @property
    def default_database_table(self):
        """Return the default database table name."""
        return self._default_table_name

    def log(self, entry: HistoryEntryType, **kwargs: Any):
        """Create an entry for an optimisation history point in
        the :class:`Reporter`.

        Args:
            entry: :class:`HistoryPoint` or :obj:`Tuple[Sample, Dict]`.
                The history point to log. If given as a tuple of
                :class:`Sample` instance and a mapping from metric names to
                results, the variance of the evaluation noise can be
                supplied by adding an entry in the dict with the metric name
                and the suffix '_var'.
            **kwargs: (optional) :obj:`Any`. Additional arguments for the
                logging implementation in a subclass.

        Keyword Args:
            meta: (optional) additional information to be logged in the
                database for this entry.

        Raises:
            :class:`TypeError`: if `entry` is neither a tuple nor a
                :class:`HistoryPoint`.
            :class:`ValueError`: if `entry` is a malformed tuple.
        """
        # The tuple check comes first, exactly as before, so that the
        # dispatch order for any tuple-like entry is unchanged.
        if isinstance(entry, tuple):
            # `_log_tuple` stores the converted entry in the database itself.
            self._log_tuple(entry, **kwargs)
        elif isinstance(entry, HistoryPoint):
            self._add_to_db(entry, kwargs.pop("meta", None))
            self._log_history_point(entry, **kwargs)
        else:
            raise TypeError(
                "The history point can be either a tuple or a "
                "`HistoryPoint` type object."
            )

    def _log_tuple(self, entry: Tuple, **kwargs):
        """Helper function to convert the history entry from tuple to
        :class:`HistoryPoint` and then log it using the overridden method
        :meth:`_log_history_point`.

        Args:
            entry: a `(Sample, metrics)` tuple, where `metrics` is a float,
                an :class:`EvaluationScore` or a dict of those keyed by
                metric name. A key `"<metric>_var"` supplies the variance
                for the float-valued metric `"<metric>"`.
            **kwargs: forwarded to :meth:`_log_history_point`, except for
                `meta` which is stored in the database document.

        Raises:
            :class:`ValueError`: if the tuple is malformed or a variance is
                given for a metric without a float value.
        """
        well_formed = (
            len(entry) == 2
            and isinstance(entry[0], Sample)
            and isinstance(entry[1], (dict, EvaluationScore, float))
        )
        if not well_formed:
            raise ValueError(f"Malformed history entry tuple: {entry}.")
        sample, metrics_obj = entry
        if isinstance(metrics_obj, (float, EvaluationScore)):
            # use default name for score column
            metrics_obj = {self.primary_metric: metrics_obj}

        var_suffix = "_var"
        # create a properly formatted metrics dict
        # of type Dict[str, EvaluationScore]
        metrics = {}
        for name, val in metrics_obj.items():
            if name in metrics:
                continue
            if name.endswith(var_suffix):
                # Remove the suffix by slicing. `str.rstrip("_var")` strips
                # any trailing run of the characters '_', 'v', 'a', 'r',
                # which mangles names such as "error_var" into "erro".
                metric_name = name[:-len(var_suffix)]
                if (metric_name not in metrics_obj
                        or not isinstance(metrics_obj[metric_name], float)):
                    raise ValueError(
                        f"Metrics dict does not contain a proper value "
                        f"for metric {metric_name}."
                    )
                metrics[metric_name] = EvaluationScore(
                    value=metrics_obj[metric_name], variance=val
                )
            elif isinstance(val, EvaluationScore):
                metrics[name] = val
            elif isinstance(val, float):
                metrics[name] = EvaluationScore(
                    value=val,
                    variance=metrics_obj.get(f"{name}{var_suffix}", 0.0)
                )
        entry = HistoryPoint(sample=sample, metrics=metrics)
        self._add_to_db(entry, kwargs.pop("meta", None))
        self._log_history_point(entry, **kwargs)

    @abc.abstractmethod
    def _log_history_point(self, entry: HistoryPoint, **kwargs: Any):
        """Abstract method to override. Log the :class:`HistoryPoint` entry
        into the reporter.

        Args:
            entry: :class:`HistoryPoint`. The sample and evaluation metrics
                to log.
        """
        raise NotImplementedError

    def _add_to_db(self, entry: HistoryPoint, meta: Any = None):
        """Insert `entry` as a document into the default database table.

        Args:
            entry: :class:`HistoryPoint`. The history point to store.
            meta: (optional) extra information saved under the `"meta"` key
                of the document. Omitted from the document if `None`.
        """
        document = self._convert_history_to_doc(entry)
        if meta is not None:
            document["meta"] = meta
        self._db_default_table.insert(document)

    def get_best(self, criterion: Union[str, Callable] = "max"
                 ) -> Optional[Dict[str, Any]]:
        """Return the entry from the database which corresponds to the best
        scoring experiment.

        Args:
            criterion: :obj:`str` or :obj:`Callable`. The function used to
                determine whether the highest or lowest score is requested.
                If several evaluation metrics are present, then a custom
                `criterion` must be supplied.

        Returns:
            JSON object or `None` if the database is empty. The content of
            the database for the best experiment.

        Raises:
            :class:`ValueError`: if `criterion` is an unknown string.
            :class:`TypeError`: if `criterion` is neither a string nor
                a callable.
        """
        if not self._db_default_table:
            return None
        if isinstance(criterion, str):
            predefined = {"max": max, "min": min}
            if criterion not in predefined:
                raise ValueError(
                    f"Unknown criterion for finding best experiment. "
                    f"Select one from {list(predefined)} "
                    f"or supply a custom function."
                )
            selection_fn = predefined[criterion]
        elif callable(criterion):
            selection_fn = criterion
        else:
            raise TypeError("The criterion must be of type str or Callable.")
        return self._get_best_from_db(selection_fn)

    def _get_best_from_db(self, selection_fn: Callable):
        """Scan the default table for the best entry by the primary metric.

        Args:
            selection_fn: a callable taking `(current_score, best_score)`
                and returning the preferred of the two.

        Returns:
            The best document, or `None` if the table holds no documents.
        """
        best_entry = None
        best_score = None
        for entry in self._db_default_table:
            current_score = entry["metrics"][self.primary_metric]["value"]
            if best_entry is None:
                # Seed with the first stored document instead of assuming
                # that a document with id 1 exists in the table.
                best_entry, best_score = entry, current_score
                continue
            new_score = selection_fn(current_score, best_score)
            if new_score != best_score:
                best_entry, best_score = entry, new_score
        return best_entry

    def from_history(self, history: List[HistoryEntryType]):
        """Load the reporter with data from an entry of evaluations.

        Args:
            history: :obj:`List[HistoryPoint]` or :obj:`Tuple`. The sequence
                of evaluations comprised of samples and metrics.
        """
        for point in history:
            self.log(point)

    def from_database(self,
                      database: Union[str, tinydb.TinyDB],
                      table: Optional[str] = None):
        """Load history from a database supplied as a path to a file or a
        :obj:`tinydb.TinyDB` object.

        The loaded database and table replace the ones the reporter was
        using so far.

        Args:
            database: :obj:`str` or :obj:`tinydb.TinyDB`. The database to
                load.
            table: (optional) :obj:`str`. The table to load from the
                database. This argument is not required if the database has
                only one table.

        Raises:
            :class:`TypeError`: if `database` is neither a path nor a
                :obj:`tinydb.TinyDB` object.
            :class:`ValueError`: if `table` is not given and the database
                contains more than one table or no table at all.
        """
        if isinstance(database, str):
            db = tinydb.TinyDB(
                database, sort_keys=True, indent=4, separators=(',', ': ')
            )
        elif isinstance(database, tinydb.TinyDB):
            db = database
        else:
            raise TypeError(
                "The database must be of type str or tinydb.TinyDB."
            )
        if table is None:
            table_names = list(db.tables())
            if len(table_names) > 1:
                raise ValueError(
                    "Ambiguous database with multiple tables. "
                    "Specify a table name."
                )
            if not table_names:
                # Explicit error instead of an IndexError from `[0]`.
                raise ValueError(
                    "The database does not contain any tables."
                )
            table = table_names[0]
        self._db = db
        # Keep the reported default table name in sync with the table that
        # is actually used from now on.
        self._default_table_name = table
        self._db_default_table = self._db.table(table)

    def to_history(self, table: Optional[str] = None) -> List[HistoryPoint]:
        """Export the reporter logged history from a database table to an
        optimiser-friendly history.

        Args:
            table: (optional) :obj:`str`. The name of the table to export.
                Defaults to the reporter's current default table.

        Returns:
            A list of :class:`HistoryPoint` objects which can be loaded into
            an :class:`Optimiser` instance.
        """
        if table is None:
            source_table = self._db_default_table
        else:
            source_table = self._db.table(table)
        return [self._convert_doc_to_history(doc) for doc in source_table]

    @staticmethod
    def _convert_history_to_doc(entry: HistoryPoint) -> Dict:
        """Convert a :class:`HistoryPoint` into a database document with
        the keys `"sample"` and `"metrics"`.
        """
        db_entry = {
            "sample": entry.sample.as_dict(),
            "metrics": {
                name: {"value": score.value, "variance": score.variance}
                for name, score in entry.metrics.items()
            }
        }
        return db_entry

    @staticmethod
    def _convert_doc_to_history(document: Dict) -> HistoryPoint:
        """Convert a database document with the keys `"sample"` and
        `"metrics"` back into a :class:`HistoryPoint`.
        """
        hist_point = HistoryPoint(
            sample=Sample(document["sample"]),
            metrics={
                name: EvaluationScore(score["value"], score["variance"])
                for name, score in document["metrics"].items()
            }
        )
        return hist_point