# Source code for hypertunity.reports.base

import abc
import datetime
import os
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import tinydb

from hypertunity.domain import Domain, Sample
from hypertunity.optimisation.base import EvaluationScore, HistoryPoint

# Public names exported by this module.
__all__ = ["Reporter"]

# A history entry accepted by the reporter: either a ready-made
# ``HistoryPoint`` or a ``(sample, metrics)`` pair, where the metrics are
# a bare float or a mapping from metric names to floats or
# ``EvaluationScore`` objects.
HistoryEntryType = Union[
    HistoryPoint,
    Tuple[
        Sample,
        Union[float, Dict[str, float], Dict[str, EvaluationScore]],
    ],
]


class Reporter:
    """Abstract class :class:`Reporter` for result visualisation.

    Every logged :class:`HistoryPoint` is stored as a document in a
    :obj:`tinydb.TinyDB` table (in memory or on disk) and then handed to
    :meth:`_log_history_point`, which subclasses are expected to override.

    Note:
        The class does not use :class:`abc.ABCMeta`, so the
        ``abc.abstractmethod`` marker on :meth:`_log_history_point` is not
        enforced at instantiation time; the base implementation raises
        :class:`NotImplementedError` when called.
    """

    # Suffix which marks the variance entry of a metric in a metrics dict,
    # e.g. ``{"score": 0.9, "score_var": 0.01}``.
    _VARIANCE_SUFFIX = "_var"

    def __init__(self, domain: Domain,
                 metrics: List[str],
                 primary_metric: str = "",
                 database_path: Optional[str] = None):
        """Initialise the base reporter with domain and metrics.

        Args:
            domain: A :class:`Domain` from which all evaluated samples are drawn.
            metrics: :obj:`List[str]` with names of the metrics used during
                evaluation. If empty, a single metric named "score" is used.
            primary_metric: (optional) :obj:`str` primary metric from `metrics`.
                This is used to determine the best sample. Defaults to the first one.
            database_path: (optional) :obj:`str` path to the directory in which
                the database file "db.json" is stored. The directory is created
                if missing. Defaults to in-memory storage.
        """
        self.domain = domain
        self.metrics = metrics if metrics else ["score"]
        if primary_metric:
            self.primary_metric = primary_metric
        else:
            self.primary_metric = self.metrics[0]

        # every reporter instance logs into its own timestamped table
        self._default_table_name = f"trial_{datetime.datetime.now().isoformat()}"
        if database_path is not None:
            # exist_ok avoids the race between checking for and creating
            # the directory
            os.makedirs(database_path, exist_ok=True)
            db_path = os.path.join(database_path, "db.json")
            self._db = tinydb.TinyDB(
                db_path,
                sort_keys=True,
                indent=4,
                separators=(',', ': ')
            )
        else:
            from tinydb.storages import MemoryStorage
            self._db = tinydb.TinyDB(storage=MemoryStorage,
                                     default_table=self._default_table_name)
        self._db_default_table = self._db.table(self._default_table_name)

    @property
    def database(self):
        """Return the logging database."""
        return self._db

    @property
    def default_database_table(self):
        """Return the default database table name."""
        return self._default_table_name

    def log(self, entry: HistoryEntryType, **kwargs: Any):
        """Create an entry for an optimisation history point in the
        :class:`Reporter`.

        Args:
            entry: :class:`HistoryPoint` or :obj:`Tuple[Sample, Dict]`.
                The history point to log. If given as a tuple of :class:`Sample`
                instance and a mapping from metric names to results, the
                variance of the evaluation noise can be supplied by adding
                an entry in the dict with the metric name and the suffix '_var'.
            **kwargs: (optional) :obj:`Any`. Additional arguments for the
                logging implementation in a subclass.

        Keyword Args:
            meta: (optional) additional information to be logged in the database
                for this entry.

        Raises:
            :class:`TypeError`: if `entry` is neither a tuple nor a
                :class:`HistoryPoint`.
            :class:`ValueError`: if `entry` is a malformed tuple.
        """
        if isinstance(entry, tuple):
            # conversion, database insertion and logging happen in the helper
            self._log_tuple(entry, **kwargs)
        elif isinstance(entry, HistoryPoint):
            self._add_to_db(entry, kwargs.pop("meta", None))
            self._log_history_point(entry, **kwargs)
        else:
            raise TypeError(
                "The history point can be either a tuple or a "
                "`HistoryPoint` type object."
            )

    @classmethod
    def _strip_variance_suffix(cls, name: str) -> str:
        """Return `name` without a trailing '_var' suffix, if it has one.

        Unlike ``str.rstrip``, which removes any trailing characters from a
        set, this removes the suffix exactly once, so that e.g. "error_var"
        maps to "error" and not to "erro".
        """
        suffix = cls._VARIANCE_SUFFIX
        if name.endswith(suffix):
            return name[:-len(suffix)]
        return name

    def _log_tuple(self, entry: Tuple, **kwargs):
        """Helper function to convert the history entry from tuple to
        :class:`HistoryPoint` and then log it using the overridden method
        :meth:`_log_history_point`.

        Args:
            entry: a pair of a :class:`Sample` and either a float, an
                :class:`EvaluationScore` or a dict of metrics.
            **kwargs: forwarded to :meth:`_log_history_point`, except for
                `meta` which is stored in the database document.

        Raises:
            :class:`ValueError`: if the tuple is malformed or a variance is
                given for a metric without a float value.
        """
        if not (len(entry) == 2 and isinstance(entry[0], Sample)
                and isinstance(entry[1], (dict, EvaluationScore, float))):
            raise ValueError(f"Malformed history entry tuple: {entry}.")
        sample, metrics_obj = entry
        if isinstance(metrics_obj, (float, EvaluationScore)):
            # a bare score is attributed to the primary metric
            metrics_obj = {self.primary_metric: metrics_obj}
        # create a properly formatted metrics dict of type Dict[str, EvaluationScore]
        metrics = {}
        for name, val in metrics_obj.items():
            if name in metrics:
                continue
            if name.endswith(self._VARIANCE_SUFFIX):
                metric_name = self._strip_variance_suffix(name)
                if (metric_name not in metrics_obj
                        or not isinstance(metrics_obj[metric_name], float)):
                    raise ValueError(
                        f"Metrics dict does not contain a proper value "
                        f"for metric {metric_name}."
                    )
                metrics[metric_name] = EvaluationScore(
                    value=metrics_obj[metric_name],
                    variance=val
                )
            elif isinstance(val, EvaluationScore):
                metrics[name] = val
            elif isinstance(val, float):
                # the variance defaults to 0.0 if no '<name>_var' key exists
                metrics[name] = EvaluationScore(
                    value=val,
                    variance=metrics_obj.get(f"{name}{self._VARIANCE_SUFFIX}", 0.0)
                )
        entry = HistoryPoint(sample=sample, metrics=metrics)
        self._add_to_db(entry, kwargs.pop("meta", None))
        self._log_history_point(entry, **kwargs)

    @abc.abstractmethod
    def _log_history_point(self, entry: HistoryPoint, **kwargs: Any):
        """Abstract method to override.

        Log the :class:`HistoryPoint` entry into the reporter.

        Args:
            entry: :class:`HistoryPoint`. The sample and evaluation metrics to log.
        """
        raise NotImplementedError

    def _add_to_db(self, entry: HistoryPoint, meta: Any = None):
        """Insert `entry` as a document into the default table.

        Args:
            entry: :class:`HistoryPoint`. The history point to store.
            meta: (optional) stored under the key "meta" if not `None`.
        """
        document = self._convert_history_to_doc(entry)
        if meta is not None:
            document["meta"] = meta
        self._db_default_table.insert(document)

    def get_best(self, criterion: Union[str, Callable] = "max") -> Optional[Dict[str, Any]]:
        """Return the entry from the database which corresponds to the best
        scoring experiment.

        The comparison is made on the value of the primary metric.

        Args:
            criterion: :obj:`str` or :obj:`Callable`. Either "max" or "min",
                or a function which is called with the current and the best
                score so far and returns the preferred one of the two.

        Returns:
            JSON object or `None` if the database is empty. The content of the
            database for the best experiment.

        Raises:
            :class:`ValueError`: if `criterion` is an unknown string.
            :class:`TypeError`: if `criterion` is neither a string nor callable.
        """
        if not self._db_default_table:
            return None
        if isinstance(criterion, str):
            predefined = {"max": max, "min": min}
            if criterion not in predefined:
                raise ValueError(
                    f"Unknown criterion for finding best experiment. "
                    f"Select one from {list(predefined.keys())} "
                    f"or supply a custom function."
                )
            selection_fn = predefined[criterion]
        elif callable(criterion):
            selection_fn = criterion
        else:
            raise TypeError("The criterion must be of type str or Callable.")
        return self._get_best_from_db(selection_fn)

    def _get_best_from_db(self, selection_fn: Callable):
        """Scan the default table for the best document.

        Args:
            selection_fn: called as ``selection_fn(current_score, best_score)``;
                must return the preferred of the two scores.

        Returns:
            The best document or `None` if the table has no documents.
        """
        # Start from the first stored document instead of looking up a fixed
        # document id, which is not guaranteed to exist in a loaded table.
        entries = iter(self._db_default_table)
        best_entry = next(entries, None)
        if best_entry is None:
            return None
        metric = self.primary_metric
        best_score = best_entry["metrics"][metric]["value"]
        for entry in entries:
            current_score = entry["metrics"][metric]["value"]
            new_score = selection_fn(current_score, best_score)
            if new_score != best_score:
                best_entry = entry
                best_score = new_score
        return best_entry

    def from_history(self, history: List[HistoryEntryType]):
        """Load the reporter with data from a sequence of evaluations.

        Args:
            history: :obj:`List[HistoryPoint]` or :obj:`Tuple`. The sequence of
                evaluations comprised of samples and metrics. Each element is
                passed to :meth:`log`.
        """
        for h in history:
            self.log(h)

    def from_database(self, database: Union[str, tinydb.TinyDB],
                      table: Optional[str] = None):
        """Load history from a database supplied as a path to a file or a
        :obj:`tinydb.TinyDB` object.

        The loaded database and table replace the ones currently used by
        the reporter.

        Args:
            database: :obj:`str` or :obj:`tinydb.TinyDB`. The database to load.
            table: (optional) :obj:`str`. The table to load from the database.
                This argument is not required if the database has only one table.

        Raises:
            :class:`TypeError`: if `database` is neither a path nor a
                :obj:`tinydb.TinyDB` object.
            :class:`ValueError`: if the database contains more than one table
                and `table` is not given.
        """
        if isinstance(database, str):
            db = tinydb.TinyDB(database, sort_keys=True, indent=4, separators=(',', ': '))
        elif isinstance(database, tinydb.TinyDB):
            db = database
        else:
            raise TypeError("The database must be of type str or tinydb.TinyDB.")
        if len(db.tables()) > 1 and table is None:
            raise ValueError(
                "Ambiguous database with multiple tables. "
                "Specify a table name."
            )
        if table is None:
            table = list(db.tables())[0]
        self._db = db
        # keep the reported table name in sync with the table actually used
        self._default_table_name = table
        self._db_default_table = self._db.table(table)

    def to_history(self, table: Optional[str] = None) -> List[HistoryPoint]:
        """Export the reporter logged history from a database table to an
        optimiser-friendly history.

        Args:
            table: (optional) :obj:`str`. The name of the table to export.
                Defaults to the reporter's current default table.

        Returns:
            A list of :class:`HistoryPoint` objects which can be loaded into
            an :class:`Optimiser` instance.
        """
        if table is None:
            source_table = self._db_default_table
        else:
            source_table = self._db.table(table)
        return [self._convert_doc_to_history(doc) for doc in source_table]

    @staticmethod
    def _convert_history_to_doc(entry: HistoryPoint) -> Dict:
        """Convert a :class:`HistoryPoint` into a database document with the
        keys "sample" and "metrics".
        """
        db_entry = {
            "sample": entry.sample.as_dict(),
            "metrics": {k: {
                "value": v.value,
                "variance": v.variance
            } for k, v in entry.metrics.items()}
        }
        return db_entry

    @staticmethod
    def _convert_doc_to_history(document: Dict) -> HistoryPoint:
        """Convert a database document with the keys "sample" and "metrics"
        back into a :class:`HistoryPoint`.
        """
        hist_point = HistoryPoint(
            sample=Sample(document["sample"]),
            metrics={k: EvaluationScore(v["value"], v["variance"])
                     for k, v in document["metrics"].items()}
        )
        return hist_point