Source code for eradiate.test_tools.regression

from __future__ import annotations

import os
import typing as t
from abc import ABC, abstractmethod
from pathlib import Path

import attrs
import matplotlib.pyplot as plt
import mitsuba as mi
import numpy as np
import xarray as xr

from .. import data
from ..attrs import documented, parse_docs
from ..exceptions import DataError
from ..typing import PathLike
from ..util.misc import summary_repr


def regression_test_plots(
    ref: np.typing.ArrayLike,
    result: np.typing.ArrayLike,
    vza: np.typing.ArrayLike,
    filename: PathLike,
    metric: tuple[str, float],
) -> None:
    """
    Create regression test report plots.

    Parameters
    ----------
    ref : array-like
        BRF values for the reference data

    result : array-like
        BRF values for the simulation result

    vza : array-like
        VZA values for plotting

    filename : path-like
        Path to the output file for the plot

    metric : tuple
        A tuple of the form (metric name, value) to be added to the plots.
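
    Examples
    --------
    Illustrative sketch only (synthetic values; the output file name is
    hypothetical):

    >>> import numpy as np
    >>> vza = np.linspace(-75.0, 75.0, 31)
    >>> ref = 0.05 + 1e-3 * np.abs(vza)
    >>> result = ref + 1e-4
    >>> regression_test_plots(ref, result, vza, "report.png", ("rmse", 1e-4))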
    """
    fig, axes = plt.subplots(2, 2, figsize=(8, 6))

    axes[0][0].plot(vza, ref, label="reference")
    axes[0][0].plot(vza, result, label="result")
    axes[0][0].set_title("Reference and test result")
    handles, labels = axes[0][0].get_legend_handles_labels()

    axes[1][0].plot(vza, result - ref)
    axes[1][0].set_xlabel("VZA [deg]")
    axes[1][0].set_ylabel("BRF in principal plane [-]")
    axes[1][0].set_title("Absolute difference")

    axes[1][1].plot(vza, (result - ref) / ref)
    axes[1][1].set_title("Relative difference")

    axes[0][1].set_axis_off()
    axes[0][1].legend(handles=handles, labels=labels, loc="upper center")
    axes[0][1].text(
        0.5, 0.5, f"{metric[0]}: {metric[1]:.4}", horizontalalignment="center"
    )

    plt.tight_layout()
    plt.savefig(filename)
    plt.close()


def reference_converter(
    value: os.PathLike | xr.Dataset | None,
) -> xr.Dataset | None:
    """
    A converter for handling the reference data attribute.

    Parameters
    ----------
    value : path-like or Dataset or None
        Path to the reference dataset file, a dataset identifier served by the
        data store, a Dataset, or ``None``.

    Raises
    ------
    ValueError
        If the reference data is not a valid dataset.

    Returns
    -------
    xr.Dataset or None
        The reference dataset.

    Notes
    -----
    If ``value`` is ``None``, the converter returns ``None``.
    If ``value`` is a path to a local file, it is loaded as a dataset.
    If ``value`` is a path to a remote file, it is loaded from the Eradiate
    data store.
    If ``value`` is a dataset, it is returned as is.
    If ``value`` is a path to a remote file but the data store raises a
    ``DataError``, the converter returns ``None``.
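
    Examples
    --------
    Illustrative sketch only (the commented file name is hypothetical):

    >>> reference_converter(None) is None
    True
    >>> isinstance(reference_converter(xr.Dataset()), xr.Dataset)
    True
    >>> # A string or path is interpreted first as a local file, then as a
    >>> # resource served by the Eradiate data store:
    >>> # reference_converter("my_case-ref.nc")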
    """
    if value is None:
        return value

    try:
        if isinstance(value, (str, os.PathLike, bytes)):
            # Try to open a file if it is directly referenced
            if os.path.isfile(value):
                return xr.load_dataset(value)

            # Try to serve the file from the data store
            return data.load_dataset(value)

        elif isinstance(value, xr.Dataset):
            return value

        else:
            raise ValueError(
                "Reference must be provided as a Dataset or a file path. "
                f"Got {type(value).__name__}"
            )

    except DataError:
        return None


@parse_docs
@attrs.define
class RegressionTest(ABC):
    """
    Common interface for tests based on the comparison of a result array
    against reference values.
    """

    # Name used for the reference metric. Must be set by subclasses.
    METRIC_NAME: t.ClassVar[str | None] = None

    name: str = documented(
        attrs.field(validator=attrs.validators.instance_of(str)),
        doc="Test case name.",
        type="str",
        init_type="str",
    )

    value: xr.Dataset = documented(
        attrs.field(
            validator=attrs.validators.instance_of(xr.Dataset),
            repr=summary_repr,
        ),
        doc="Simulation result. Must be specified as a dataset.",
        type=":class:`xarray.Dataset`",
        init_type=":class:`xarray.Dataset`",
    )

    reference: xr.Dataset | None = documented(
        attrs.field(
            default=None,
            converter=reference_converter,
            validator=attrs.validators.optional(
                attrs.validators.instance_of(xr.Dataset)
            ),
        ),
        doc="Reference data. Can be specified as an xarray dataset, a path to a "
        "NetCDF file or a path to a resource served by the data store.",
        type=":class:`xarray.Dataset` or None",
        init_type=":class:`xarray.Dataset` or path-like, optional",
        default="None",
    )

    threshold: float = documented(
        attrs.field(kw_only=True),
        doc="Threshold for test evaluation.",
        type="float",
        init_type="float",
    )

    archive_dir: Path = documented(
        attrs.field(kw_only=True, converter=Path),
        doc="Path to output artefact storage directory. Relative paths are "
        "interpreted with respect to the current working directory.",
        type=":class:`pathlib.Path`",
        init_type="path-like",
    )

    def __attrs_pre_init__(self):
        if self.METRIC_NAME is None:
            raise TypeError(f"Unsupported test type {type(self).__name__}")

    def run(self) -> bool:
        """
        This method controls the execution steps of the regression test:

        * handle missing reference data
        * catch errors during test evaluation
        * create the appropriate plots and data archives

        Returns
        -------
        bool
            Result of the test criterion comparison.
        """
        fname = self.name
        ext = ".nc"
        archive_dir = os.path.abspath(self.archive_dir)

        # Absolute path where the reference will be stored
        fname_reference = os.path.join(archive_dir, fname + "-ref" + ext)

        # Absolute path where test output will be stored
        fname_result = os.path.join(archive_dir, fname + "-result" + ext)

        # If no valid reference is found, store the results as the new
        # reference and fail the test
        if not self.reference:
            print("No reference data was found! Storing test results as reference.")
            self._archive(self.value, fname_reference)
            self._plot(reference_only=True, metric_value=None)
            return False

        # Otherwise (we have a reference value), evaluate the test metric
        try:
            passed, metric_value = self._evaluate()
        except Exception as e:
            print("An exception occurred during test execution!")
            self._plot(reference_only=False, metric_value=None)
            raise e

        # We got a metric: report the results in the archive directory
        self._archive(self.value, fname_result)
        self._archive(self.reference, fname_reference)
        self._plot(reference_only=False, metric_value=metric_value)

        return passed

    @abstractmethod
    def _evaluate(self) -> tuple[bool, float]:
        """
        Evaluate the test results and perform a comparison to the reference
        based on the criterion defined in the specialized class.

        Returns
        -------
        passed : bool
            ``True`` iff the test passed.

        metric_value : float
            The value of the test metric.
        """
        pass

    def _archive(self, dataset: xr.Dataset, fname_output: PathLike) -> None:
        """
        Create an archive file for test result and reference storage.
        """
        os.makedirs(os.path.dirname(fname_output), exist_ok=True)
        print(f"Saving dataset to {fname_output}")
        dataset.to_netcdf(fname_output)

    def _plot(self, metric_value: float | None, reference_only: bool) -> None:
        """
        Create a plot to visualize the results of the test. If the
        ``reference_only`` parameter is set, create only a simple plot
        visualizing the new reference data. Otherwise, create the more complex
        comparison plots for the regression test.

        Parameters
        ----------
        metric_value : float or None
            The numerical value of the test metric.

        reference_only : bool
            If ``True``, create only a simple visualization of the computed
            data.
        """
        vza = np.squeeze(self.value.vza.values)

        if "brf_srf" in self.value.data_vars:
            # Handle spectral results
            brf = np.squeeze(self.value.brf_srf.values)
        else:
            # Handle monochromatic results
            brf = np.squeeze(self.value.brf.values)

        fname = self.name
        ext = ".png"
        archive_dir = os.path.abspath(self.archive_dir)
        fname_plot = os.path.join(archive_dir, fname + ext)
        os.makedirs(os.path.dirname(fname_plot), exist_ok=True)
        print(f"Saving plot to {fname_plot}")

        if reference_only:
            plt.figure(figsize=(8, 6))
            plt.plot(vza, brf)
            plt.xlabel("VZA [deg]")
            plt.ylabel("BRF in principal plane [-]")
            plt.title("Simulation result, can be used as new reference")
            plt.tight_layout()
            plt.savefig(fname_plot)
            plt.close()

        else:
            if "brf_srf" in self.value.data_vars:
                # Handle spectral results
                brf_ref = np.squeeze(self.reference.brf_srf.values)
            else:
                # Handle monochromatic results
                brf_ref = np.squeeze(self.reference.brf.values)

            regression_test_plots(
                brf_ref, brf, vza, fname_plot, (self.METRIC_NAME, metric_value)
            )
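

# Illustrative sketch (not part of the original module): a concrete subclass of
# RegressionTest only needs to set METRIC_NAME and implement _evaluate(). The
# class name and metric below are hypothetical.
#
#     @parse_docs
#     @attrs.define
#     class MaxAbsDiffTest(RegressionTest):
#         METRIC_NAME = "max abs diff"
#
#         def _evaluate(self) -> tuple[bool, float]:
#             diff = np.abs(self.value.brf.values - self.reference.brf.values)
#             metric = float(diff.max())
#             return metric <= self.threshold, metric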


@parse_docs
@attrs.define
class RMSETest(RegressionTest):
    """
    This class implements a simple test based on the root mean squared error
    (RMSE) of a result array with respect to the reference data. The test will
    pass if the computed root mean squared deviation between the result and
    reference is smaller than or equal to the given threshold.
    """

    METRIC_NAME = "rmse"

    def _evaluate(self) -> tuple[bool, float]:
        value_np = self.value.brf.values
        ref_np = self.reference.brf.values

        if np.shape(value_np) != np.shape(ref_np):
            raise ValueError(
                f"Result and reference do not have the same shape! "
                f"Got: {np.shape(value_np)}, {np.shape(ref_np)}"
            )

        result_flat = np.array(value_np).flatten()
        ref_flat = np.array(ref_np).flatten()
        rmse = np.linalg.norm(result_flat - ref_flat) / np.sqrt(len(ref_flat))

        return rmse <= self.threshold, rmse
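

# Illustrative sketch (not part of the original module): how a concrete test
# such as RMSETest might be driven from a test function. make_result_dataset
# and the file paths below are hypothetical placeholders.
#
#     def test_my_case(make_result_dataset):
#         result = make_result_dataset()  # xr.Dataset with "brf" and "vza" variables
#         test = RMSETest(
#             name="my_case",
#             value=result,
#             reference="tests/regression/my_case-ref.nc",
#             threshold=1e-3,
#             archive_dir="build/regression",
#         )
#         assert test.run()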


@parse_docs
@attrs.define
class Chi2Test(RegressionTest):
    """
    This class implements a statistical test for the regression testing
    campaign, based on Pearson's chi-squared test
    (https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test).

    It determines the probability that the reference and the test result
    follow the same distribution. This test will pass if the computed p-value
    is strictly larger than the given threshold.
    """

    # The algorithm is adapted from Mitsuba's testing framework.

    METRIC_NAME = "p-value"

    def _evaluate(self) -> tuple[bool, float]:
        ref_np = self.reference.brf.values
        result_np = self.value.brf.values

        histo_bins = np.linspace(ref_np.min(), ref_np.max(), 20)
        histo_ref = np.histogram(ref_np, histo_bins)[0]
        histo_res = np.histogram(result_np, histo_bins)[0]

        # Sort both histograms following the ascending frequencies in the
        # reference. Algorithm from:
        # https://stackoverflow.com/questions/9764298/how-to-sort-two-lists-which-reference-each-other-in-the-exact-same-way
        histo_ref_sorted, histo_res_sorted = zip(
            *sorted(zip(histo_ref, histo_res), key=lambda x: x[0])
        )

        from mitsuba.python.math import rlgamma

        chi2val, dof, pooled_in, pooled_out = mi.math.chi2(
            histo_res_sorted, histo_ref_sorted, 5
        )
        p_value = 1.0 - rlgamma(dof / 2.0, chi2val / 2.0)

        return p_value > self.threshold, p_value
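

# Illustrative cross-check (not part of the original module; SciPy is not a
# dependency of it): the p-value computed in Chi2Test._evaluate corresponds to
# the survival function of the chi-squared distribution with dof degrees of
# freedom, so for given chi2val and dof one could equivalently write:
#
#     from scipy.stats import chi2 as chi2_dist
#     p_value = chi2_dist.sf(chi2val, dof)  # equals 1 - rlgamma(dof / 2, chi2val / 2)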