Source code for eradiate.test_tools.regression

import os
import typing as t
from abc import ABC, abstractmethod
from pathlib import Path

import attrs
import matplotlib.pyplot as plt
import mitsuba as mi
import numpy as np
import xarray as xr

from .. import converters
from ..attrs import documented, parse_docs
from ..exceptions import DataError
from ..typing import PathLike


def regression_test_plots(
    ref: np.typing.ArrayLike,
    result: np.typing.ArrayLike,
    vza: np.typing.ArrayLike,
    filename: PathLike,
    metric: t.Tuple[str, float],
) -> None:
    """
    Create regression test report plots.

    Parameters
    ----------
    ref : array-like
        BRF values for the reference data

    result : array-like
        BRF values for the simulation result

    vza : array-like
        VZA values for plotting

    filename : path-like
        Path to the output file for the plot

    metric : tuple
        A tuple of the form (metric name, value) to be added to the plots.
    """
    fig, axes = plt.subplots(2, 2, figsize=(8, 6))

    axes[0][0].plot(vza, ref, label="reference")
    axes[0][0].plot(vza, result, label="result")
    axes[0][0].set_title("Reference and test result")
    handles, labels = axes[0][0].get_legend_handles_labels()

    axes[1][0].plot(vza, result - ref)
    axes[1][0].set_xlabel("VZA [deg]")
    axes[1][0].set_ylabel("BRF in principal plane [-]")
    axes[1][0].set_title("Absolute difference")

    axes[1][1].plot(vza, (result - ref) / ref)
    axes[1][1].set_title("Relative difference")

    axes[0][1].set_axis_off()
    axes[0][1].legend(handles=handles, labels=labels, loc="upper center")
    axes[0][1].text(
        0.5, 0.5, f"{metric[0]}: {metric[1]:.4}", horizontalalignment="center"
    )

    plt.tight_layout()
    plt.savefig(filename)
    plt.close()
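

# Illustrative usage sketch (editorial addition): exercising
# regression_test_plots() with synthetic data. The arrays, metric value and
# output file name below are hypothetical placeholders.
def _example_regression_test_plots(outdir: PathLike = ".") -> None:
    vza = np.linspace(-75.0, 75.0, 151)  # viewing zenith angles [deg]
    ref = 0.03 + 0.01 * np.cos(np.deg2rad(vza))  # synthetic reference BRF
    rng = np.random.default_rng(0)
    result = ref + rng.normal(0.0, 1e-4, ref.size)  # perturbed "result"
    rmse = float(np.sqrt(np.mean((result - ref) ** 2)))
    regression_test_plots(
        ref, result, vza, os.path.join(outdir, "example-report.png"), ("rmse", rmse)
    )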


def reference_converter(value):
    """
    A converter for handling the reference data attribute.

    * if ``value`` is a string or path-like object, it attempts to load
      a dataset from that location or to serve it from the data store; if that
      fails, it returns ``None``;
    * if ``value`` is an xarray dataset or ``None``, it is returned directly;
    * otherwise, a ValueError is raised.
    """
    if value is None:
        return value

    try:
        return converters.load_dataset(value)

    except DataError:
        return None
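

# Illustrative sketch (editorial addition): the input cases handled by
# reference_converter(). The reference path below is a hypothetical placeholder.
def _example_reference_converter() -> None:
    ref_none = reference_converter(None)  # None passes through unchanged
    ref_ds = reference_converter(xr.Dataset())  # datasets are returned directly
    # Path-like values are loaded from disk or served from the data store; if
    # that fails with a DataError, None is returned instead of raising.
    ref_path = reference_converter("tests/regression/example-ref.nc")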


@parse_docs
@attrs.define
class RegressionTest(ABC):
    """
    Common interface for tests based on the comparison of a result array
    against reference values.
    """

    # Name used for the reference metric. Must be set by subclasses.
    METRIC_NAME: t.ClassVar[t.Optional[str]] = None

    name: str = documented(
        attrs.field(validator=attrs.validators.instance_of(str)),
        doc="Test case name.",
        type="str",
        init_type="str",
    )

    value: xr.Dataset = documented(
        attrs.field(validator=attrs.validators.instance_of(xr.Dataset)),
        doc="Simulation result. Must be specified as a dataset.",
        type=":class:`xarray.Dataset`",
        init_type=":class:`xarray.Dataset`",
    )

    reference: t.Optional[xr.Dataset] = documented(
        attrs.field(
            default=None,
            converter=reference_converter,
            validator=attrs.validators.optional(
                attrs.validators.instance_of(xr.Dataset)
            ),
        ),
        doc="Reference data. Can be specified as an xarray dataset, a path to a "
        "NetCDF file or a path to a resource served by the data store.",
        type=":class:`xarray.Dataset` or None",
        init_type=":class:`xarray.Dataset` or path-like, optional",
        default="None",
    )

    threshold: float = documented(
        attrs.field(kw_only=True),
        doc="Threshold for test evaluation.",
        type="float",
        init_type="float",
    )

    archive_dir: Path = documented(
        attrs.field(kw_only=True, converter=Path),
        doc="Path to output artefact storage directory. Relative paths are "
        "interpreted with respect to the current working directory.",
        type=":class:`pathlib.Path`",
        init_type="path-like",
    )

    def __attrs_pre_init__(self):
        if self.METRIC_NAME is None:
            raise TypeError(f"Unsupported test type {type(self).__name__}")

    def run(self) -> bool:
        """
        This method controls the execution steps of the regression test:

        * handle missing reference data;
        * catch errors during test evaluation;
        * create the appropriate plots and data archives.

        Returns
        -------
        bool
            Result of the test criterion comparison.
        """
        fname = self.name
        ext = ".nc"
        archive_dir = os.path.abspath(self.archive_dir)

        # Absolute path where the reference will be stored
        fname_reference = os.path.join(archive_dir, fname + "-ref" + ext)
        # Absolute path where test output will be stored
        fname_result = os.path.join(archive_dir, fname + "-result" + ext)

        # If no valid reference is found, store the results as the new
        # reference and fail the test
        if not self.reference:
            print("No reference data was found! Storing test results as reference.")
            self._archive(self.value, fname_reference)
            self._plot(reference_only=True, metric_value=None)
            return False

        # Otherwise (we have a reference value), evaluate the test metric
        try:
            passed, metric_value = self._evaluate()
        except Exception as e:
            print("An exception occurred during test execution!")
            self._plot(reference_only=False, metric_value=None)
            raise e

        # We got a metric: report the results in the archive directory
        self._archive(self.value, fname_result)
        self._archive(self.reference, fname_reference)
        self._plot(reference_only=False, metric_value=metric_value)

        return passed

    @abstractmethod
    def _evaluate(self) -> t.Tuple[bool, float]:
        """
        Evaluate the test results and perform a comparison to the reference
        based on the criterion defined in the specialised class.

        Returns
        -------
        passed : bool
            ``True`` iff the test passed.

        metric_value : float
            The value of the test metric.
        """
        pass

    def _archive(self, dataset: xr.Dataset, fname_output: PathLike) -> None:
        """
        Create an archive file for test result and reference storage.
        """
        os.makedirs(os.path.dirname(fname_output), exist_ok=True)
        print(f"Saving dataset to {fname_output}")
        dataset.to_netcdf(fname_output)

    def _plot(self, metric_value: t.Optional[float], reference_only: bool) -> None:
        """
        Create a plot to visualise the results of the test. If the
        ``reference_only`` parameter is set, create only a simple plot
        visualising the new reference data. Otherwise, create the more complex
        comparison plots for the regression test.

        Parameters
        ----------
        metric_value : float or None
            The numerical value of the test metric.

        reference_only : bool
            If ``True``, create only a simple visualisation of the computed
            data.
        """
        vza = np.squeeze(self.value.vza.values)

        if "brf_srf" in self.value.data_vars:
            # Handle spectral results
            brf = np.squeeze(self.value.brf_srf.values)
        else:
            # Handle monochromatic results
            brf = np.squeeze(self.value.brf.values)

        fname = self.name
        ext = ".png"
        archive_dir = os.path.abspath(self.archive_dir)
        fname_plot = os.path.join(archive_dir, fname + ext)
        os.makedirs(os.path.dirname(fname_plot), exist_ok=True)
        print(f"Saving plot to {fname_plot}")

        if reference_only:
            plt.figure(figsize=(8, 6))
            plt.plot(vza, brf)
            plt.xlabel("VZA [deg]")
            plt.ylabel("BRF in principal plane [-]")
            plt.title("Simulation result, can be used as new reference")
            plt.tight_layout()
            plt.savefig(fname_plot)
            plt.close()

        else:
            if "brf_srf" in self.value.data_vars:
                # Handle spectral results
                brf_ref = np.squeeze(self.reference.brf_srf.values)
            else:
                # Handle monochromatic results
                brf_ref = np.squeeze(self.reference.brf.values)

            regression_test_plots(
                brf_ref, brf, vza, fname_plot, (self.METRIC_NAME, metric_value)
            )
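

# Illustrative sketch (editorial addition, not part of the Eradiate API): a
# minimal RegressionTest subclass showing how the _evaluate() hook plugs into
# run(). The class name and metric are hypothetical; the actual implementations
# shipped with this module follow below (RMSETest, Chi2Test).
@parse_docs
@attrs.define
class _ExampleMaxAbsDiffTest(RegressionTest):
    """
    Toy test comparing the maximum absolute difference between result and
    reference BRF values against the threshold.
    """

    METRIC_NAME = "max abs diff"

    def _evaluate(self) -> t.Tuple[bool, float]:
        diff = np.abs(self.value.brf.values - self.reference.brf.values)
        metric = float(diff.max())
        return metric <= self.threshold, metric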


@parse_docs
@attrs.define
class RMSETest(RegressionTest):
    """
    This class implements a simple test based on the root mean squared error
    (RMSE) of a result array with respect to the reference data. The test will
    pass if the computed root mean squared deviation between the result and
    reference is smaller than or equal to the given threshold.
    """

    METRIC_NAME = "rmse"

    def _evaluate(self) -> t.Tuple[bool, float]:
        value_np = self.value.brf.values
        ref_np = self.reference.brf.values

        if np.shape(value_np) != np.shape(ref_np):
            raise ValueError(
                f"Result and reference do not have the same shape! "
                f"Got: {np.shape(value_np)}, {np.shape(ref_np)}"
            )

        result_flat = np.array(value_np).flatten()
        ref_flat = np.array(ref_np).flatten()
        rmse = np.linalg.norm(result_flat - ref_flat) / np.sqrt(len(ref_flat))

        return rmse <= self.threshold, rmse
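

# Illustrative usage sketch (editorial addition): running an RMSETest from a
# test function. The reference path, threshold and archive directory below are
# hypothetical placeholders.
def _example_run_rmse_test(result: xr.Dataset) -> bool:
    test = RMSETest(
        name="example-case",
        value=result,
        reference="tests/regression/example-case-ref.nc",
        threshold=1e-3,
        archive_dir="build/regression",
    )
    # run() archives the datasets, writes report plots and returns the test
    # verdict; it also returns False when no reference could be loaded.
    return test.run()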


@parse_docs
@attrs.define
class Chi2Test(RegressionTest):
    """
    This class implements a statistical test for the regression testing
    campaign, based on Pearson's chi-squared test:
    https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test

    It estimates the probability that the reference and the test result follow
    the same distribution. The test will pass if the computed p-value is
    strictly larger than the given threshold.
    """

    # The algorithm is adapted from Mitsuba's testing framework.

    METRIC_NAME = "p-value"

    def _evaluate(self) -> t.Tuple[bool, float]:
        ref_np = self.reference.brf.values
        result_np = self.value.brf.values

        histo_bins = np.linspace(ref_np.min(), ref_np.max(), 20)
        histo_ref = np.histogram(ref_np, histo_bins)[0]
        histo_res = np.histogram(result_np, histo_bins)[0]

        # Sort both histograms following the ascending frequencies in the
        # reference. Algorithm from:
        # https://stackoverflow.com/questions/9764298/how-to-sort-two-lists-which-reference-each-other-in-the-exact-same-way
        histo_ref_sorted, histo_res_sorted = zip(
            *sorted(zip(histo_ref, histo_res), key=lambda x: x[0])
        )

        from mitsuba.python.math import rlgamma

        chi2val, dof, pooled_in, pooled_out = mi.math.chi2(
            histo_res_sorted, histo_ref_sorted, 5
        )
        p_value = 1.0 - rlgamma(dof / 2.0, chi2val / 2.0)

        return p_value > self.threshold, p_value
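

# Illustrative sketch (editorial addition): the p-value above is the survival
# function of the chi-squared distribution with `dof` degrees of freedom,
# which can be cross-checked with SciPy (assumed available; it is not imported
# by this module).
def _example_chi2_p_value(chi2val: float, dof: float) -> float:
    from scipy.stats import chi2 as chi2_dist

    # Equivalent to 1 - rlgamma(dof / 2, chi2val / 2)
    return float(chi2_dist.sf(chi2val, dof))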