from __future__ import annotations
import os
from abc import ABC, abstractmethod
from io import StringIO
from pathlib import Path
from typing import TYPE_CHECKING, ClassVar
import attrs
import matplotlib.pyplot as plt
import mitsuba as mi
import numpy as np
import scipy.stats as spstats
import xarray as xr
from numpy.typing import ArrayLike
from robot.api import logger
from ..attrs import define, documented
from ..typing import PathLike
from ..util.misc import summary_repr
if TYPE_CHECKING:
from matplotlib.axes import Axes
from matplotlib.figure import Figure
def regression_test_plots(
ref: ArrayLike,
result: ArrayLike,
vza: ArrayLike,
metric: tuple[str, float],
ref_var: ArrayLike | None = None,
result_var: ArrayLike | None = None,
xlabel: str | None = None,
ylabel: str | None = None,
) -> tuple[Figure, list[list[Axes]]]:
"""
Create regression test report plots. Plot errorbars if both ref_var and
result_var are set.
Parameters
----------
ref : array-like
Variable values for the reference data
result : array-like
Variable values for the simulation result
vza : array-like
VZA values for plotting
metric : tuple
A tuple of the form (metric name, value) to be added to the plots.
ref_var : array-like, optional
Variable variance for the reference data.
result_var : array-like, optional
Variable variance for the simulation result.
xlabel, ylabel : str or None
Labels applied to the x and y axes of the plot.
Returns
-------
figure: Figure
Pyplot Figure containing the report charts
axes: list
2x2 array of Axes included in the report Figure
"""
fig, axes = plt.subplots(2, 2, figsize=(8, 6), layout="constrained")
ax = axes[0][0]
if ref_var is None:
ax.plot(vza, ref, label="reference")
else:
ax.errorbar(vza, ref, yerr=np.sqrt(ref_var), label="reference")
if result_var is None:
ax.plot(vza, result, label="result")
else:
ax.errorbar(vza, result, yerr=np.sqrt(result_var), label="result")
ax.set_title("Reference and test result")
handles, labels = ax.get_legend_handles_labels()
ax = axes[1][0]
ax.plot(vza, result - ref)
ax.set_title("Absolute difference")
ax = axes[1][1]
ax.plot(vza, (result - ref) / ref)
ax.set_title("Relative difference")
ax = axes[0][1]
ax.set_axis_off()
ax.legend(handles=handles, labels=labels, loc="upper center")
if metric[1] is None:
ax.text(
0.5,
0.5,
f'Metric "{metric[0]}" is not available',
horizontalalignment="center",
)
else:
ax.text(0.5, 0.5, f"{metric[0]}: {metric[1]:.4}", horizontalalignment="center")
for i, j in [[0, 0], [1, 0], [1, 1]]:
ax = axes[i][j]
if xlabel is not None:
ax.set_xlabel(xlabel)
if ylabel is not None:
ax.set_ylabel(ylabel)
return fig, axes
def figure_to_html(fig: plt.Figure) -> str:
"""
Render a figure in HTML format
Returns a string containing the rendered HTML. The root tag is a <svg> one.
Parameters
----------
fig : plt.Figure
Matplotlib figure to render in HTML.
Returns
-------
str
Rendered HTML <svg> tag with styling.
"""
str_i = StringIO()
fig.savefig(str_i, format="svg", transparent=True, bbox_inches="tight")
fig.canvas.draw_idle()
svg = str_i.getvalue()
# Include some CSS in the SVG to render nicely in Robot report's dark and
# light modes
return "\n".join(
[
"<svg",
'version="1.1"',
'baseProfile="full"',
'width="810" height="540" viewBox="0 0 810 540"'
'xmlns="http://www.w3.org/2000/svg">',
"<style>",
" path {",
" fill: var(--text-color);",
" stroke: var(--text-color);",
" }",
"</style>",
svg,
"</svg>",
]
)
def reference_converter(value: PathLike | xr.Dataset | None) -> xr.Dataset | None:
"""
A converter for handling the reference data attribute.
Parameters
----------
value : path-like or Dataset or None
Path to the reference dataset file or dataset identifier or Dataset or
None.
Returns
-------
xr.Dataset or None
The reference dataset.
Raises
------
ValueError
If the reference data is not a valid dataset.
Notes
-----
The ``value`` argument is processed as follows:
* ``None`` and datasets are passed through.
* If ``value`` is a path, resolve it with the path resolver. If the path
points to a non-existing location, return ``None``. Otherwise, try to load
it as a Dataset.
Anything else raises a :class:`ValueError`.
"""
if value is None:
return value
if isinstance(value, xr.Dataset):
return value
if isinstance(value, (str, os.PathLike, bytes)):
logger.info(f'Looking up "{str(value)}" on disk', also_console=True)
from .. import fresolver
fname = fresolver.resolve(value)
logger.info(f"Resolved path: {fname}", also_console=True)
if not fname.exists():
return None
return xr.load_dataset(fname)
raise ValueError(
"Reference must be provided as a Dataset, a file path or None. "
f"Got a {type(value).__name__}"
)
[docs]
@define
class RegressionTest(ABC):
"""
Common interface for tests based on the comparison of a result array against
reference values.
"""
# Name used for the reference metric. Must be set be subclasses.
METRIC_NAME: ClassVar[str | None] = None
name: str = documented(
attrs.field(validator=attrs.validators.instance_of(str)),
doc="Test case name.",
type="str",
init_type="str",
)
value: xr.Dataset = documented(
attrs.field(
validator=attrs.validators.instance_of(xr.Dataset),
repr=summary_repr,
),
doc="Simulation result. Must be specified as a dataset.",
type=":class:`xarray.Dataset`",
init_type=":class:`xarray.Dataset`",
)
reference: xr.Dataset | None = documented(
attrs.field(
default=None,
converter=reference_converter,
validator=attrs.validators.optional(
attrs.validators.instance_of(xr.Dataset)
),
),
doc="Reference data. Can be specified as an xarray dataset, a path to a "
"NetCDF file or a path to a resource.",
type=":class:`xarray.Dataset` or None",
init_type=":class:`xarray.Dataset` or path-like, optional",
default="None",
)
variable: str = documented(
attrs.field(kw_only=True, default="brf_srf"),
doc="Tested variable",
type="str",
init_type="str",
default="brf_srf",
)
threshold: float = documented(
attrs.field(kw_only=True),
doc="Test metric threshold",
type="float",
init_type="float",
)
archive_dir: Path = documented(
attrs.field(kw_only=True, converter=lambda x: Path(x).resolve()),
doc="Path to output artefact storage directory. Relative paths are "
"interpreted with respect to the current working directory.",
type=":class:`pathlib.Path`",
init_type="path-like",
)
plot: bool = documented(
attrs.field(kw_only=True, converter=bool),
doc="Enable pyplot charts",
type="bool",
init_type="bool",
)
def __attrs_pre_init__(self):
if self.METRIC_NAME is None:
raise TypeError(f"Unsupported test type {type(self).__name__}")
def __attrs_post_init__(self):
if self.plot:
if (
"w" in self.reference[self.variable]
and self.reference[self.variable].w.size > 1
):
raise ValueError(
"Regression charts are implemented for single a wavelength, "
"but the reference dataset has a spectral dimension. "
"Please disable the 'plot' option."
)
if (
"w" in self.value[self.variable]
and self.value[self.variable].w.size > 1
):
raise ValueError(
"Regression charts are implemented for single a wavelength, "
"but the tested dataset has a spectral dimension. "
"Please disable the 'plot' option."
)
[docs]
def run(self, diagnostic=False) -> bool:
"""
This method controls the execution steps of the regression test:
* handle missing reference data
* catch errors during text evaluation
* create the appropriate plots and data archives
Returns
-------
bool
Result of the test criterion comparison.
"""
logger.info(f"Regression test {self.name} results:", also_console=True)
fname = self.name
ext = ".nc"
archive_dir = self.archive_dir
fname_reference = archive_dir / f"{fname}-ref{ext}"
fname_result = archive_dir / f"{fname}-result{ext}"
# if no valid reference is found, store the results as new ref and fail
# the test
if not self.reference:
logger.info(
"No reference data found. Storing test results to "
f"{fname_reference}. This can be the new reference.",
also_console=True,
)
self._archive(self.value, fname_reference)
self._plot(metric_value=None, noref=True)
return False
# else (we have a reference value), evaluate the test metric
try:
passed, metric_value = self._evaluate(diagnostic)
msg = "\n".join(
[
"Test passed" if passed else "Test did not pass",
f"Metric value: {self.METRIC_NAME} = {metric_value}",
f"Metric threshold: {self.threshold}",
f"Variable: {self.variable}",
]
)
logger.info(msg, also_console=True)
except Exception as e:
logger.info(
"An exception occurred during test evaluation!", also_console=True
)
self._plot(noref=False, metric_value=None)
raise e
# we got a metric: report the results in the archive directory
logger.info(
f"Saving current output dataset to {fname_result}", also_console=True
)
self._archive(self.value, fname_result)
logger.info(
f"Saving reference dataset locally to {fname_reference}", also_console=True
)
self._archive(self.reference, fname_reference)
self._plot(noref=False, metric_value=metric_value)
return passed
@abstractmethod
def _evaluate(self, diagnostic_chart: bool = False) -> tuple[bool, float]:
"""
Evaluate the test results and perform a comparison to the reference
based on the criterion defined in the specialized class.
Parameters
----------
diagnostic_chart : bool, optional
If ``True``, append a diagnostic chart to the test report.
Returns
-------
passed : bool
``True`` iff the test passed.
metric_value : float
The value of the test metric.
"""
pass
def _archive(self, dataset: xr.Dataset, fname_output: PathLike) -> None:
"""
Create an archive file for test result and reference storage
"""
os.makedirs(os.path.dirname(fname_output), exist_ok=True)
dataset.to_netcdf(fname_output)
def _plot(self, metric_value: float | None, noref: bool) -> None:
"""
Create a plot to visualize the results of the test.
If the ``reference only`` parameter is set, create only a simple plot
visualizing the new reference data. Otherwise, create the more complex
comparison plots for the regression test.
Parameters
----------
metric_value : float or None
The numerical value of the test metric.
noref : bool
If ``True``, create only a simple visualization of the computed
data.
"""
if not self.plot:
return
fname = self.name
ext = ".png"
archive_dir = self.archive_dir
fname_plot = archive_dir / f"{fname}{ext}"
fname_plot.parent.mkdir(parents=True, exist_ok=True)
if noref:
figure, _ = self._plot_noref()
else:
figure, _ = self._plot_ref(metric_value)
html_svg = figure_to_html(figure)
logger.info(html_svg, html=True, also_console=False)
logger.info(f"Saving PNG report chart to {fname_plot}", also_console=True)
plt.savefig(fname_plot)
plt.close()
def _plot_noref(self):
"""
Draw a simple plot when no reference data is available.
"""
if not self.plot:
return
vza = np.squeeze(self.value.vza.values)
val = np.squeeze(self.value[self.variable].values)
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(vza, val)
ax.set_xlabel("VZA [deg]")
ax.set_ylabel(self.variable)
ax.set_title("Simulation result, can be used as new reference")
return fig, ax
def _plot_ref(self, metric_value: float | None = None):
"""
Draw a comparison plot with reference and test data displayed together.
"""
if not self.plot:
return
vza = np.squeeze(self.value.vza.values)
val = np.squeeze(self.value[self.variable].values)
ref = np.squeeze(self.reference[self.variable].values)
return regression_test_plots(
ref,
val,
vza,
(self.METRIC_NAME, metric_value),
xlabel="VZA [deg]",
ylabel=self.variable,
)
def _plot_diagnostic(self, **diagnostic_info) -> None:
"""
Create an additional plot to display more technical information about
the test metrics and decision process. The diagnostic plot can help the
user debug a failing test, or to assess the test power and significance.
This plot is output directly to the robotframework report.
Parameters:
-----------
**diagnostic_info: dict
Variadic keyword arguments for the subclasses implementation
"""
raise NotImplementedError(
f"{type(self)} does not implement a diagnostic plot method"
)
[docs]
@define
class RMSETest(RegressionTest):
"""
This class implements a simple test based on the root mean squared
error (RMSE) of a result array with respect to the reference data.
The test will pass if the computed root mean squared deviation between
the result and reference is smaller or equal to the given threshold.
"""
METRIC_NAME = "rmse"
def _evaluate(self, diagnostic_chart=False) -> tuple[bool, float]:
value_np = self.value[self.variable].values
ref_np = self.reference[self.variable].values
if np.shape(value_np) != np.shape(ref_np):
raise ValueError(
f"Result and reference do not have the same shape! "
f"Got: {np.shape(value_np)}, {np.shape(ref_np)}"
)
result_flat = np.array(value_np).flatten()
ref_flat = np.array(ref_np).flatten()
rmse = np.linalg.norm(result_flat - ref_flat) / np.sqrt(len(ref_flat))
return rmse <= self.threshold, rmse
[docs]
@define
class Chi2Test(RegressionTest):
"""
This class implements a statistical test for the regression testing
campaign, based on Pearson's Chi-squared test.
https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test
It determines the probability for the reference and the test result
following the same distribution.
This test will pass if the computed p-value is strictly larger than the
given threshold.
"""
# The algorithm is adapted from Mitsuba's testing framework.
METRIC_NAME = "X² p-value"
def _evaluate(self, diagnostic_chart=False) -> tuple[bool, float]:
ref_np = self.reference[self.variable].values
result_np = self.value[self.variable].values
histo_bins = np.linspace(ref_np.min(), ref_np.max(), 20)
histo_ref = np.histogram(ref_np, histo_bins)[0]
histo_res = np.histogram(result_np, histo_bins)[0]
# sorting both histograms following the ascending frequencies in
# the reference. Algorithm from:
# https://stackoverflow.com/questions/9764298/how-to-sort-two-lists-which-reference-each-other-in-the-exact-same-way
histo_ref_sorted, histo_res_sorted = zip(
*sorted(zip(histo_ref, histo_res), key=lambda x: x[0])
)
from mitsuba.math_py import rlgamma
chi2val, dof, pooled_in, pooled_out = mi.math.chi2(
histo_res_sorted, histo_ref_sorted, 5
)
p_value = 1.0 - rlgamma(dof / 2.0, chi2val / 2.0)
return p_value > self.threshold, p_value
[docs]
@define
class AbstractStudentTTest(RegressionTest):
"""
Abstract Student's T-Test
=========================
Implement diagnostic chart common to subclassing T-test implementations.
"""
def _plot_diagnostic(self, dof=None, t_prim=None) -> None:
"""
Diagnostic chart for an Independent Student's T-test
Parameters:
-----------
dof: int
Degrees of Freedom
t_prim: float
t' statistic issued from the test
"""
if not self.plot:
return
fig, ax = plt.subplots()
ax.grid()
ax2 = ax.twinx()
start, end = spstats.t.ppf(0.0001, dof), spstats.t.ppf(0.9999, dof)
if (t_prim > start) and (t_prim < end):
fx = np.linspace(-np.abs(t_prim), np.abs(t_prim), 100)
fy = spstats.t(dof).pdf(fx)
ax.fill_between(np.zeros((100,)), fy)
ax.axvline(spstats.t.ppf(-self.threshold / 2.0, dof), color="red")
ax.axvline(spstats.t.ppf(self.threshold / 2.0, dof), color="red")
else:
ax.axvline(t_prim, label="T value")
ax.axvline(0.0, color="red", linestyle="--")
ax.set_title("T-statistic")
x = np.linspace(start, end, 100)
y = spstats.t(dof).pdf(x)
ax2.plot(x, y, label="target T distribution form", color="black")
ax2.legend(loc="upper right")
ax2.set_ylim([0.0, max(y) * 1.1])
chart = figure_to_html(fig)
plt.close(fig)
logger.info(chart, html=True)
[docs]
@define
class IndependentStudentTTest(AbstractStudentTTest):
"""
Independent Student's T-test
============================
This implementation of a Student's T-test is following the assumption of
independance of the two groups that are tested. The bias of the mean values
of the two groups is assumed to be the result of chance under the null
hypothesis. It is a two-tailed test.
It is less sensitive to outliers than the paired Student's T-test.
"""
METRIC_NAME = "T-test p-value"
def _evaluate(self, diagnostic_chart=False) -> tuple[bool, float]:
variable_var = self.variable + "_var"
if variable_var not in self.reference:
raise ValueError(
"The reference data for this T-test does not contain expected "
"appropriate variance values, could not find data variable "
f"'{variable_var}'"
)
if variable_var not in self.value:
raise ValueError(
"The tested data for this T-test does not contain expected "
"appropriate variance values, could not find data variable "
f"'{variable_var}'"
)
ref_np = self.reference[self.variable].values.ravel()
result_np = self.value[self.variable].values.ravel()
var_ref_np = self.reference[variable_var].values.ravel()
var_res_np = self.value[variable_var].values.ravel()
# Calculate mean values over observations and associated variances
R_res = np.mean(result_np)
R_ref = np.mean(ref_np)
var_R_res = np.sum(var_res_np) / var_res_np.size**2
var_R_ref = np.sum(var_ref_np) / var_ref_np.size**2
bias_mean = R_res - R_ref
# Calculate T-statistic and associated degree of freedom of its
# T-distribution using a pooled standard deviation
s_p = np.sqrt(
((var_res_np.size - 1.0) * var_R_res + (var_ref_np.size - 1.0) * var_R_ref)
/ (var_res_np.size + var_ref_np.size - 2)
)
t_prim = bias_mean / (
s_p * np.sqrt(1.0 / var_res_np.size + 1.0 / var_ref_np.size)
)
dof = (var_res_np.size + var_ref_np.size) - 2
assert dof > 0
# Calculate p-value of the two-tailed t-test using the T distribution
# survival function for the null hypothesis.
p_value = spstats.t.sf(np.abs(t_prim), dof) * 2
passed = p_value > self.threshold
if diagnostic_chart:
self._plot_diagnostic(dof=dof, t_prim=t_prim)
logger.info(f"bias = {bias_mean}", also_console=True)
logger.info(f"s_p = {s_p}", also_console=True)
logger.info(f"t' = {t_prim}", also_console=True)
logger.info(f"dof = {dof}", also_console=True)
logger.info(f"p-value = {p_value}", also_console=True)
logger.info(f"alpha = {self.threshold}", also_console=True)
return passed, p_value
[docs]
@define
class PairedStudentTTest(AbstractStudentTTest):
"""
Paired Student's T-test
=======================
This implementation of a Student's T-test is following the assumption of
paired samples within two groups that are tested. The mean of the bias
between the paired values is assumed to be the result of chance under the
null hypothesis. It is a two-tailed test.
The paired test allow to introduce a covariance factor between the pairs.
By default, this covariance is equal to zero, thus assuming independence of
the two variables.
Contrary to the independent Student's T-test, this paired version of the
test requires an equal degree of freedom of the two groups.
"""
METRIC_NAME = "paired T-test p-value"
cov: np.ndarray | float = documented(
attrs.field(kw_only=True, default=0.0),
doc="Covariance between observation, defaults to zero",
type="ndarray or float",
init_type="array-like or float",
)
def _evaluate(self, diagnostic_chart=False) -> tuple[bool, float]:
variable_var = self.variable + "_var"
if variable_var not in self.reference:
raise ValueError(
"The reference data for this T-test does not contain expected "
"appropriate variance values, could not find data variable "
f"'{variable_var}'"
)
if variable_var not in self.value:
raise ValueError(
"The tested data for this T-test does not contain expected "
"appropriate variance values, could not find data variable "
f"'{variable_var}'"
)
ref_np = self.reference[self.variable].values.ravel()
result_np = self.value[self.variable].values.ravel()
var_ref_np = self.reference[variable_var].values.ravel()
var_res_np = self.value[variable_var].values.ravel()
assert ref_np.shape == result_np.shape
assert ref_np.shape == var_ref_np.shape
assert ref_np.shape == var_res_np.shape
# Calculate paired mean value and associated variance
D_mean = np.mean(result_np - ref_np)
var_D = (var_res_np + var_ref_np) - 2 * self.cov
var_D_mean = np.sum(var_D) / var_D.size**2
# Calculate T-statistic and associated degree of freedom of its
# T-distribution
t_prim = D_mean / (var_D_mean / np.sqrt(var_D.size))
dof = var_D.size - 1
assert dof > 0
# Calculate p-value of the two-tailed t-test using the T distribution
# survival function for the null hypothesis.
p_value = spstats.t.sf(np.abs(t_prim), dof) * 2
passed = p_value > self.threshold
if diagnostic_chart:
self._plot_diagnostic(dof=dof, t_prim=t_prim)
logger.info(f"bias = {D_mean}", also_console=True)
logger.info(f"var mean = {var_D_mean}", also_console=True)
logger.info(f"t' = {t_prim}", also_console=True)
logger.info(f"dof = {dof}", also_console=True)
logger.info(f"p-value = {p_value}", also_console=True)
logger.info(f"alpha = {self.threshold}", also_console=True)
return passed, p_value
[docs]
@define
class ZTest(RegressionTest):
"""
Z-Test with Šidák correction factor
===================================
Implement a Z-test, testing the significance of paired differences between
a set of observations and a set of references. It considers the observations
variance.
Paired tests are aggregated into one p-value using a Šidák correction. The
test passes if the null hypothesis is accepted for at least 99.75% of the
paired Z-tests
This paired Z-test requires an equal degree of freedom of the two groups.
"""
METRIC_NAME = "Z-test p-value"
def _plot_diagnostic(self, z=None) -> None:
"""
Diagnostic chart for a Z-test
Parameters:
-----------
z: array-like
Z-statistic for each pair of measurements
"""
if not self.plot:
return
fig, ax = plt.subplots()
ax.grid()
ax2 = ax.twinx()
ax.hist(z, bins=50, label="Z values")
ax.axvline(0.0, color="red", linestyle="--")
ax.set_title("Z-statistic")
ax.legend(loc="upper left")
x = np.linspace(-4.0, 4.0, 100)
y = spstats.norm.pdf(x, 0.0, 1.0)
ax2.plot(x, y, label="target Z distribution form", color="black")
ax2.legend(loc="upper right")
ax2.set_ylim([0.0, max(y) * 1.1])
chart = figure_to_html(fig)
plt.close(fig)
logger.info(chart, html=True)
def _evaluate(self, diagnostic_chart=False) -> tuple[bool, float]:
variable_var = self.variable + "_var"
if variable_var not in self.value:
raise ValueError(
"The reference data for this Z-test does not contain expected "
"appropriate variance values, could not find data variable "
f"'{variable_var}'"
)
ref_np = self.reference[self.variable].values.ravel()
result_np = self.value[self.variable].values.ravel()
var_res_np = self.value[variable_var].values.ravel()
assert ref_np.shape == result_np.shape
assert ref_np.shape == var_res_np.shape
# Calculate Z-statistic
z = (result_np - ref_np) / np.sqrt(var_res_np)
# Calculate p-value of the two-tailed z-test null hypothesis
p_values = spstats.norm.sf(np.abs(z)) * 2
alpha_0 = 1.0 - (1.0 - self.threshold) ** (1.0 / result_np.size)
accept_null = p_values > alpha_0
passed = np.count_nonzero(accept_null) >= int(0.9975 * result_np.size)
if diagnostic_chart:
self._plot_diagnostic(z=z)
logger.info(f"min p-value = {min(p_values)}", also_console=True)
logger.info(f"max p-value = {max(p_values)}", also_console=True)
logger.info(
f"n passed = {np.count_nonzero(accept_null)}/{int(0.9975 * result_np.size)}",
also_console=True,
)
logger.info(f"alpha_1 = {self.threshold}", also_console=True)
logger.info(f"alpha_0 = {alpha_0}", also_console=True)
return passed, min(p_values)
def _plot_ref(self, metric_value: float | None = None):
"""
Draw a comparison plot with reference and test data displayed together.
"""
vza = np.squeeze(self.value.vza.values)
result = np.squeeze(self.value[self.variable].values)
result_var = np.squeeze(self.value[f"{self.variable}_var"].values)
ref = np.squeeze(self.reference[self.variable].values)
return regression_test_plots(
ref,
result,
vza,
(self.METRIC_NAME, metric_value),
result_var=result_var,
xlabel="VZA [deg]",
ylabel=self.variable,
)
[docs]
@define
class SidakTTest(RegressionTest):
"""
T-Test with Šidák correction factor
===================================
Implement a T-test, testing the significance of paired differences between
a set of observations and a set of references. It considers both the
observations and reference variance.
Paired tests are aggregated into one p-value using a Šidák correction. The
test passes if the null hypothesis is accepted for at least 99.75% of the
paired T-tests
"""
METRIC_NAME = "Sidak T-test p-value"
def _plot_diagnostic(self, t_prim=None) -> None:
"""
Diagnostic chart for the T-test
Parameters:
-----------
t_prim: array-like
T-statistic for each pair of measurements
"""
fig, ax = plt.subplots()
ax.grid()
ax2 = ax.twinx()
start, end = spstats.norm.ppf(0.0001), spstats.norm.ppf(0.9999)
ax.hist(t_prim, bins=50, label="T values")
ax.axvline(0.0, color="red", linestyle="--")
ax.set_title("T-statistic")
ax.legend(loc="upper left")
x = np.linspace(start, end, 100)
y = spstats.norm.pdf(x)
ax2.plot(x, y, label="target T distribution form", color="black")
ax2.legend(loc="upper right")
ax2.set_ylim([0.0, max(y) * 1.1])
chart = figure_to_html(fig)
plt.close(fig)
logger.info(chart, html=True)
def _evaluate(self, diagnostic_chart=False) -> tuple[bool, float]:
variable_var = self.variable + "_var"
if variable_var not in self.reference:
raise ValueError(
"The reference data for this T-test does not contain expected "
"appropriate variance values, could not find data variable "
f"'{variable_var}'"
)
ref_np = self.reference[self.variable].values.ravel()
result_np = self.value[self.variable].values.ravel()
assert ref_np.shape == result_np.shape
var_ref_np = self.reference[variable_var].values.ravel()
var_res_np = self.value[variable_var].values.ravel()
assert var_ref_np.shape == var_res_np.shape
# Calculate T-statistic
t_prim = (result_np - ref_np) / np.sqrt(var_res_np + var_ref_np)
# Calculate p-value of the two-tailed t-test using the T distribution
# survival function for the null hypothesis that there is no difference
# between the two mean distributions. It is assumed that the sample size
# is large enough for the T distribution to converge to a normal one.
p_values = spstats.norm.sf(np.abs(t_prim)) * 2
# Calculate the Šidák correction
alpha_0 = 1.0 - (1.0 - self.threshold) ** (1.0 / result_np.size)
accept_null = p_values > alpha_0
passed = np.count_nonzero(accept_null) >= int(0.9975 * result_np.size)
if diagnostic_chart:
self._plot_diagnostic(t_prim=t_prim)
logger.info(f"min p-value = {min(p_values)}", also_console=True)
logger.info(f"max p-value = {max(p_values)}", also_console=True)
logger.info(
f"n passed = {np.count_nonzero(accept_null)}/{int(0.9975 * result_np.size)}",
also_console=True,
)
logger.info(f"alpha_1 = {self.threshold}", also_console=True)
logger.info(f"alpha_0 = {alpha_0}", also_console=True)
return passed, min(p_values)