diff --git a/.gitignore b/.gitignore index 4d866453..4a198cc3 100644 --- a/.gitignore +++ b/.gitignore @@ -103,7 +103,7 @@ celerybeat.pid # Environments .env -.venv +.venv* env/ venv/ ENV/ diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index b276816b..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "python.formatting.provider": "black", - "python.testing.pytestArgs": [ - "tests" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true -} \ No newline at end of file diff --git a/.vscode/settings_template.json b/.vscode/settings_template.json new file mode 100644 index 00000000..eafd7db3 --- /dev/null +++ b/.vscode/settings_template.json @@ -0,0 +1,14 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "python.defaultInterpreterPath": "Enter interpreter path here for debugging", + "ruff.interpreter": [ + "Enter interpreter path here for linting" + ] +} \ No newline at end of file diff --git a/chemotools/_runtime/__init__.py b/chemotools/_runtime/__init__.py new file mode 100644 index 00000000..10dd6a18 --- /dev/null +++ b/chemotools/_runtime/__init__.py @@ -0,0 +1,21 @@ +""" +This submodule checks for the presence of the required software packages at runtime. + +The following optional packages are checked for: +- `pentapy` for solving pentadiagonal systems of equations for the Whittaker-Henderson + smoothing algorithm. 
+ +""" + +### Imports ### + +# if possible, pentapy is imported since it provides a more efficient implementation +# of solving pentadiagonal systems of equations, but the package is not in the +# dependencies, so ``chemotools`` needs to be made aware of whether it is available +PENTAPY_AVAILABLE: bool = False +try: + import pentapy as pp # noqa: F401 + + PENTAPY_AVAILABLE: bool = True +except ImportError: + pass diff --git a/chemotools/augmentation/spectrum_scale.py b/chemotools/augmentation/spectrum_scale.py index ce105659..7365b8e1 100644 --- a/chemotools/augmentation/spectrum_scale.py +++ b/chemotools/augmentation/spectrum_scale.py @@ -1,5 +1,7 @@ +from typing import Optional + import numpy as np -from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted from chemotools.utils.check_inputs import check_input @@ -17,7 +19,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): random_state : int, default=None The random state to use for the random number generator. - + Attributes ---------- n_features_in_ : int @@ -25,7 +27,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): _is_fitted : bool Whether the transformer has been fitted to data. - + Methods ------- fit(X, y=None) @@ -35,15 +37,18 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): Transform the input data by scaling the spectrum. """ - - def __init__(self, scale: int = 0.0, random_state: int = None): + def __init__( + self, + scale: float = 0.0, + random_state: Optional[int] = None, + ): self.scale = scale self.random_state = random_state def fit(self, X: np.ndarray, y=None) -> "SpectrumScale": """ Fit the transformer to the input data. 
- + Parameters ---------- X : np.ndarray of shape (n_samples, n_features) @@ -97,7 +102,9 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: # Check that the number of features is the same as the fitted data if X_.shape[1] != self.n_features_in_: - raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}") + raise ValueError( + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + ) # Calculate the scaled spectrum for i, x in enumerate(X_): @@ -106,6 +113,5 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: return X_.reshape(-1, 1) if X_.ndim == 1 else X_ def _scale_spectrum(self, x) -> np.ndarray: - scaling_factor = self._rng.uniform(low=1-self.scale, high=1+self.scale) + scaling_factor = self._rng.uniform(low=1 - self.scale, high=1 + self.scale) return np.multiply(x, scaling_factor) - \ No newline at end of file diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 7e82cfac..214a35b8 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -1,34 +1,74 @@ +""" +This module contains the ``AirPLS`` transformer, which performs baseline correction on +data according to the Whittaker-Henderson formulation of Penalized Least Squares which +was modified by the introduction of weights that are updated iteratively to improve the +baseline identification. + +References +---------- +It's based on the algorithms described in [1]_ and [2]_ where an implementational +adaption of [2]_ was required to make it numerically stable ([3]_). + +.. [1] Z.-M. Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 (2010) +.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023) +.. 
[3] https://math.stackexchange.com/q/4819039/1261538 + +""" + +# Authors: +# Pau Cabaneros +# Niklas Zell + + +### Imports ### + + import logging +from typing import Union + import numpy as np -from scipy.sparse import csc_matrix, eye, diags -from scipy.sparse.linalg import spsolve -from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted +from chemotools.utils._whittaker_base import WhittakerLikeSolver from chemotools.utils.check_inputs import check_input logger = logging.getLogger(__name__) +### Main Class ### -class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): + +# TODO: is polynomial_order actually differences and if so, is the description correct? +class AirPls( + OneToOneFeatureMixin, + BaseEstimator, + TransformerMixin, + WhittakerLikeSolver, +): """ - This class implements the AirPLS (Adaptive Iteratively Reweighted Penalized Least Squares) algorithm for baseline - correction of spectra data. AirPLS is a common approach for removing the baseline from spectra, which can be useful - in various applications such as spectroscopy and chromatography. + This class implements the Adaptive Iteratively Reweighted Penalized Least Squares + a.k.a AirPLS algorithm for baseline correction of spectra data. AirPLS is a common + approach for removing the baseline from spectra, which can be useful in various + applications such as spectroscopy and chromatography. Parameters ---------- - lam : float, optional default=1e2 - The lambda parameter controls the smoothness of the baseline. Increasing the value of lambda results in - a smoother baseline. + lam : float or int, optional default=1e2 + The lambda parameter that controls the smoothness of the baseline. Higher values + will result in a smoother baseline. 
polynomial_order : int, optional default=1 - The polynomial order determines the degree of the polynomial used to fit the baseline. A value of 1 corresponds + The degree of the polynomial used to fit the baseline. A value of 1 corresponds to a linear fit, while higher values correspond to higher-order polynomials. + Higher values will result in a smoother baseline. + Currently, values ``>= 3`` are highly discouraged due to numerical instability + that might obscure the smoothing effect. nr_iterations : int, optional default=15 - The number of iterations used to calculate the baseline. Increasing the number of iterations can improve the - accuracy of the baseline correction, but also increases the computation time. + The number of iterations used to calculate the baseline. Increasing the number + of iterations can improve the accuracy of the baseline correction at the cost of + computation time. Methods ------- @@ -40,25 +80,34 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): _calculate_whittaker_smooth(x, w) Calculate the Whittaker smooth of a given input vector x, with weights w. - + _calculate_air_pls(x) Calculate the AirPLS baseline of a given input vector x. References ---------- - - Z.-M. Zhang, S. Chen, and Y.-Z. Liang, Baseline correction using adaptive iteratively reweighted penalized least - squares. Analyst 135 (5), 1138-1146 (2010). + It's based on the algorithms described in [1]_ and [2]_ where an implementational + adaption of [2]_ was required to make it numerically stable ([3]_). + + .. [1] Z.-M. Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 + (2010) + .. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 + (2023) + .. 
[3] https://math.stackexchange.com/q/4819039/1261538 + """ + # TODO: polynomial order is actually differences def __init__( self, - lam: int = 100, + lam: Union[float, int] = 100, polynomial_order: int = 1, nr_iterations: int = 15, ): - self.lam = lam - self.polynomial_order = polynomial_order - self.nr_iterations = nr_iterations + self.lam: Union[float, int] = lam + self.polynomial_order: int = polynomial_order + self.nr_iterations: int = nr_iterations def fit(self, X: np.ndarray, y=None) -> "AirPls": """Fit the AirPls baseline correction estimator to the input data. @@ -66,7 +115,8 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": Parameters ---------- X : array-like of shape (n_samples, n_features) - The input data. + The input data. It is internally promoted to ``np.float64`` to avoid loss of + precision. y : array-like of shape (n_samples,), optional (default=None) The target values. @@ -75,9 +125,25 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": ------- self : AirPls Returns the instance itself. + """ # Check that X is a 2D array and has only finite values - X = self._validate_data(X) + X = BaseEstimator._validate_data( # type: ignore + self, + X, + reset=True, + ensure_2d=True, + force_all_finite=True, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) + + # the internal solver is set up + self._setup_for_fit( + num_data=X.shape[1], + differences=self.polynomial_order, + lam=self.lam, + child_class_name=self.__class__.__name__, + ) return self @@ -96,60 +162,63 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: ------- X_ : array-like of shape (n_samples, n_features) The transformed data with the baseline removed. 
+ """ # Check that the estimator is fitted check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values - X = check_input(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) X_ = X.copy() # Check that the number of features is the same as the fitted data - if X_.shape[1] != self.n_features_in_: + # NOTE: ``n_features_in_`` is set in ``BaseEstimator._validate_data`` when + # ``reset`` is True + if X_.shape[1] != self.n_features_in_: # type: ignore raise ValueError( - f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" # type: ignore # noqa: E501 ) # Calculate the air pls smooth for i, x in enumerate(X_): X_[i] = x - self._calculate_air_pls(x) - return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - - def _calculate_whittaker_smooth(self, x, w): - X = np.array(x) - m = X.size - E = eye(m, format="csc") - for i in range(self.polynomial_order): - E = E[1:] - E[:-1] - W = diags(w, 0, shape=(m, m)) - A = csc_matrix(W + (self.lam * E.T @ E)) - B = csc_matrix(W @ X.T).toarray().ravel() - background = spsolve(A, B) - return np.array(background) + return X_ def _calculate_air_pls(self, x): - m = x.shape[0] - w = np.ones(m) - - for i in range(1, self.nr_iterations): - z = self._calculate_whittaker_smooth(x, w) + # FIXME: this initial weighting strategy might not yield the best results + w = np.ones_like(x) + # FIXME: this initialisation will fail for many signals and produce a + # zero-baseline + z = np.zeros_like(x) + dssn_thresh = max(1e-3 * np.abs(x).sum(), 1e-308) # to avoid 0 equalities + + # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` + for i in range(0, self.nr_iterations - 1): + # the baseline is fitted using the Whittaker smoother framework + z, _ = self._solve_single_b_fixed_lam(rhs_b=x, weights=w) d = x - z dssn = np.abs(d[d < 0].sum()) - if dssn < 0.001 * np.abs(x).sum(): 
+ # the algorithm is stopped if the threshold is reached + if dssn <= dssn_thresh: break - if i == self.nr_iterations - 1: - break - - w[d >= 0] = 0 - w[d < 0] = np.exp(i * np.abs(d[d < 0]) / dssn) - - negative_d = d[d < 0] - if negative_d.size > 0: - w[0] = np.exp(i * negative_d.max() / dssn) + # the weights are updated + below_base_indics = d < 0 + w[~below_base_indics] = 0.0 + exp_mult = i + 1 + w[below_base_indics] = np.exp(exp_mult * np.abs(d[d < 0]) / dssn) + + d_negative = d[below_base_indics] + if d_negative.size > 0: + # FIXME: this might easily yield a weight of 1 if the maximum of the + # negative_d is very close to zero + w[0] = np.exp(exp_mult * d_negative.max() / dssn) w[-1] = w[0] diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 064621dc..958747b5 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -1,35 +1,79 @@ +""" +This module contains the ``ArPLS`` transformer, which performs baseline correction on +data according to the Whittaker-Henderson formulation of Penalized Least Squares which +was modified by the introduction of weights that are updated iteratively to improve the +baseline identification. It simultaneously estimates the baseline as well as the +baseline noise. + +References +---------- +It's based on the algorithms described in [1]_ and [2]_ where an implementational +adaption of [2]_ was required to make it numerically stable ([3]_). + +.. [1] S.-J. Baek, A. Park, Y.-J. Ahn, J. Choo, "Baseline correction using + asymmetrically reweighted penalized least squares smoothing", Analyst, 140, 250–257 + (2015) +.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023) +.. 
[3] https://math.stackexchange.com/q/4819039/1261538 + +""" + +# Authors: +# Pau Cabaneros +# Niklas Zell + + +### Imports ### + import logging -import numpy as np -import scipy.sparse as sp -from scipy.sparse import spdiags, csc_matrix -from scipy.sparse.linalg import splu +from numbers import Integral +from typing import Union -from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin -from sklearn.utils.validation import check_is_fitted +import numpy as np +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin +from sklearn.utils.validation import check_is_fitted, check_scalar +from chemotools.utils._whittaker_base import WhittakerLikeSolver from chemotools.utils.check_inputs import check_input logger = logging.getLogger(__name__) +### Main Class ### + -class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): +class ArPls( + OneToOneFeatureMixin, + BaseEstimator, + TransformerMixin, + WhittakerLikeSolver, +): """ - This class implements the Assymmetrically Reweighted Penalized Least Squares (ArPls) is a baseline - correction method for spectroscopy data. It uses an iterative process - to estimate and remove the baseline from the spectra. + This class implements the Asymmetrically Reweighted Penalized Least Squares a.k.a + ArPLS which is a baseline correction method for spectroscopy data. It uses an + iterative process that simultaneously estimates the baseline as well as the baseline + noise. Parameters ---------- - lam : float, optional (default=1e4) - The penalty parameter for the difference matrix in the objective function. - - ratio : float, optional (default=0.01) - The convergence threshold for the weight updating scheme. + lam : float or int, default=1e4 + The lambda parameter that controls the smoothness of the baseline. Higher values + will result in a smoother baseline. 
+ + differences : int, default=2 + The order of the differences used for the penalty term that enforces smoothness + of the baseline. + Higher values will result in a smoother baseline. + Currently, values ``>= 3`` are highly discouraged due to numerical instability + that might obscure the smoothing effect. + + ratio : float, default=0.01 + The convergence threshold for the weight updating scheme. Lower values will + result in a more accurate baseline at the cost of computation time and even + convergence. nr_iterations : int, optional (default=100) The maximum number of iterations for the weight updating scheme. - Methods ------- fit(X, y=None) @@ -38,28 +82,31 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): transform(X, y=None) Transform the data by removing the baseline. - _calculate_diff(N) - Calculate the difference matrix for a given size. - _calculate_ar_pls(x) Calculate the baseline for a given spectrum. References ---------- - - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo - Baseline correction using asymmetrically reweighted penalized - least squares smoothing + .. [1] S.-J. Baek, A. Park, Y.-J. Ahn, J. Choo, "Baseline correction using + asymmetrically reweighted penalized least squares smoothing", Analyst, 140, + 250–257 (2015) + .. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 + (2023) + .. [3] https://math.stackexchange.com/q/4819039/1261538 + """ def __init__( self, - lam: float = 1e4, + lam: Union[float, int] = 1e4, + differences: int = 2, ratio: float = 0.01, nr_iterations: int = 100, ): - self.lam = lam - self.ratio = ratio - self.nr_iterations = nr_iterations + self.lam: Union[float, int] = lam + self.differences: int = differences + self.ratio: float = ratio + self.nr_iterations: int = nr_iterations def fit(self, X: np.ndarray, y=None) -> "ArPls": """Fit the estimator to the data. 
@@ -67,7 +114,8 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": Parameters ---------- X : array-like of shape (n_samples, n_features) - The input data. + The input data. It is internally promoted to ``np.float64`` to avoid loss of + precision. y : array-like of shape (n_samples,), optional (default=None) The target values. @@ -76,10 +124,37 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": ------- self : ArPls Returns the instance itself. + """ + # the constructor parameters are checked + check_scalar( + x=self.ratio, + name="ratio", + target_type=float, + min_val=1e-15, + ) + check_scalar( + x=self.nr_iterations, name="nr_iterations", target_type=Integral, min_val=1 + ) + # Check that X is a 2D array and has only finite values - X = self._validate_data(X) + X = BaseEstimator._validate_data( # type: ignore + self, + X, + reset=True, + ensure_2d=True, + force_all_finite=True, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) + + # the internal solver is setup + self._setup_for_fit( + num_data=X.shape[1], + differences=self.differences, + lam=self.lam, + child_class_name=self.__class__.__name__, + ) return self @@ -89,7 +164,8 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: Parameters ---------- X : array-like of shape (n_samples, n_features) - The input data. + The input data. It is internally promoted to ``np.float64`` to avoid loss of + precision. y : array-like of shape (n_samples,), optional (default=None) The target values. @@ -98,52 +174,56 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: ------- X_ : array-like of shape (n_samples, n_features) The transformed data with the baseline removed. 
+ """ # Check that the estimator is fitted check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values - X = check_input(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) X_ = X.copy() # Check that the number of features is the same as the fitted data - if X_.shape[1] != self.n_features_in_: + # NOTE: ``n_features_in_`` is set in ``BaseEstimator._validate_data`` when + # ``reset`` is True + if X_.shape[1] != self.n_features_in_: # type: ignore raise ValueError( - f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" # type: ignore # noqa: E501 ) # Calculate the ar pls baseline for i, x in enumerate(X_): X_[i] = x - self._calculate_ar_pls(x) - return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - - def _calculate_diff(self, N): - I = sp.eye(N, format="csc") - D2 = sp.diags([1, -2, 1], [0, 1, 2], shape=(N - 2, N), format="csc") - return D2.dot(I).T + return X_ def _calculate_ar_pls(self, x): - N = len(x) - D = self._calculate_diff(N) - H = self.lam * D.dot(D.T) - w = np.ones(N) - iteration = 0 - while iteration < self.nr_iterations: - W = spdiags(w, 0, N, N) - C = csc_matrix(W + H) - z = splu(C).solve(w * x) + # FIXME: this initial weighting strategy might not yield the best results + w = np.ones_like(x) + # FIXME: this initialisation will fail for many signals and produce a + # zero-baseline + z = np.zeros_like(x) + # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` + for _ in range(self.nr_iterations): + # the baseline is fitted using the Whittaker smoother framework + z, _ = self._solve_single_b_fixed_lam(rhs_b=x, weights=w) d = x - z - dn = d[d < 0] - if len(dn) == 0: + + # if there is no data point below the baseline, the baseline is considered + # to be fitted + d_negative = d[np.where(d < 0)[0]] + if len(d_negative) == 0: break - m = np.mean(dn) - s = np.std(dn) - 
exponent = np.clip(2 * (d - (2 * s - m)) / s, -709, 709) + m = d_negative.mean() + s = d_negative.std() + exponent = np.clip(2.0 * (d - (2.0 * s - m)) / s, -709, 709) # type: ignore wt = 1.0 / (1.0 + np.exp(exponent)) - if np.linalg.norm(w - wt) / np.linalg.norm(w) < self.ratio: + if np.linalg.norm(w - wt) / np.linalg.norm(w) < self.ratio: # type: ignore break w = wt - iteration += 1 + return z diff --git a/chemotools/smooth/__init__.py b/chemotools/smooth/__init__.py index b6befa63..7fb4133f 100644 --- a/chemotools/smooth/__init__.py +++ b/chemotools/smooth/__init__.py @@ -1,4 +1,30 @@ -from ._mean_filter import MeanFilter -from ._median_filter import MedianFilter -from ._savitzky_golay_filter import SavitzkyGolayFilter -from ._whittaker_smooth import WhittakerSmooth \ No newline at end of file +""" +The ``chemotools`` module for smoothing data. +It defines the estimator classes for smoothing data with an Sklearn-like API: + +- :class:`MeanFilter` +- :class:`MedianFilter` +- :class:`SavitzkyGolayFilter` +- :class:`WhittakerSmooth` + +as well as auxiliary models and functions to allow for convenient usage of them: + +- :class:`WhittakerSmoothMethods` and :class:`WhittakerSmoothLambda` for the + :class:`WhittakerSmooth` class. +- :func:`estimate_noise_stddev` to estimate the local/global noise level of a spectrum + which can then be used for weighting the data. 
+ +""" + +### Imports ### + +from chemotools.utils._finite_differences import estimate_noise_stddev # noqa: F401 +from chemotools.utils._models import ( # noqa: F401 + WhittakerSmoothLambda, + WhittakerSmoothMethods, +) + +from ._mean_filter import MeanFilter # noqa: F401 +from ._median_filter import MedianFilter # noqa: F401 +from ._savitzky_golay_filter import SavitzkyGolayFilter # noqa: F401 +from ._whittaker_smooth import WhittakerSmooth # noqa: F401 diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 00ba7e3d..6ef5da58 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -1,53 +1,209 @@ -import numpy as np -from scipy.sparse import csc_matrix, eye, diags -from scipy.sparse.linalg import spsolve -from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin +""" +This module contains the ``WhittakerSmooth`` transformer, which performs smoothing on +data according to the Whittaker-Henderson formulation of Penalized Least Squares. + +References +---------- +It's based on the algorithms described in [1]_ and [2]_ where an implementational +adaption of [2]_ was required to make it numerically stable ([3]_). + +.. [1] Z.-M. Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 (2010) +.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023) +.. 
[3] https://math.stackexchange.com/q/4819039/1261538 + +""" + +# Authors: +# Pau Cabaneros +# Niklas Zell + +### Imports ### + +from typing import Literal, Optional, Tuple, Union + +from numpy import ndarray +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted -from chemotools.utils.check_inputs import check_input +from chemotools.utils._types import RealNumeric +from chemotools.utils._whittaker_base import ( + WhittakerLikeSolver, + WhittakerSmoothLambda, + WhittakerSmoothMethods, +) +from chemotools.utils.check_inputs import check_input, check_weights -# This code is adapted from the following source: -# Z.-M. Zhang, S. Chen, and Y.-Z. Liang, -# Baseline correction using adaptive iteratively reweighted penalized least squares. -# Analyst 135 (5), 1138-1146 (2010). +### Main Class ### -class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): +class WhittakerSmooth( + OneToOneFeatureMixin, + BaseEstimator, + TransformerMixin, + WhittakerLikeSolver, +): """ - A transformer that calculates the Whittaker smooth of the input data. + A transformer that performs smoothing on data according to the Whittaker-Henderson + formulation of Penalized Least Squares. Parameters ---------- - lam : float, optional - The lambda parameter to use for the Whittaker smooth. Default is 1e2. + lam : float or int or (float or int, float or int, {"fixed", "logml"} or WhittakerSmoothMethods) or WhittakerSmoothLambda, default=1e2 + The lambda parameter, a.k.a. the penalty weight, for the Whittaker smooth. In + general, higher values lead to smoother results, but changes take effect in a + logarithmic rather than linear manner. + It may thus not be zero or negative (``< 1e-25``). Also high values combined + with high ``differences`` will lead to numerical instability. + Please refer to the Notes section for further details. 
+ + differences : int, default=1 + The order of differences to use for the Whittaker smooth. If the aim is to + obtain a smooth estimate of the ``m``-th order derivative, this should be set to + at least ``m + 2``. + Currently, values ``>= 3`` are highly discouraged due to numerical instability + that might obscure the smoothing effect. + + Attributes + ---------- + n_features_in_ : int + The number of features in the input data. - differences : int, optional - The number of differences to use for the Whittaker smooth. Default is 1. + _is_fitted : bool + Whether the transformer has been fitted to data. Methods ------- fit(X, y=None) Fit the transformer to the input data. - transform(X, y=0, copy=True) - Transform the input data by calculating the Whittaker smooth. - """ + transform(X, y=None, sample_weight=None) + Transform the input data by calculating the (weighted) Whittaker smooth. + + fit_transform(X, y=None, sample_weight=None) + Fit the transformer to the input data and transform it. + + References + ---------- + It's based on the algorithms described in [1]_ and [2]_ where an implementational + adaption of [2]_ was required to make it numerically stable ([3]_). + + .. [1] Z.-M. Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 + (2010) + .. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 + (2023) + .. [3] https://math.stackexchange.com/q/4819039/1261538 + + Notes + ----- + For a more convenient usage of the following, it is recommended to import + ``WhittakerSmoothLambda`` and ``WhittakerSmoothMethods`` from ``chemotools.smooth``. + + The specification of ``lam`` controls how the lambda parameter is applied/chosen. + It may not be zero or negative (``< 1e-25``), but aside from that, it can be + specified in three different ways: + + - a scalar: A fixed lambda is used for all signals, which is a good starting + point. 
However, it is important to notice that even similar signals might + require quite different lambdas. + + ```python + # fixed lambda of 100 + smoother = WhittakerSmooth(lam=1e2) + ``` + + Internally, it is represented by the method ``"fixed"`` or ``WhittakerSmoothMethods.FIXED``, + but this never has to be specified explicitly. + + - a tuple of two scalars and a string: The scalars serve as the lower and upper + bound for searching a lambda according to the method provided by the + string. + Each signal will then have its own optimized lambda. + Currently available methods for automated selection are: + + - ``logml`` or ``WhittakerSmoothMethods.LOGML``: The lambda is chosen by + maximizing the log marginal likelihood similar to the optimization used + by the ``sklearn.gaussian_process.GaussianProcessRegressor``. + It can only be used when ``sample_weight`` can be provided for the methods + :meth:`transform` and :meth:`fit_transform`. + + ```python + # will search the optimized lambda for each signal between 1e-5 and 1e10 + smoother = WhittakerSmooth(lam=(1e-5, 1e10, "logml")) + + # which is equivalent to + smoother = WhittakerSmooth(lam=(1e-5, 1e10, WhittakerSmoothMethods.LOGML)) + ``` + + - a ``WhittakerSmoothLambda`` object: This object serves as a convenient way for + specifying the ``bounds`` for the search space and the ``method`` for the lambda + selection. It covers both the fixed lambda and its automated selection. 
+ + ```python + # 1) fixed lambda of 100 + smoother = WhittakerSmooth(lam=WhittakerSmoothLambda(bounds=1e2)) + + + # which is equivalent to + smoother = WhittakerSmooth(lam=WhittakerSmoothLambda(bounds=(1e2, 1e2))) + + # 2) will search the optimized lambda for each signal between 1e-5 and 1e10 + smoother = WhittakerSmooth( + lam=WhittakerSmoothLambda( + bounds=(1e-5, 1e10), + method="logml", + ) + ) + + # which is equivalent to + smoother = WhittakerSmooth( + lam=WhittakerSmoothLambda( + bounds=(1e-5, 1e10), + method=WhittakerSmoothMethods.LOGML, + ) + ) + ``` + + If bounds are provided by either the tuple or the ``WhittakerSmoothLambda`` object, + the class will fall back to a fixed lambda in case the bounds are apart by less than + a factor of ``1e-5``, i.e., ``abs(upper - lower) < 1e-5 * upper``. + + """ # noqa: E501 + def __init__( self, - lam: float = 1e2, + lam: Union[ + RealNumeric, + Tuple[ + RealNumeric, + RealNumeric, + Union[Literal["fixed", "logml"], WhittakerSmoothMethods], + ], + WhittakerSmoothLambda, + ] = 1e2, differences: int = 1, ): - self.lam = lam - self.differences = differences + self.lam: Union[ + RealNumeric, + Tuple[ + RealNumeric, + RealNumeric, + Union[Literal["fixed", "logml"], WhittakerSmoothMethods], + ], + WhittakerSmoothLambda, + ] = lam + self.differences: int = differences - def fit(self, X: np.ndarray, y=None) -> "WhittakerSmooth": + def fit(self, X: ndarray, y: None = None) -> "WhittakerSmooth": """ Fit the transformer to the input data. Parameters ---------- - X : np.ndarray of shape (n_samples, n_features) - The input data to fit the transformer to. + X : ndarray of shape (n_samples, n_features) + The input data to fit the transformer to. It is internally promoted to + ``np.float64`` to avoid loss of precision. y : None Ignored. @@ -56,34 +212,91 @@ def fit(self, X: np.ndarray, y=None) -> "WhittakerSmooth": ------- self : WhittakerSmooth The fitted transformer. 
+ """ # Check that X is a 2D array and has only finite values - X = self._validate_data(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) + + # Set the number of features ... + self.n_features_in_ = X.shape[1] + # ... and all the required attributes for fitting + self._setup_for_fit( + num_data=self.n_features_in_, + lam=self.lam, + differences=self.differences, + child_class_name=self.__class__.__name__, + ) + + # Set the fitted attribute to True + self._is_fitted = True return self - def transform(self, X: np.ndarray, y=None) -> np.ndarray: + def transform( + self, + X: ndarray, + y: None = None, + sample_weight: Optional[ndarray] = None, + ) -> ndarray: """ Transform the input data by calculating the Whittaker smooth. Parameters ---------- - X : np.ndarray of shape (n_samples, n_features) - The input data to transform. + X : ndarray of shape (n_samples, n_features) + The input data to transform. It is internally promoted to ``np.float64`` to + avoid loss of precision. y : None Ignored. + sample_weight : ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None + Individual weights for each of the input data. If only 1 weight vector is + provided, it is assumed to be the same for the features of all samples. + No weights may be negative (< 0.0) and at least one weight needs to be + positive (> 0.0). + Providing them is mandatory when the optimum penalty weight ``lam`` is to be + determined automatically via the log marginal likelihood (``"logml"``) + method. + If ``None``, all features are assumed to have the same weight. + Please refer to the Notes section for further details on selecting the + weights. + Returns ------- - X_ : np.ndarray of shape (n_samples, n_features) + X_smoothed : ndarray of shape (n_samples, n_features) The transformed data. 
- """ + + Notes + ----- + If estimates of the standard deviations ``s_i`` of each data point are + available, e.g., from theoretical considerations or repeated measurements, it is + recommended to use the inverse of the squared standard deviations as weights, + i.e., ``w_i = 1 / (s_i * s_i)``. This is a very effective way to down-weight + noisy data points and thus reduce the risk of noise-induced artifacts in the + smoothed signal. On the other hand, features measured with high confidence will + remain well-preserved even under strong smoothing. + Sometimes, it is infeasible to provide standard deviations because theoretical + considerations are not appropriate and replicate measurements are not available/ + feasible. In such scenarios, the weights can still be estimated by making use of + the function :func:`chemotools.smooth.estimate_noise_stddev` with a `power=-2`. + It relies on the parameter ``window_size`` to estimate the local/global noise + standard deviation of the spectrum, but please refer to the documentation of the + function for further details. 
+ + """ # noqa: E501 + # Check that the estimator is fitted - check_is_fitted(self, "n_features_in_") + check_is_fitted(self, "_is_fitted") # Check that X is a 2D array and has only finite values - X = check_input(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) X_ = X.copy() # Check that the number of features is the same as the fitted data @@ -92,21 +305,67 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: f"Expected {self.n_features_in_} features but got {X_.shape[1]}" ) + # Check the weights + sample_weight_checked, use_same_w_for_all = check_weights( + weights=sample_weight, n_samples=X_.shape[0], n_features=X_.shape[1] + ) + # Calculate the whittaker smooth - for i, x in enumerate(X_): - X_[i] = self._calculate_whittaker_smooth(x) - - return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - - def _calculate_whittaker_smooth(self, x): - X = np.array(x) - m = X.size - E = eye(m, format="csc") - w = np.ones(m) - for i in range(self.differences): - E = E[1:] - E[:-1] - W = diags(w, 0, shape=(m, m)) - A = csc_matrix(W + (self.lam * E.T @ E)) - B = csc_matrix(W @ X.T).toarray().ravel() - background = spsolve(A, B) - return np.array(background) + return self._whittaker_solve( + X=X_, weights=sample_weight_checked, use_same_w_for_all=use_same_w_for_all + )[0] + + def fit_transform( + self, + X: ndarray, + y: None = None, + sample_weight: Optional[ndarray] = None, + ) -> ndarray: + """Fit the transformer to the input data and transform it. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + The input data to fit and transform. It is internally promoted to + ``np.float64`` to avoid loss of precision. + + y : None + Ignored. + + sample_weight : ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None + Individual weights for each of the input data. If only 1 weight vector is + provided, it is assumed to be the same for the features of all samples. 
+ No weights may be negative (< 0.0) and at least one weight needs to be + positive (> 0.0). + Providing them is mandatory when the optimum penalty weight ``lam`` is to be + determined automatically via the log marginal likelihood (``"logml"``) + method. + If ``None``, all features are assumed to have the same weight. + Please refer to the Notes section for further details on selecting the + weights. + + Returns + ------- + X_smoothed : ndarray of shape (n_samples, n_features) + The transformed data. + + Notes + ----- + If estimates of the standard deviations ``s_i`` of each data point are + available, e.g., from theoretical considerations or repeated measurements, it is + recommended to use the inverse of the squared standard deviations as weights, + i.e., ``w_i = 1 / (s_i * s_i)``. This is a very effective way to down-weight + noisy data points and thus reduce the risk of noise-induced artifacts in the + smoothed signal. On the other hand, features measured with high confidence will + remain well-preserved even under strong smoothing. + Sometimes, it is infeasible to provide standard deviations because theoretical + considerations are not appropriate and replicate measurements are not available/ + feasible. In such scenarios, the weights can still be estimated by making use of + the function :func:`chemotools.smooth.estimate_noise_stddev` with a `power=-2`. + It relies on the parameter ``window_size`` to estimate the local/global noise + standard deviation of the spectrum, but please refer to the documentation of the + function for further details. 
+ + """ # noqa: E501 + + return self.fit(X=X).transform(X=X, sample_weight=sample_weight) diff --git a/chemotools/utils/__init__.py b/chemotools/utils/__init__.py index e69de29b..82c0d1ae 100644 --- a/chemotools/utils/__init__.py +++ b/chemotools/utils/__init__.py @@ -0,0 +1,14 @@ +""" +The utility module of ``chemotools`` that offers access to various utility functions +that can come in handy when working with chemical data. + +The module contains the following functions: + +- :func:`estimate_noise_stddev` to estimate the local/global noise level of a spectrum + which can then be used for weighting the data. + +""" + +### Imports ### + +from chemotools.utils._finite_differences import estimate_noise_stddev # noqa: F401 diff --git a/chemotools/utils/_banded_linalg.py b/chemotools/utils/_banded_linalg.py new file mode 100644 index 00000000..5479424b --- /dev/null +++ b/chemotools/utils/_banded_linalg.py @@ -0,0 +1,433 @@ +""" +This utility submodule provides functions for the linear algebra with banded matrices, +namely + +- conversion from the upper banded storage for LAPACK's banded Cholesky decomposition + to the banded storage for LAPACK's banded LU decomposition, +- LU decomposition of a banded matrix and the corresponding linear solver, +- computation of the log-determinant of a banded matrix using its LU decomposition + +The decomposition functions return dataclasses that facilitate the handling of the +factorizations. + +""" + +### Imports ### + +import numpy as np +from numpy.typing import ArrayLike +from scipy.linalg import lapack + +from chemotools.utils._models import BandedLUFactorization + +### Type Aliases ### + +LAndUBandCounts = tuple[int, int] + + +### Auxiliary Functions ### + + +def _datacopied(arr, original): + """ + Strictly check for ``arr`` not sharing any data with ``original``, under the + assumption that ``arr = asarray(original)`` + + Was copied from Scipy to be consistent in the LAPACK-wrappers implemented here. 
+ + """ + + if arr is original: + return False + + if not isinstance(original, np.ndarray) and hasattr(original, "__array__"): + return False + + return arr.base is None + + +def convert_upper_chol_banded_to_lu_banded_storage( + ab: np.ndarray, +) -> tuple[LAndUBandCounts, np.ndarray]: + """ + Converts the upper banded storage format used by LAPACK's banded Cholesky + decomposition to the banded storage format used by LAPACK's banded LU + decomposition. + + Parameters + ---------- + ab : np.ndarray of shape (n_upp_bands + 1, n_cols) + The matrix ``A`` stored in the upper banded storage format used by LAPACK's + banded Cholesky decomposition (see Notes for details). + + Returns + ------- + l_and_u : (int, int) + The number of sub- (first) and superdiagonals (second element) aside the main + diagonal which does not need to be considered here. + ab : np.ndarray of shape (l_and_u[0] + 1 + l_and_u[1], n_cols) + The matrix ``A`` stored in the banded storage format used by LAPACK's banded LU + decomposition (see Notes for details). + + Notes + ----- + The upper diagonal ordered form for LAPACK's Cholesky decomposition is given by the + following ordering + + ```python + ab[u + i - j, j] == a[i,j] + ``` + + e.g., for a symmetric matrix ``A`` of shape (7, 7) with in total 3 superdiagonals, + 3 subdiagonals, and the main diagonal, the ordering is as follows: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + ``` + + where each `*` denotes a zero element. 
+ + For LAPACK's LU decomposition, the matrix `A` is stored in `ab` using the matrix + diagonal ordered form: + + ```python + ab[u + i - j, j] == a[i,j] + ``` + + The example from above would then look like this where basically, all the + superdiagonal rows are just copied to the subdiagonal rows and moved to the left so + that the first non-zero element of each row is in the first column: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + a01 a12 a23 a34 a45 a56 * # v subdiagonals + a02 a13 a24 a35 a46 * * + a03 a14 a25 a36 * * * + ``` + + where all entries marked with `*` are as well zero elements although they will be + set to arbitrary values by this function. + + """ + + # an Array is initialised to store the subdiagonal part + num_subdiagonals = ab.shape[0] - 1 + main_diagonal_index = num_subdiagonals + num_columns = ab.shape[1] + ab_subdiagonals = np.zeros(shape=(num_subdiagonals, num_columns), dtype=ab.dtype) + + for offset in range(1, num_subdiagonals + 1): + ab_subdiagonals[offset - 1, 0 : num_columns - offset] = ab[ + main_diagonal_index - offset, offset:None + ] + + # the subdiagonal part is then concatenated to the original array and the result is + # returned + l_and_u = (num_subdiagonals, num_subdiagonals) + return l_and_u, np.row_stack((ab, ab_subdiagonals)) + + +### LAPACK-Wrappers for banded LU decomposition ### + + +def lu_banded( + l_and_u: LAndUBandCounts, + ab: ArrayLike, + *, + check_finite: bool = True, +) -> BandedLUFactorization: + """ + Computes the LU decomposition of a banded matrix ``A`` using LAPACK-routines. + This function is a wrapper of the LAPACK-routine ``gbtrf`` which computes the LU + decomposition of a banded matrix ``A`` in-place. It wraps the routine in an + analogous way to SciPy's ``scipy.linalg.cholesky_banded``. 
+ + Parameters + ---------- + l_and_u : (int, int) + The number of "non-zero" sub- (first) and superdiagonals (second element) aside + the main diagonal which does not need to be considered here. "Non-zero" can be + a bit misleading in this context. These numbers should count up to the diagonal + after which all following diagonals are all zero. Zero-diagonals that come + before still need to be included. + Neither of both may exceed ``num_rows``. + Wrong specification of this can lead to non-zero-diagonals being ignored or + zero-diagonals being included which corrupts the results or reduces the + performance. + ab : array_like of shape (l_and_u[0] + 1 + l_and_u[1], n) + A 2D-Array resembling the matrix ``A`` in banded storage format (see Notes). + check_finite : bool, default=True + Whether to check that the input matrix contains only finite numbers. Disabling + may give a performance gain, but may result in problems (crashes, + non-termination) if the inputs do contain infinities or NaNs. + + Returns + ------- + lub_factorization : BandedLUFactorization + A dataclass containing the LU factorization of the matrix ``A`` as follows: + ``lub``: The LU decomposition of ``A`` in banded storage format (see Notes). + ``ipiv``: The pivoting indices. + ``l_and_u``: The number of sub- and superdiagonals of the matrix ``A`` that + are non-zero. + ``singular``: A boolean indicating whether the matrix is singular. 
+ + Notes + ----- + For LAPACK's banded LU decomposition, the matrix ``a`` is stored in ``ab`` using the + matrix diagonal ordered form: + + ```python + ab[u + i - j, j] == a[i,j] # see below for u + ``` + + An example of ``ab`` (shape of a is ``(7,7)``, ``u``=3 superdiagonals, ``l``=2 + subdiagonals) looks like: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + a10 a21 a32 a43 a54 a65 * # v subdiagonals + a20 a31 a42 a53 a64 * * + ``` + + where all entries marked with `*` are zero elements although they will be set to + arbitrary values by this function. + + Internally LAPACK relies on an expanded version of this format to perform inplace + operations that adds another ``l`` superdiagonals to the matrix in order to + overwrite them for the purpose of pivoting. The output is thus an expanded version + of the LU decomposition of ``A`` in the same format where the main diagonal of + ``L`` is implicitly taken to be a vector of ones. The output can directly be used + for the LAPACK-routine ``gbtrs`` to solve linear systems of equations based on this + decomposition. + + """ + + # the (optional) finite check and Array-conversion are performed + if check_finite: + ab = np.asarray_chkfinite(ab) + else: + ab = np.asarray(ab) + + # then, the number of lower and upper subdiagonals needs to be checked for being + # consistent with the shape of ``ab`` + num_subdiagonals, num_superdiagonals = l_and_u + if num_subdiagonals + num_superdiagonals + 1 != ab.shape[0]: # pragma: no cover + raise ValueError( + f"\nInvalid values for the number of sub- and super " + f"diagonals: l+u+1 ({num_subdiagonals + num_superdiagonals + 1}) does not " + f"equal ab.shape[0] ({ab.shape[0]})." 
+ ) + + # now, the LAPACK-routines can be called + # to make ``ab`` compatible with the shape the LAPACK expects in this case, it + # needs to be re-written into a larger Array that has zeros elsewhere + # FIXME: for tridiagonal matrices, the SciPy wrapper for ``gttrf`` should be used + lapack_routine = "gbtrf" + (gbtrf,) = lapack.get_lapack_funcs((lapack_routine,), (ab,)) + lpkc_ab = np.row_stack( + ( + np.zeros((num_subdiagonals, ab.shape[1]), dtype=ab.dtype), + ab, + ) + ) + lub, ipiv, info = gbtrf( + ab=lpkc_ab, kl=num_subdiagonals, ku=num_superdiagonals, overwrite_ab=True + ) + + # then, the results needs to be validated and returned + # Case 1: the factorisation could be completed, which does not imply that the + # solution can be used for solving a linear system + if info >= 0: + return BandedLUFactorization( + lub=lub, + ipiv=ipiv, + l_and_u=l_and_u, + singular=info > 0, + ) + + # Case 2: the factorisation was not completed due to invalid input + raise ValueError( # pragma: no cover # noqa: E501 + f"\nIllegal value in {-info}-th argument of internal gbtrf." + ) + + +def lu_solve_banded( + lub_factorization: BandedLUFactorization, + b: ArrayLike, + *, + overwrite_b: bool = False, + check_finite: bool = True, +) -> np.ndarray: + """ + Solves a linear system of equations ``Ax=b`` with a banded matrix ``A`` using its + precomputed LU decomposition. + This function wraps the LAPACK-routine ``gbtrs`` in an analogous way to SciPy's + ``scipy.linalg.cho_solve_banded``. + + Parameters + ---------- + lub_factorization : BandedLUFactorization + The LU decomposition of the matrix ``A`` in banded storage format as returned by + the function :func:`lu_banded`. + b : ndarray of shape (n,) + A 1D-Array containing the right-hand side of the linear system of equations. + overwrite_b : bool, default=False + If ``True``, the contents of ``b`` can be overwritten by the routine. Otherwise, + a copy of ``b`` is created and overwritten. 
+ check_finite : bool, default=True + Whether to check that the input contains only finite numbers. Disabling may give + a performance gain, but may result in problems (crashes, non-termination) if the + inputs do contain infinities or NaNs. + + Returns + ------- + x : ndarray of shape (n,) + The solution to the system ``A x = b``. + + Raises + ------ + LinAlgError + If the system to solve is singular. + + """ + + # if the matrix is singular, the solution cannot be computed + if lub_factorization.singular: + raise np.linalg.LinAlgError("\nSystem is singular.") + + # the (optional) finite check and Array-conversion are performed + if check_finite: + lub_factorization.lub = np.asarray_chkfinite(lub_factorization.lub) + lub_factorization.ipiv = np.asarray_chkfinite(lub_factorization.ipiv) + b_internal = np.asarray_chkfinite(b) + else: + lub_factorization.lub = np.asarray(lub_factorization.lub) + lub_factorization.ipiv = np.asarray(lub_factorization.ipiv) + b_internal = np.asarray(b) + + overwrite_b = overwrite_b or _datacopied(b_internal, b) + + # then, the shapes of the LU decomposition and ``b`` need to be validated against + # each other + if lub_factorization.num_cols != b_internal.shape[0]: # pragma: no cover + raise ValueError( + f"\nShapes of lub ({lub_factorization.num_cols}) and b " + f"({b_internal.shape[0]}) are not compatible." 
+ ) + + # now, the LAPACK-routine is called + (gbtrs,) = lapack.get_lapack_funcs(("gbtrs",), (lub_factorization.lub, b_internal)) + x, info = gbtrs( + ab=lub_factorization.lub, + kl=lub_factorization.l_and_u[0], + ku=lub_factorization.l_and_u[1], + b=b_internal, + ipiv=lub_factorization.ipiv, + overwrite_b=overwrite_b, + ) + + # then, the results needs to be validated and returned + # Case 1: the solution could be computed truly successfully, i.e., without any + # NaN-values + if info == 0 and not np.isnan(x).any(): + return x + + # Case 2: the solution was computed, but there were NaN-values in it + elif info == 0: + raise np.linalg.LinAlgError("\nMatrix is singular.") + + # Case 3: the solution could not be computed due to invalid input + elif info < 0: # pragma: no cover + raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrs.") + + # Case 4: unexpected error + raise AssertionError( # pragma: no cover + f"\nThe internal gbtrs returned info > 0 ({info}) which should not happen." + ) + + +def slogdet_lu_banded( + lub_factorization: BandedLUFactorization, +) -> tuple[float, float]: + """ + Computes the logarithm of the absolute value and the sign of the determinant of a + banded matrix A using its LU decomposition. This is way more efficient than + computing the determinant directly because the LU decompositions main diagonals + already encode the determinant as the product of the diagonal entries of the + factors. + + Parameters + ---------- + lub_factorization : BandedLUFactorization + The LU decomposition of the matrix ``A`` in banded storage format as returned by + the function :func:`lu_banded`. + + Returns + ------- + sign : float + A number representing the sign of the determinant. + logabsdet : float + The natural log of the absolute value of the determinant. + If the determinant is zero, then `sign` will be 0 and `logabsdet` will be + -Inf. In all cases, the determinant is equal to ``sign * np.exp(logabsdet)``. 
+ + Raises + ------ + OverflowError + If any of the diagonal entries of the LU decomposition lead to an overflow in + the natural logarithm. + + """ + + # first, the number of actual row exchanges needs to be counted + unchanged_row_indices = np.arange( + start=0, + stop=lub_factorization.num_cols, + step=1, + dtype=lub_factorization.ipiv.dtype, + ) + num_row_exchanges = np.count_nonzero( + lub_factorization.ipiv != unchanged_row_indices + ) + + # the sign-prefactor of the determinant is either +1 or -1 depending on whether the + # number of row exchanges is even or odd + sign = -1.0 if num_row_exchanges % 2 == 1 else 1.0 + + # since the determinant (without sign prefactor) is just the product of the diagonal + # product of L and the diagonal product of U, the calculation simplifies. As the + # main diagonal of L is a vector of ones, only the diagonal product of U is required + main_diagonal = lub_factorization.lub[lub_factorization.main_diagonal_row_index, ::] + u_diagonal_sign_is_positive = np.count_nonzero(main_diagonal < 0.0) % 2 == 0 + with np.errstate(divide="ignore", over="ignore"): + logabsdet = np.log(np.abs(main_diagonal)).sum() + + # logarithms of zero are already properly handled, so there is no reason to worry + # about them, since they are -inf which will result in a zero determinant in exp(); + # overflow however needs to lead to a raise and in this case the log(det) is either + # +inf in case of overflow only or NaN in case of the simultaneous occurrence of + # zero and overflow + if np.isnan(logabsdet) or np.isposinf(logabsdet): # pragma: no cover + raise OverflowError( + "\nFloating point overflow in natural logarithm. At least 1 main diagonal " + "entry results in overflow, thereby corrupting the determinant." 
+ ) + + # finally, the natural logarithm of the absolute value of the determinant is + # returned together with its sign + if np.isneginf(logabsdet): # pragma: no cover + return 0.0, logabsdet + + if u_diagonal_sign_is_positive: + return sign, logabsdet + + return -sign, logabsdet diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py new file mode 100644 index 00000000..68cb812d --- /dev/null +++ b/chemotools/utils/_finite_differences.py @@ -0,0 +1,660 @@ +""" +This utility submodule provides functions for the computation of forward finite +differences, namely + +- the kernel for forward and central finite differences, +- computation of related kernel matrices +- estimation of the noise standard deviation of a series + +""" + +### Imports ### + +from math import comb, factorial +from numbers import Integral, Real +from typing import Any, Callable, Dict, Literal, Optional, Tuple, Union + +import numpy as np +from scipy.ndimage import median_filter +from sklearn.utils import check_scalar + +### Constants ### + +_MAD_PREFACTOR = 1.482602 + +### Functions ### + + +def forward_finite_difference_kernel( + *, + differences: int, +) -> np.ndarray: + """ + Computes the kernel for forward finite differences which can be applied to a + series by means of a convolution, e.g., + + ```python + kernel = forward_finite_difference_kernel(differences=2) + differences = np.convolve(series, np.flip(kernel), mode="valid") + # NOTE: NumPy flips the kernel internally due to the definition of convolution + ``` + + Parameters + ---------- + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 1 are not allowed. + + Returns + ------- + fin_diff_kernel : ndarray of shape (differences + 1,) + A NumPy-1D-vector resembling the kernel from the code example above. 
To avoid + loss of precision, the data type is ``np.int64``. + + Raises + ------ + ValueError + If ``differences`` is below 1. + + """ + # the input is validated + check_scalar( + differences, + name="differences", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + + # afterwards, the kernel is computed using the binomial coefficients with + # alternating signs + return np.array( + [ + (-1 if iter_i % 2 == 1 else 1) * comb(differences, iter_i) + for iter_i in range(differences, -1, -1) + ], + dtype=np.int64, + ) + + +def central_finite_difference_coefficients( + *, + differences: int, + accuracy: int = 2, +) -> np.ndarray: + """ + Computes the kernel for central finite differences which can be applied to a + series by means of a convolution, e.g., + + ```python + kernel = central_finite_difference_coefficients(differences=2, accuracy=2) + differences = np.convolve(series, np.flip(kernel), mode="valid") + # NOTE: NumPy flips the kernel internally due to the definition of convolution + ``` + + Parameters + ---------- + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 1 are not allowed. + accuracy : int, default=2 + The accuracy of the finite difference approximation, which has to be an even + integer ``>= 2``. + The higher the accuracy, the better the approximation. + + Returns + ------- + fin_diff_kernel : ndarray of shape (kernel_size,) + A NumPy-1D-vector resembling the kernel from the code example above. Since the + elements are not necessarily integers, the data type is ``np.float64``. + Its size is given by ``2 * floor((differences + 1) / 2) - 1 + accuracy`` where + ``floor`` returns the next lower integer. + + Raises + ------ + ValueError + If ``differences`` is below 1. + ValueError + If ``accuracy`` is not an even integer ``>= 2``. 
+ + References + ---------- + The computation is based on the description in [1]_. + + .. [1] Wikipedia, "Finite difference coefficient - Central finite difference", + URL: https://en.wikipedia.org/wiki/Finite_difference_coefficient#Central_finite_difference + + """ # noqa: E501 + + ### Input Validation ### + + # first, difference order and accuracy are validated + check_scalar( + differences, + name="differences", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + + check_scalar( + accuracy, + name="accuracy", + target_type=Integral, + min_val=2, + include_boundaries="left", + ) + if accuracy % 2 == 1: + raise ValueError("Got accuracy = {accuracy}, expected an even integer.") + + ### Central Difference Kernel Computation ### + + # first, the size of the kernel is computed + kernel_size = 2 * ((differences + 1) // 2) - 1 + accuracy + half_kernel_size = kernel_size // 2 + + # then, the linear system to solve for the coefficients is set up + grid_point_vect = np.arange( + start=-half_kernel_size, + stop=half_kernel_size + 1, + step=1, + dtype=np.int64, + ) + # NOTE: lhs is "left-hand side" and rhs is "right-hand side" + lhs_matrix = np.vander( + grid_point_vect, + N=kernel_size, + increasing=True, + ) + rhs_vect = np.zeros(shape=(kernel_size,), dtype=np.int64) + rhs_vect[differences] = factorial(differences) + + # the coefficients are computed by solving the linear system + return np.linalg.solve( + lhs_matrix.transpose(), + rhs_vect, + ) + + +def _squared_forward_difference_matrix_banded_transpose_first( + *, + num_data: int, + differences: int, +) -> np.ndarray: + """ + Generates the squared forward finite differences matrix ``D.T @ D`` from the + forward finite difference matrix ``D`` of order ``differences``. It will be cast to + to the upper banded storage format used for LAPACK's banded Cholesky decomposition. + + All unused elements in the banded matrix are set to zero. 
+ + """ + + # the problem has to be split into a leading, a central, and a trailing part + # first, the leading part is computed because it might be that this is already + # enough + # for this, the kernel for the forward differences is computed and the bandwidth is + # determined + kernel = forward_finite_difference_kernel(differences=differences) + num_diagonals = 1 + 2 * differences + leading_num_rows = min(kernel.size, num_data - kernel.size + 1) + leading_num_cols = kernel.size + leading_num_rows - 1 + + # the leading matrix is computed as a dense matrix + leading_dt_dot_d_dense = np.zeros( + shape=(leading_num_rows, leading_num_cols), + dtype=np.int64, + ) + for row_index in range(0, leading_num_rows): + leading_dt_dot_d_dense[row_index, row_index : row_index + kernel.size] = kernel + + # its squared form is computed + leading_dt_dot_d_dense = leading_dt_dot_d_dense.T @ leading_dt_dot_d_dense + + # now, the leading matrix is converted to a banded matrix + leading_dt_dot_d_banded = np.zeros( + shape=(differences + 1, leading_num_cols), + dtype=np.int64, + ) + for diagonal_index in range(0, differences + 1): + offset = differences - diagonal_index + leading_dt_dot_d_banded[diagonal_index, offset:None] = np.diag( + leading_dt_dot_d_dense, + k=offset, + ) + + # if the number of data points does not exceed the number of diagonals, the + # leading matrix is already the final matrix + if num_data <= num_diagonals: + return leading_dt_dot_d_banded + + # otherwise, a central part has to be inserted + # this turns out to be just a column-wise repetition of the kernel computed with + # double the difference order, so this matrix can simply be inserted into the + # computed leading D.T @ D matrix + # NOTE: the doubled kernel is the most central column of the banded D.T @ D already + # computed + central_n_cols = num_data - leading_dt_dot_d_banded.shape[1] + kernel_double_differences = leading_dt_dot_d_banded[::, kernel.size - 1].reshape( + (-1, 1) + ) + return 
np.concatenate( + ( + leading_dt_dot_d_banded[::, 0 : kernel.size], + np.tile(kernel_double_differences, (1, central_n_cols)), + leading_dt_dot_d_banded[::, kernel.size :], + ), + axis=1, + ) + + +def _squared_forward_difference_matrix_banded_original_first( + *, + num_data: int, + differences: int, +) -> np.ndarray: + """ + Generates the squared forward finite differences matrix ``D @ D.T`` from the + forward finite difference matrix ``D`` of order ``differences``. It will be cast to + to the upper banded storage format used for LAPACK's banded Cholesky decomposition. + + All unused elements in the banded matrix are set to zero. + + """ + + # this case is simpler than the transposed case because the matrix is just a + # Toeplitz matrix with the kernel of double the difference order on the diagonal + kernel_double_differences = forward_finite_difference_kernel( + differences=2 * differences + )[differences:None] + # for an odd difference order, the sign of the kernel has to be flipped + if differences % 2 == 1: + kernel_double_differences = np.negative(kernel_double_differences) + + num_rows = num_data - kernel_double_differences.size + 1 + num_upper_plus_central_diagonals = min(num_rows, 1 + differences) + + # the matrix is computed as a dense and simple filled by means of a loop + d_dot_dt_banded = np.zeros( + shape=(num_upper_plus_central_diagonals, num_rows), + dtype=np.int64, + ) + main_diagonal_row_index = min(differences, num_upper_plus_central_diagonals - 1) + for offset in range(0, num_upper_plus_central_diagonals): + d_dot_dt_banded[main_diagonal_row_index - offset, offset:None] = ( + kernel_double_differences[offset] + ) + + return d_dot_dt_banded + + +def squared_forward_difference_matrix_banded( + *, + num_data: int, + differences: int, + original_first: bool, +) -> np.ndarray: + """ + Generates the squared forward finite differences matrix ``D @ D.T`` or ``D.T @ D`` + from the forward finite difference matrix ``D`` of order ``differences``. 
It will be + cast to to the upper banded storage format used for LAPACK's banded Cholesky + decomposition. + + All unused elements in the banded matrix are set to zero. + + Parameters + ---------- + num_data : int + The number of data points in the series to which the forward finite differences + are applied. + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 1 are not allowed. + original_first : bool + If ``True``, the squared forward finite differences matrix ``D @ D.T`` is + computed. Otherwise, the squared forward finite differences matrix ``D.T @ D`` + is computed. + + Returns + ------- + squ_fw_fin_diff_mat_cho_banded : ndarray of shape (num_bands, num_data - differences + 1) or (n_bands, num_data) + The squared forward finite differences matrix in the upper banded storage format + used for LAPACK's banded Cholesky decomposition (see Notes for details). + When ``orig_first`` is ``True``, the matrix has at maximum ``differences + 1`` + bands (rows) and ``num_data - differences + 1`` columns. + Otherwise, the matrix has at maximum ``differences + 1`` bands (rows) and + ``num_data`` columns. + + Raises + ------ + ValueError + If ``num_data`` is below ``differences + 1``, i.e., the kernel does not fit into + the data at least once. + ValueError + If ``differences`` is below 1. + + Notes + ----- + The squared forward finite differences matrix is stored in the upper banded storage + format used for LAPACK's banded Cholesky decomposition. 
def estimate_noise_stddev(
    series: np.ndarray,
    differences: int = 6,
    differences_accuracy: int = 2,
    window_size: Optional[int] = None,
    extrapolator: Callable[..., np.ndarray] = np.pad,
    extrapolator_args: Tuple[Any, ...] = ("reflect",),
    extrapolator_kwargs: Optional[Dict[str, Any]] = None,
    stddev_power: Literal[-2, -1, 1, 2] = 1,
    stddev_min: Union[float, int] = 1e-10,
) -> np.ndarray:
    """
    EXPERIMENTAL FEATURE

    Estimates the local/global noise standard deviation of a series even in the
    presence of trends, like baselines and peaks, as well as outliers by using
    central finite differences.
    Please see the Notes section for further details.

    Parameters
    ----------
    series : ndarray of shape (num_data,)
        The series for which the noise standard deviation is estimated.
    differences : int, default=6
        The order of the differences starting from 0 for the original curve, 1 for
        the first order, 2 for the second order, ..., and ``m`` for the ``m``-th
        order differences.
        Empirically, 5-6 was found as a sweet spot, but even numbers work better
        with the default ``extrapolator``.
        Values below 1 are not allowed.
    differences_accuracy : int, default=2
        The accuracy of the finite difference approximation, which has to be an
        even integer ``>= 2``.
        Higher values will enhance the effect of outliers that will corrupt the
        noise estimation of their neighborhood.
    window_size : int or None, default=None
        The odd window size around a datapoint to estimate its local noise standard
        deviation.
        Higher values will lead to a smoother noise standard deviation estimate by
        sacrificing the local resolution. At the same time, edge effects start
        blurring in if the ``extrapolator`` does not provide a good extrapolation.
        If provided, it has to be at least 1.
        If ``None``, the global noise standard deviation is estimated, i.e., it
        will be the same for each data point.
    extrapolator : callable, default=np.pad
        The extrapolator function that is used to pad the series before the finite
        differences and the median filter are applied. It will pad the signal with
        ``pad_width = (diff_kernel_size // 2) + (window_size // 2)`` elements on
        each side where ``diff_kernel_size`` is the size of the central finite
        differences kernel (see the Notes for details).
        It has to be a callable with the following signature:

        ```python
        series_extrap = extrapolator(
            series,
            pad_width,
            *extrapolator_args,
            **extrapolator_kwargs,
        )
        ```

        If ``window_size`` is ``None``, only the central finite differences kernel
        is considered.
        By default, the signal is padded by reflecting ``series`` at the edges on
        either side, but of course the quality of the noise estimation can be
        improved by using a more sophisticated extrapolation method.
    extrapolator_args : tuple, default=("reflect",)
        Additional positional arguments that are passed to the extrapolator
        function as described for ``extrapolator``.
    extrapolator_kwargs : dict or None, default=None
        Additional keyword arguments that are passed to the extrapolator function
        as described for ``extrapolator``.
        If ``None``, no additional keyword arguments are passed.
    stddev_power : {-2, -1, 1, 2}, default=1
        The power to which the noise standard deviation is raised.
        This can be used to compute the:

        - original noise standard deviation (``stddev_power=1``),
        - the noise variance (``stddev_power=2``),
        - the inverse noise standard deviation (``stddev_power=-1``), or
        - the inverse noise variance (``stddev_power=-2``; typically used as
          weights).

    stddev_min : float or int, default=1e-10
        The minimum noise standard deviation that is allowed.
        Any estimated noise standard deviation below this value will be set to this
        value.
        Borrowing an idea from image processing, the minimum noise standard
        deviation can, e.g., be estimated from one or more feature-free regions of
        ``series``.
        It must be at least ``1e-15``.

    Returns
    -------
    noise_stddev : ndarray of shape (num_data,)
        The estimated noise standard deviation raised to ``stddev_power`` for each
        data point in the series.

    Raises
    ------
    ValueError
        If ``series.size`` is less than the kernel or window size (see Notes for
        details).
    ValueError
        If ``differences`` is below 1.
    ValueError
        If ``differences_accuracy`` is not an even integer ``>= 2``.
    ValueError
        If ``window_size`` is below 1 or not odd.
    ValueError
        If ``stddev_power`` is not one of -2, -1, 1, or 2.

    References
    ----------
    The estimation algorithm is an adaption of the global estimation logic applied
    for the "DER SNR" proposed in [1]_ (see the Notes for further details).

    .. [1] Stoehr F., et al., "DER SNR: A Simple & General Spectroscopic
       Signal-to-Noise Measurement Algorithm", Astronomical Data Analysis Software
       and Systems XVII P5.4 ASP Conference Series, Vol. XXX, 2008

    Notes
    -----
    The "DER SNR" algorithm estimates a global noise level in a robust fashion by
    applying a modified version of the Median Absolute Deviation (MAD) to the
    derivative/differences of the signal. By using a moving MAD filter, the local
    noise level can be estimated as well.

    From a workflow perspective, the following steps are performed on the signal:

    - The signal is extrapolated to avoid edge effects.
    - The central finite differences are computed.
    - Their absolute values are taken.
    - The median (global) or median filter (local) is applied to these absolute
      differences. With proper scaling, this will give an estimate of the noise
      level.

    There is one limitation, namely that the algorithm does not work well for
    signals that are perfectly noise-free, but this is a rare case in practice.

    The kernel size for the central finite difference kernel is given by
    ``2 * floor((differences + 1) / 2) - 1 + differences_accuracy``.

    """

    ### Input Validation ###

    # first, the window size, power, and minimum standard deviation are validated
    # NOTE: the difference order and accuracy are validated by the central finite
    #       differences kernel function
    # window size
    if window_size is not None:
        check_scalar(
            window_size,
            name="window_size",
            target_type=Integral,
            min_val=1,
            include_boundaries="left",
        )
        if window_size % 2 == 0:
            raise ValueError(
                f"Got window_size = {window_size}, expected an odd integer."
            )

    # power
    if stddev_power not in {-2, -1, 1, 2}:
        raise ValueError(
            f"Got stddev_power = {stddev_power}, expected -2, -1, 1, or 2."
        )

    # minimum standard deviation
    check_scalar(
        stddev_min,
        name="stddev_min",
        target_type=Real,
        min_val=1e-15,
        include_boundaries="left",
    )

    # for validation of the series, the central finite differences kernel has to be
    # computed
    difference_kernel = central_finite_difference_coefficients(
        differences=differences,
        accuracy=differences_accuracy,
    )

    # afterwards, the series is validated
    if series.size < difference_kernel.size:
        raise ValueError(
            f"Got series.size = {series.size}, must be >= {difference_kernel.size} "
            f"(kernel size)."
        )

    if window_size is not None and series.size < window_size:
        raise ValueError(
            f"Got series.size = {series.size}, must be >= {window_size} (window "
            "size)."
        )

    ### Preparation ###

    # the keyword arguments for the extrapolator are set up
    extrapolator_kwargs = (
        extrapolator_kwargs if extrapolator_kwargs is not None else dict()
    )

    ### Noise Standard Deviation Estimation ###

    # the signal is extrapolated to avoid edge effects
    pad_width = difference_kernel.size // 2
    pad_width += 0 if window_size is None else window_size // 2
    extrapolated_series = extrapolator(
        series,
        pad_width,
        *extrapolator_args,
        **extrapolator_kwargs,
    )

    # the absolute central finite differences are computed ...
    # NOTE: ``np.convolve`` flips the kernel internally, so it is flipped upfront
    absolute_differences_series = np.abs(
        np.convolve(
            extrapolated_series,
            np.flip(difference_kernel),
            mode="valid",
        )
    )
    size_after_differentiation = absolute_differences_series.size

    # ... and the median (filter) is applied to these differences
    prefactor = _MAD_PREFACTOR / np.linalg.norm(difference_kernel)
    # Case 1: the global noise standard deviation is estimated
    if window_size is None:
        # NOTE: the result must not inherit an integer dtype from ``series`` because
        #       this would truncate the estimate to an integer before it is clamped;
        #       floating point dtypes of ``series`` are preserved
        output_dtype = (
            series.dtype if np.issubdtype(series.dtype, np.floating) else np.float64
        )
        noise_stddev = np.full(
            series.shape,
            fill_value=prefactor * np.median(absolute_differences_series),
            dtype=output_dtype,
        )

    # Case 2: the local noise standard deviation is estimated
    else:
        # the extra padding added for the window is cut off again after filtering
        half_window_size = window_size // 2
        noise_stddev = (
            prefactor
            * median_filter(
                absolute_differences_series,
                size=window_size,
                mode="constant",
            )[half_window_size : size_after_differentiation - half_window_size]
        )

    # the minimum-bounded noise standard deviation is raised to the power
    noise_stddev = np.maximum(noise_stddev, stddev_min)

    if stddev_power in {-2, 2}:
        noise_stddev = np.square(noise_stddev)

    if stddev_power in {-2, -1}:
        noise_stddev = np.reciprocal(noise_stddev)

    return noise_stddev
class BandedSolvers(str, Enum):
    """
    Enumerates the solvers that can be used for linear systems involving banded
    matrices.

    Members
    -------
    PIVOTED_LU
        LU decomposition with partial pivoting.
    PENTAPY
        Pentadiagonal "decomposition" (it is actually a direct solve).

    """

    # NOTE: the values are human-readable descriptions; members are also ``str``
    PIVOTED_LU = "partially pivoted LU decomposition"
    PENTAPY = "direct pentadiagonal solver"
The specification + can be either + + - a single value for a fixed penalty weight (requires ``method`` to be set to + ``WhittakerSmoothMethods.FIXED``), or + - a tuple of two values for the lower and upper bounds of the search space + (then ``method`` may not be set to ``WhittakerSmoothMethods.FIXED`` unless + the bounds are too close to each other as described below). + + Independently of the specification, the values have to be greater than or equal + to the zero tolerance ``1e-25``. + If a lower and an upper bound are provided, they are flipped if necessary. + After that, the difference ``abs(upp_bound - low_bound)`` has to be at least + ``1e-5 * upp_bound`` for any method other than ``WhittakerSmoothMethods.FIXED``. + Otherwise, the method is set to ``WhittakerSmoothMethods.FIXED`` and the + ``fixed_lambda`` is set to the upper bound. + method: WhittakerSmoothMethods or {"fixed", "logml"}, default="fixed" + The method to use for the selection of the penalty weight. If the bounds are too + close to each other, this will be set to ``WhittakerSmoothMethods.FIXED``. + + Raises + ------ + ValueError + If ``method`` is invalid, i.e., it does not correspond to any of the + ``WhittakerSmoothMethods`` or their shorthands, or if it cannot be used in + combination with ``bounds``. + ValueError + If the bounds are invalid, i.e., they are not greater than or equal to the zero + tolerance ``1e-25``. 
+ + """ + + bounds: Union[int, float, tuple[Union[int, float], Union[int, float]]] = 100.0 + method: _WhittakerSmoothMethodsAll = field(default=WhittakerSmoothMethods.FIXED) + + fixed_lambda: float = field(default=float("nan"), init=False) + auto_bounds: tuple[float, float] = field( + default=(float("nan"), float("nan")), init=False + ) + method_used: WhittakerSmoothMethods = field( + default=WhittakerSmoothMethods.FIXED, init=False + ) + fit_auto: bool = field(default=False, init=False) + + __zero_tol: float = field(default=1e-25, init=False, repr=False) + __diff_tol: float = field(default=1e-5, init=False, repr=False) + + def _set_validated_method(self) -> None: + try: + self.method_used = WhittakerSmoothMethods(self.method.lower()) + except ValueError: + raise ValueError( + f"\nThe method '{self.method}' is not valid. " + f"Please choose one of the following: " + f"'fixed', 'logml', {WhittakerSmoothMethods.FIXED.name}, " + f"{WhittakerSmoothMethods.LOGML.name}." + ) + + def __post_init__(self): + # the bounds are checked for validity + # Case 1: a single value is provided + if isinstance(self.bounds, (int, float)): + # first, the method is validated + self._set_validated_method() + + # in this case, the method has to be set to FIXED + if self.method_used != WhittakerSmoothMethods.FIXED: + raise ValueError( + f"\nThe method '{self.method_used.name}' was selected for a fixed " + f"penalty weight (i.e., bounds are just a scalar)." + ) + + # the bound has to be greater than or equal to the zero tolerance + if self.bounds < self.__zero_tol: + raise ValueError( + f"\nThe penalty weight lambda has to be greater than or equal to " + f"the zero tolerance {self.__zero_tol}." 
+ ) + + # the fixed lambda is set to the bound + self.fixed_lambda = float(self.bounds) + self.fit_auto = False + + return + + # Case 2: a tuple of two values is provided + elif isinstance(self.bounds, tuple): + + # the bounds are flipped if necessary + lower_bound, upper_bound = sorted(self.bounds) + + # the bounds have to be greater than or equal to the zero tolerance + if lower_bound < self.__zero_tol or upper_bound < self.__zero_tol: + raise ValueError( + f"\nThe bounds for the penalty weight lambda have to be greater " + f"than or equal to the zero tolerance {self.__zero_tol}, but " + f"they are {lower_bound} and {upper_bound}." + ) + + # the difference has to be at least 1e-5 * upp_bound to be considered + # as a search space + if abs(upper_bound - lower_bound) >= self.__diff_tol * upper_bound: + # for this, the method is validated + self._set_validated_method() + + # if the method is not FIXED, the bounds are set as the search space + if self.method_used != WhittakerSmoothMethods.FIXED: + self.auto_bounds = (float(lower_bound), float(upper_bound)) + self.fit_auto = True + return + + # if the bounds are a search space, but the method is set to FIXED, + # an error is raised + raise ValueError( + f"\nThe bounds for the penalty weight lambda are a search space " + f"({lower_bound}, {upper_bound}), but the method is set to FIXED." + ) + + # otherwise, if the penalty weights is fixed, the method is set to FIXED as + # well + self.method_used = WhittakerSmoothMethods.FIXED + self.fixed_lambda = float(upper_bound) + self.fit_auto = False + + return + + # Case 3: the bounds are neither a scalar nor a tuple of two values + raise TypeError( + f"\nThe bounds for the penalty weight lambda have to be either a scalar " + f"or a tuple of two values, but they are {self.bounds}." + ) + + @property + def log_auto_bounds(self) -> Tuple[float, float]: + """ + The natural logarithms of the search space bounds for the penalty weight lambda. 
@dataclass()
class BandedLUFactorization:
    """
    A dataclass that holds the partially pivoted LU factorization of a banded matrix.

    Attributes
    ----------
    lub : ndarray of shape (num_rows, num_cols)
        The LU factorization of the matrix ``A`` in banded storage format.
    ipiv : ndarray
        The pivot indices.
    l_and_u : (int, int)
        The number of lower and upper bands in the LU factorization.
    singular : bool
        If ``True``, the matrix ``A`` is singular.
    shape : (int, int)
        The shape of ``lub``, i.e., of the banded storage. It is derived in
        ``__post_init__`` and cannot be passed to the constructor.
    num_rows, num_cols : int
        The number of rows and columns of ``lub``. They are derived in
        ``__post_init__`` and cannot be passed to the constructor.
    main_diagonal_row_index : int
        The row index of the main diagonal within ``lub``, computed as
        ``num_rows - 1 - l_and_u[0]``. It is derived in ``__post_init__`` and cannot
        be passed to the constructor.

    Raises
    ------
    ValueError
        If ``lub`` is not 2-dimensional.

    """

    lub: np.ndarray
    ipiv: np.ndarray
    l_and_u: Tuple[int, int]
    singular: bool

    # derived fields; the defaults are placeholders that are overwritten right after
    # initialisation
    shape: Tuple[int, int] = field(default=(-1, -1), init=False)
    num_rows: int = field(default=-1, init=False)
    num_cols: int = field(default=-1, init=False)
    main_diagonal_row_index: int = field(default=-1, init=False)

    def __post_init__(self) -> None:
        lub_shape = tuple(self.lub.shape)
        if len(lub_shape) != 2:
            raise ValueError(
                f"Got lub with shape {lub_shape}, expected a 2-dimensional array in "
                f"banded storage format."
            )

        self.num_rows, self.num_cols = lub_shape
        self.shape = (self.num_rows, self.num_cols)
        # the lower bands are stored below the main diagonal, i.e., in the last
        # ``l_and_u[0]`` rows of the banded storage
        self.main_diagonal_row_index = self.num_rows - 1 - self.l_and_u[0]
+ +""" + +# Authors: +# Niklas Zell + +### Imports ### + +from chemotools.utils._models import ( # noqa: F401 + WhittakerSmoothLambda, + WhittakerSmoothMethods, +) +from chemotools.utils._whittaker_base.main import WhittakerLikeSolver # noqa: F401 diff --git a/chemotools/utils/_whittaker_base/auto_lambda/__init__.py b/chemotools/utils/_whittaker_base/auto_lambda/__init__.py new file mode 100644 index 00000000..5232d113 --- /dev/null +++ b/chemotools/utils/_whittaker_base/auto_lambda/__init__.py @@ -0,0 +1,19 @@ +""" +This submodule contains the functions used for the automated fitting of the penalty +weight lambda within the ``WhittakerLikeSolver`` class that would have cluttered the +class implementation. + +""" + +### Imports ### + +from chemotools.utils._whittaker_base.auto_lambda.log_marginal_likelihood import ( # noqa: F401 + get_log_marginal_likelihood, + get_log_marginal_likelihood_constant_term, +) +from chemotools.utils._whittaker_base.auto_lambda.optimization import ( # noqa: F401 + get_optimized_lambda, +) +from chemotools.utils._whittaker_base.auto_lambda.shared import ( # noqa: F401 + _Factorization, +) diff --git a/chemotools/utils/_whittaker_base/auto_lambda/log_marginal_likelihood.py b/chemotools/utils/_whittaker_base/auto_lambda/log_marginal_likelihood.py new file mode 100644 index 00000000..3c698dd2 --- /dev/null +++ b/chemotools/utils/_whittaker_base/auto_lambda/log_marginal_likelihood.py @@ -0,0 +1,202 @@ +""" +This submodule contains the functions used for the automated fitting of the penalty +weight lambda within the ``WhittakerLikeSolver`` class based on the log marginal +likelihood that would have cluttered the class implementation. 
def get_log_marginal_likelihood_constant_term(
    differences: int,
    penalty_matrix_log_pseudo_determinant: float,
    weights: np.ndarray,
    zero_weight_tol: float,
) -> float:
    """
    Evaluates the part of the log marginal likelihood that does not depend on the
    penalty weight lambda, i.e.,

    ``(n^ - d) * ln(2 * pi) - ln(pseudo_det(W)) - ln(pseudo_det(D.T @ D))``

    where the last term is supplied by the caller as
    ``penalty_matrix_log_pseudo_determinant`` and

    - ``ln`` is the natural logarithm,
    - ``pseudo_det(A)`` is the pseudo-determinant of ``A``, i.e., the product of its
      non-zero eigenvalues,
    - ``W`` is the diagonal matrix with ``weights`` on the main diagonal,
    - ``D.T @ D`` is the squared forward finite differences matrix,
    - ``d`` is the difference order ``differences``, and
    - ``n^`` is the number of weights that count as non-zero.

    A weight counts as non-zero if it exceeds ``weights.max() * zero_weight_tol``.

    """

    # the weights that are significant relative to the largest weight are selected;
    # only they contribute to the pseudo-determinant of the diagonal weight matrix
    nonzero_threshold = weights.max() * zero_weight_tol
    nonzero_weights = weights[weights > nonzero_threshold]

    # ln(pseudo_det(W)) is the sum of the logarithms of the non-zero weights
    log_pseudo_det_weights = np.log(nonzero_weights).sum()

    # (n^ - d) * ln(2 * pi)
    two_pi_term = (nonzero_weights.size - differences) * _LN_OF_TWO_PI

    return (
        two_pi_term
        - log_pseudo_det_weights
        - penalty_matrix_log_pseudo_determinant
    )
+ w : float or ndarray of shape (m,) + The weights to use for the smoothing. + w_plus_penalty_plus_num_samples_term : float + The last term of the log marginal likelihood that is constant since it + involves the weights, the penalty matrix, and the number of data points + which are all constant themselves (see the Notes for details). + + Notes + ----- + The log marginal likelihood is given by: + + ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - ln(pseudo_det(lambda * D.T @ D)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` + + or better + + ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - (n - d) * ln(lambda) - ln(det(D @ D.T)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` + + with: + + - ``wRSS`` as the weighted Sum of Squared Residuals between the original and the + smoothed series, + - ``PSS`` as the Penalty Sum of Squares which is given by the sum of the squared + elements of the ``d``-th order forward finite differences of the smoothed + series, + - ``lambda`` as the penalty weight used for the smoothing, + - ``d`` as the difference order used for the smoothing, + - ``ln`` as the natural logarithm, + - ``pseudo_det(A)`` as the pseudo-determinant of the matrix ``A``, i.e., the + product of its non-zero eigenvalues, + - ``det(A)`` as the determinant of the matrix ``A``, i.e., the product of its + eigenvalues, + - ``W`` as the diagonal matrix with the weights on the main diagonal, + - ``D.T @ D`` as the squared forward finite differences matrix, + - ``D @ D.T`` as the flipped squared forward finite differences matrix, + - ``n`` is the number of data points in the series to smooth, and + - ``n^`` is the number of data points with non-zero weights in the series to + smooth. + + It should be noted that ``pseudo_det(D.T @ D)`` is replaced by ``det(D @ D.T)`` + here because the latter is not rank-deficient. + + """ # noqa: E501 + + # first, the weighted Sum of Squared Residuals is computed ... 
+ weighted_sum_of_squared_residuals = smooth_weighted_sum_of_squared_residuals( + rhs_b=rhs_b, + rhs_b_smooth=rhs_b_smooth, + weights=weights, + ) + # ... followed by the Penalty Sum of Squares which requires the squared forward + # finite differences of the smoothed series + # NOTE: ``np.convolve`` is used to compute the forward finite differences and + # since it flips the provided kernel, an already flipped kernel is used + sum_of_squared_penalties = ( + lam + * np.square( + np.convolve(rhs_b_smooth, difference_kernel_flipped, mode="valid") + ).sum() + ) + + # besides the determinant of the combined left hand side matrix has to be + # computed from its decomposition + lhs_logdet_sign, lhs_logabsdet = slogdet_lu_banded( + lub_factorization=factorization, + ) + + # if the sign of the determinant is positive, the log marginal likelihood is + # computed and returned + if lhs_logdet_sign > 0.0: + return -0.5 * ( + weighted_sum_of_squared_residuals + + sum_of_squared_penalties + - (rhs_b.size - differences) * log_lam + + lhs_logabsdet + + w_plus_penalty_plus_num_samples_term + ) + + # otherwise, if the determinant is negative, the system is extremely + # ill-conditioned and the log marginal likelihood cannot be computed + # NOTE: since it is very hard to trigger this exception, it is not covered by the + # tests + raise RuntimeError( # pragma: no cover + "\nThe determinant of the combined left hand side matrix " + "W + lambda * D.T @ D is negative, indicating that the system is extremely " + "ill-conditioned.\n" + "The log marginal likelihood cannot be computed.\n" + "Please consider reducing the number of data points to smooth by, e.g., " + "binning or lowering the difference order." 
_LN_OF_A_DECADE: float = 2.302585092994046  # ln(10)
_half_ln_of_a_decade: float = 0.5 * _LN_OF_A_DECADE
_X_ABS_LN_TOL: float = 0.0049  # ~0.5% when converted from log to real

### Optimization Functions ###


def get_optimized_lambda(
    fun: Callable[..., float],
    lam: "WhittakerSmoothLambda",
    args: Tuple,
) -> float:
    """
    Optimises the penalty weight lambda by a coarse grid search over the natural
    logarithm of lambda followed by a bounded scalar minimisation.

    Parameters
    ----------
    fun : callable
        The objective to minimise. It is called as ``fun(log_lam, *args)`` and has to
        return a scalar.
    lam : WhittakerSmoothLambda
        The lambda specification. Only its attribute ``log_auto_bounds`` is read here
        and unpacked into the lower and the upper bound of the search interval (in
        log-space).
    args : tuple
        Additional positional arguments forwarded to ``fun``.

    Returns
    -------
    lam_opt : float
        The exponential of the log-lambda value found by the optimiser.

    Raises
    ------
    RuntimeError
        If the grid search is carried out, but none of the trial values yields an
        objective value below ``+inf`` (e.g., because the objective is NaN or infinite
        everywhere), since no search interval can be derived in this case.

    """

    # unless the search space spans less than 1 decade, i.e., ln(10) ~= 2.3, a grid
    # search is carried out to shrink the search space for the final optimization;
    # the grid is spanned with an integer number of steps of at most half a decade
    log_lower_bound, log_upper_bound = lam.log_auto_bounds
    bound_log_difference = log_upper_bound - log_lower_bound
    if bound_log_difference > _LN_OF_A_DECADE:
        log_lam_best_so_far = None
        target_best_so_far = float("inf")
        num_steps = 1 + ceil(bound_log_difference / _half_ln_of_a_decade)
        # NOTE: the following ensures that the upper bound is not exceeded
        step_size = bound_log_difference / (num_steps - 1)

        # all the trial values are evaluated and the best one is stored
        for trial in range(num_steps):
            log_lam_current = log_lower_bound + trial * step_size
            target_current = fun(log_lam_current, *args)

            # NOTE: NaN and +inf never pass this comparison
            if target_current < target_best_so_far:
                log_lam_best_so_far = log_lam_current
                target_best_so_far = target_current

        # if no trial value was accepted, there is nothing to centre the refined
        # search interval on, so a descriptive error is raised instead of failing
        # on an unbound local variable
        if log_lam_best_so_far is None:
            raise RuntimeError(
                "\nThe objective function for the automatic fitting of the penalty "
                "weight lambda did not yield a value below +inf for any of the "
                f"{num_steps} trial values between exp({log_lower_bound}) and "
                f"exp({log_upper_bound}).\n"
                "Please check the data and the weights for NaN or infinite values."
            )

        # then, the bounds for the final optimization are shrunk to plus/minus half
        # a decade around the best trial value
        # NOTE: the following ensures that the bounds are not violated
        log_lower_bound = max(
            log_lam_best_so_far - _half_ln_of_a_decade,
            log_lower_bound,
        )
        log_upper_bound = min(
            log_lam_best_so_far + _half_ln_of_a_decade,
            log_upper_bound,
        )

    # finally, a scalar optimization is performed
    # NOTE: since the optimization is carried out over the log of lambda, the
    #       exponential of the result is returned
    return exp(
        minimize_scalar(
            fun=fun,
            bounds=(log_lower_bound, log_upper_bound),
            args=args,
            method="bounded",
            options={"xatol": _X_ABS_LN_TOL},
        ).x
    )
def smooth_weighted_sum_of_squared_residuals(
    rhs_b: np.ndarray,
    rhs_b_smooth: np.ndarray,
    weights: Union[float, np.ndarray],
) -> float:
    """
    Computes the (weighted) Sum of Squared Residuals (w)RSS between the original and
    the smoothed series.

    Parameters
    ----------
    rhs_b : ndarray
        The original series.
    rhs_b_smooth : ndarray
        The smoothed series.
    weights : float or ndarray
        Either a scalar weight that is applied to all data points (``1.0`` yields
        the plain RSS) or an Array of weights that is multiplied element-wise with
        the squared residuals.

    Returns
    -------
    wrss : float
        The sum of the (weighted) squared residuals.

    """

    squared_residuals = np.square(rhs_b - rhs_b_smooth)

    # Case 1: a scalar weight is provided; it is applied after the summation to avoid
    # a temporary Array
    # NOTE: all scalars (Python floats and ints as well as NumPy scalars) are treated
    #       alike, so a scalar weight is never silently dropped
    if np.ndim(weights) == 0:
        return weights * squared_residuals.sum()

    # Case 2: individual weights are provided
    return (weights * squared_residuals).sum()
RealNumericTypes = (int, float)

### Functions ###


def get_checked_lambda(lam: Any) -> _models.WhittakerSmoothLambda:
    """
    Converts the penalty weight specification ``lam`` to the dataclass
    ``WhittakerSmoothLambda`` that is used inside the ``WhittakerLikeSolver`` class.

    Accepted inputs are an instance of ``WhittakerSmoothLambda`` (returned as it is),
    a single ``int`` or ``float`` (turned into a fixed penalty weight), or a tuple
    ``(lower bound, upper bound, method)``.

    Raises
    ------
    ValueError
        If ``lam`` is a tuple that does not have exactly three elements.
    TypeError
        If ``lam`` is none of the accepted types.

    """

    # an instance of the dataclass is handed back without any further processing
    if isinstance(lam, _models.WhittakerSmoothLambda):
        return lam

    # a single number is interpreted as a fixed penalty weight
    if isinstance(lam, RealNumericTypes):
        fixed_method = _models.WhittakerSmoothMethods.FIXED
        return _models.WhittakerSmoothLambda(bounds=lam, method=fixed_method)

    # everything that is not a tuple at this point cannot be interpreted
    if not isinstance(lam, tuple):
        raise TypeError(
            f"\nThe lambda parameter must be an integer, a float, a tuple of (lower bound, "
            f"upper bound, method), or an instance of WhittakerSmoothLambda, but it is "
            f"{type(lam)} instead."
        )

    # the tuple needs to consist of exactly three elements ...
    if len(lam) != 3:
        raise ValueError(
            f"\nThe lambda parameter must be a tuple of three elements (lower "
            f"bound, upper bound, method), but it has {len(lam)} elements "
            f"instead."
        )

    # ... which are unpacked for the creation of the dataclass
    lower_bound, upper_bound, method = lam
    return _models.WhittakerSmoothLambda(
        bounds=(lower_bound, upper_bound),
        method=method,
    )


def get_squared_forward_finite_difference_matrix_banded(
    num_data: int,
    differences: int,
    original_first: bool,
    dtype: Type,
) -> Tuple[LAndUBandCounts, np.ndarray]:
    """
    Generates the squared forward finite difference matrix via
    ``squared_forward_difference_matrix_banded``, casts it to ``dtype``, and converts
    it with ``convert_upper_chol_banded_to_lu_banded_storage`` to the banded storage
    format used for LAPACK's banded LU decomposition.

    ``original_first`` is forwarded to the generating function; the callers in this
    package use ``False`` for ``D.T @ D`` and ``True`` for its "flipped" counterpart
    ``D @ D.T``.

    """

    # NOTE: the generated matrix is cast to the requested data type because the LU
    #       decomposition is not carried out on the raw entries
    penalty_matrix_raw = squared_forward_difference_matrix_banded(
        num_data=num_data,
        differences=differences,
        original_first=original_first,
    )
    penalty_matrix_banded = penalty_matrix_raw.astype(dtype)

    return convert_upper_chol_banded_to_lu_banded_storage(ab=penalty_matrix_banded)


def get_flipped_fw_diff_kernel(
    differences: int,
    dtype: Type,
) -> np.ndarray:
    """
    Returns the reversed (flipped) forward finite difference kernel for the difference
    order ``differences`` cast to ``dtype``.

    """

    kernel = forward_finite_difference_kernel(differences=differences)
    kernel_flipped = np.flip(kernel)

    return kernel_flipped.astype(dtype)


def get_penalty_log_pseudo_determinant(
    num_data: int,
    differences: int,
    dtype: Type,
) -> float:
    """
    Computes the natural logarithm of the pseudo-determinant of the squared forward
    finite differences matrix ``D.T @ D`` which is required for the log marginal
    likelihood in the automatic fitting of the penalty weight.

    Returns
    -------
    log_pseudo_determinant : float
        The natural logarithm of the pseudo-determinant of the penalty matrix.

    Raises
    ------
    RuntimeError
        If the sign of the determinant obtained from the LU decomposition is not
        positive, which is taken as an indicator of an extremely ill-conditioned
        system for which the automatic fitting is not possible.

    Notes
    -----
    ``D.T @ D`` is rank-deficient with ``differences`` zero eigenvalues, whereas
    ``D @ D.T`` has full rank and shares the non-zero eigenvalues of ``D.T @ D``.
    Therefore, the pseudo-determinant of ``D.T @ D`` is obtained as the determinant of
    ``D @ D.T`` from a partially pivoted banded LU decomposition instead of an
    expensive banded eigenvalue computation.

    Within this function, ``D @ D.T`` is called the "flipped penalty matrix" even
    though it is not actually flipped.

    """

    # the flipped penalty matrix D @ D.T is set up in banded storage ...
    flipped_l_and_u, flipped_penalty_matrix_banded = (
        get_squared_forward_finite_difference_matrix_banded(
            num_data=num_data,
            differences=differences,
            original_first=True,
            dtype=dtype,
        )
    )

    # ... and factorized by a partially pivoted LU decomposition ...
    flipped_factorization = lu_banded(
        l_and_u=flipped_l_and_u,
        ab=flipped_penalty_matrix_banded,
        check_finite=False,
    )

    # ... from which the sign and the log of the absolute determinant are obtained
    log_pseudo_det_sign, log_pseudo_determinant = slogdet_lu_banded(
        lub_factorization=flipped_factorization,
    )

    # a sign that is not positive indicates that the penalty matrix is extremely
    # ill-conditioned and that the automatic fitting of the penalty weight is not
    # possible
    if not (log_pseudo_det_sign > 0.0):
        raise RuntimeError(
            f"\nThe pseudo-determinant of the penalty D.T @ D matrix is negative, "
            f"indicating that the system is extremely ill-conditioned.\n"
            f"Automatic fitting for {num_data} data points and difference order "
            f"{differences} is not possible.\n"
            f"Please consider reducing the number of data points to smooth by, e.g., "
            f"binning or lowering the difference order."
        )

    return log_pseudo_determinant
class WhittakerLikeSolver:
    """
    This class can be used to solve linear systems of equations that involve banded
    matrices as they occur in applications like the Whittaker-Henderson-smoothing or
    derived methods like Asymmetric Least Squares (ALS) baseline correction.
    It supports weights and tries to use the most efficient method available.

    Attributes
    ----------
    num_data_ : int
        The number of data points within the series to smooth. It is equivalent to
        ``n_features_in_``, but it was renamed to allow for definition after the
        initialisation.
    differences_ : int
        The number of differences to use for the smoothing. If the aim is to obtain a
        smooth estimate of the ``m``-th order derivative, this should be set to
        at least ``m + 2``.
        For higher orders, the systems to solve tend to get numerically unstable,
        especially when ``num_data_`` grows large and high values for lambda are used.
        Values below 1 are not allowed.
    _lam_internal_ : WhittakerSmoothLambda
        The internal representation of the lambda parameter to use for the smoothing,
        a.k.a. the penalty weight or smoothing parameter.
        It is internally stored as an instance of the dataclass :class:`WhittakerSmoothLambda`.
    _l_and_u_ : (int, int)
        The number of sub- (first) and superdiagonals (second element) of the final
        matrix to solve for smoothing. Both elements will equal ``differences_``.
    _difference_kernel_flipped_ : ndarray of shape (0, ) or (differences + 1,)
        The flipped kernel to use for the forward finite differences. It is only
        required for the automatic fitting of the lambda parameter by maximizing the log
        marginal likelihood, i.e., when the method used is
        ``WhittakerSmoothMethods.LOGML``. Otherwise, it is an empty Array.
        Flipping is required due to NumPy's definition of convolution.
    _penalty_matrix_banded_ : ndarray of shape (2 * differences + 1, num_data)
        The squared forward finite differences matrix ``D.T @ D`` stored in the banded
        storage format used for LAPACK's banded LU decomposition.
    _penalty_matrix_log_pseudo_determinant_ : float
        The natural logarithm of the pseudo-determinant of the squared forward finite
        differences matrix ``D.T @ D`` which is used for the automatic fitting of the
        lambda parameter by maximizing the log marginal likelihood, i.e., when the
        method used is ``WhittakerSmoothMethods.LOGML``.
        If lambda is fixed, this is a NaN-value.
    _pentapy_enabled_ : bool
        Whether the Pentapy solver is enabled for the smoothing (``True``) or not
        (``False``).
        It can only be used if the number of differences is 2 and the lambda parameter
        is fixed (and of course if ``pentapy`` is available).
    __child_class_name : str
        The name of the child class that inherits from this base class. It is used for
        warning messages and debugging purposes.
    __dtype : type, default=np.float64
        The data type to which the series to be smoothed will be converted to. To avoid
        numerical issues, all series are converted to double precision.
    __allow_pentapy : bool, default=True
        Whether to enable the Pentapy solver if available. This is only used for
        debugging and testing purposes.
    __zero_weight_tol : float, default=1e-10
        If any of the weights drops below ``weights.max() * __zero_weight_tol``, the
        weight is considered zero for the evaluation of the log marginal likelihood.

    """  # noqa: E501

    __dtype: type = np.float64
    __allow_pentapy: bool = True
    __zero_weight_tol: float = 1e-10

    def __init__(self) -> None:  # pragma: no cover
        pass

    ### Initialization and Setup Methods ###

    def _setup_for_fit(
        self,
        num_data: int,
        differences: int,
        lam: _LambdaSpecs,
        child_class_name: str,
    ) -> None:
        """
        Pre-computes everything that can be computed for the smoothing in general as
        well as for fitting the lambda parameter itself.

        For the parameters, please refer to the documentation of the class.

        """

        # the input arguments are stored and validated
        self.num_data_: int = num_data
        self.differences_: int = differences
        self._lam_internal_: _models.WhittakerSmoothLambda = get_checked_lambda(lam=lam)
        self.__child_class_name: str = child_class_name

        # if the difference order exceeds 2, a warning is issued because then the
        # current implementation cannot guarantee numerical stability
        if self.differences_ > 2:
            warn(
                f"\nWARNING: With the current implementation, the numerical stability "
                f"of the smoothing cannot be guaranteed for difference orders higher "
                f"than 2.\n"
                f"Please refer to the documentation of the class "
                f"'{self.__child_class_name}' for further information.",
                UserWarning,
            )

        # the squared forward finite difference matrix D.T @ D is computed in band
        # storage format for LAPACK's banded LU decomposition
        self._l_and_u_: LAndUBandCounts
        self._penalty_matrix_banded_: np.ndarray
        self._l_and_u_, self._penalty_matrix_banded_ = (
            get_squared_forward_finite_difference_matrix_banded(
                num_data=self.num_data_,
                differences=self.differences_,
                original_first=False,
                dtype=self.__dtype,
            )
        )

        # if the penalty weight is fitted automatically by maximization of the
        # log marginal likelihood, the natural logarithm of the pseudo-determinant of
        # D.T @ D is pre-computed together with the forward finite difference kernel;
        # otherwise, the kernel is an empty Array and the log pseudo-determinant is NaN
        # NOTE: ``np.empty`` with shape (0,) is used for the placeholder because
        #       ``np.ndarray([])`` would create a 0-dimensional Array holding a single
        #       uninitialised element
        self._difference_kernel_flipped_: np.ndarray = np.empty(
            shape=(0,), dtype=self.__dtype
        )
        self._penalty_matrix_log_pseudo_determinant_: float = float("nan")
        if self._lam_internal_.fit_auto and self._lam_internal_.method_used in {
            _models.WhittakerSmoothMethods.LOGML,
        }:
            # NOTE: the kernel is cast to the internal data type of the class
            self._difference_kernel_flipped_ = get_flipped_fw_diff_kernel(
                differences=self.differences_,
                dtype=self.__dtype,
            )
            self._penalty_matrix_log_pseudo_determinant_ = (
                get_penalty_log_pseudo_determinant(
                    num_data=self.num_data_,
                    differences=self.differences_,
                    dtype=self.__dtype,
                )
            )

        # finally, Pentapy is enabled if available, the number of differences is 2,
        # and the lambda parameter is not fitted automatically
        self._pentapy_enabled_: bool = (
            PENTAPY_AVAILABLE
            and self.differences_ == 2
            and self.__allow_pentapy
            and not self._lam_internal_.fit_auto
        )

    ### Solver Methods ###

    # TODO: implement solver that does not rely on normal equations
    def _solve(
        self,
        lam: float,
        rhs_b_weighted: np.ndarray,
        weights: Union[float, np.ndarray],
    ) -> tuple[np.ndarray, _models.BandedSolvers, _Factorization]:
        """
        Internal wrapper for the solver methods to solve the linear system of equations
        for the Whittaker-like smoother.
        Currently, the system is solved via the normal equations by either a direct
        pentadiagonal solve or an LU decomposition of the banded normal equations
        matrix. This is less numerically stable because the condition number
        of the normal equations matrix is the square of the condition number of the
        original system, but on the other hand, it can be way faster.
        A fallback to the more numerically stable QR decomposition is not implemented
        yet.

        Returns
        -------
        The smoothed solution, the solver that was used, and the factorization as they
        are returned by ``solve_normal_equations``.

        """  # noqa: E501

        return solve_normal_equations(
            lam=lam,
            differences=self.differences_,
            l_and_u=self._l_and_u_,
            penalty_matrix_banded=self._penalty_matrix_banded_,
            rhs_b_weighted=rhs_b_weighted,
            weights=weights,
            pentapy_enabled=self._pentapy_enabled_,
        )

    ### Auxiliary Methods for automated fitting of the penalty weight ###

    def _marginal_likelihood_objective(
        self,
        log_lam: float,
        rhs_b: np.ndarray,
        weights: np.ndarray,
        w_plus_penalty_plus_n_samples_term: float,
    ) -> float:
        """
        The objective function to minimize for the automatic fitting of the penalty
        weight lambda by maximizing the log marginal likelihood.
        The log marginal likelihood itself is evaluated by the function
        ``get_log_marginal_likelihood``; this method returns its negative.

        Parameters
        ----------
        log_lam : float
            The natural logarithm of the penalty weight lambda.
        rhs_b : ndarray
            The series to smooth.
        weights : ndarray
            The weights for the series to smooth.
        w_plus_penalty_plus_n_samples_term : float
            The term of the log marginal likelihood that does not depend on lambda.

        """

        # first, the linear system of equations is solved with the given penalty weight
        # lambda
        lam = exp(log_lam)

        # the solution of the linear system of equations is computed
        b_smooth, _, factorization = self._solve(
            lam=lam,
            rhs_b_weighted=rhs_b * weights,
            weights=weights,
        )

        # finally, the log marginal likelihood is computed and returned (negative since
        # the objective function is minimized, but the log marginal likelihood is
        # to be maximized)
        return (-1.0) * get_log_marginal_likelihood(
            factorization=factorization,  # type: ignore
            log_lam=log_lam,  # type: ignore
            lam=lam,
            differences=self.differences_,
            difference_kernel_flipped=self._difference_kernel_flipped_,
            rhs_b=rhs_b,
            rhs_b_smooth=b_smooth,
            weights=weights,
            w_plus_penalty_plus_num_samples_term=w_plus_penalty_plus_n_samples_term,
        )

    ### Solver management methods ###

    def _solve_single_b_fixed_lam(
        self,
        rhs_b: np.ndarray,
        weights: Union[float, np.ndarray],
        lam: Optional[float] = None,
    ) -> tuple[np.ndarray, float]:
        """
        Solves for the Whittaker-like smoother solution for a single series with a fixed
        penalty weight lambda.

        Parameters
        ----------
        rhs_b : ndarray
            The series to smooth.
        weights : float or ndarray
            The weights for the series. A ``float`` indicates that no weights are
            provided.
        lam : float or None, default=None
            The penalty weight to use. If ``None``, the fixed lambda stored in
            ``_lam_internal_`` is used.

        Returns
        -------
        The smoothed series and the penalty weight lambda that was used.

        """

        # if no value was provided for the penalty weight lambda, the respective class
        # attribute is used instead
        lam = self._lam_internal_.fixed_lambda if lam is None else lam

        # the weights and the weighted series are computed depending on whether weights
        # are provided or not
        # Case 1: no weights are provided
        # TODO: this case is not possible under the current implementations for the
        #       calls of any of the child classes because they either use weights or
        #       the most efficient way around going into this method in the first place;
        #       in the future this might change and thus, this case is kept for now, but
        #       ignored for coverage
        if isinstance(weights, float):  # pragma: no cover
            return (
                self._solve(
                    lam=lam,
                    rhs_b_weighted=rhs_b,
                    weights=weights,
                )[0],
                lam,
            )

        # Case 2: weights are provided
        return (
            self._solve(
                lam=lam,
                rhs_b_weighted=rhs_b * weights,
                weights=weights,
            )[0],
            lam,
        )

    def _solve_single_b_auto_lam_logml(
        self,
        rhs_b: np.ndarray,
        weights: Union[float, np.ndarray],
    ) -> tuple[np.ndarray, float]:
        """
        Solves for the Whittaker-like smoother solution for a single series with an
        automatically fitted penalty weight lambda by maximizing the log marginal
        likelihood.

        Returns
        -------
        The smoothed series and the fitted penalty weight lambda.

        Raises
        ------
        ValueError
            If ``weights`` is a scalar, i.e., if no weights are provided.

        """

        # if the weights are not provided, the log marginal likelihood cannot be
        # computed - at least not in a meaningful way
        if isinstance(weights, (float, int)):
            raise ValueError(
                "\nAutomatic fitting of the penalty weight lambda by maximizing the "
                "log marginal likelihood is only possible if weights are provided.\n"
                "Please provide weights for the series to smooth."
            )

        # the term that is constant for the log marginal likelihood is computed
        w_plus_num_samples_term = get_log_marginal_likelihood_constant_term(
            differences=self.differences_,
            penalty_matrix_log_pseudo_determinant=self._penalty_matrix_log_pseudo_determinant_,
            weights=weights,
            zero_weight_tol=self.__zero_weight_tol,
        )

        # the optimization of the log marginal likelihood is carried out
        opt_lambda = get_optimized_lambda(
            fun=self._marginal_likelihood_objective,
            lam=self._lam_internal_,
            args=(rhs_b, weights, w_plus_num_samples_term),
        )

        # the optimal penalty weight lambda is returned together with the smoothed
        # series
        return self._solve_single_b_fixed_lam(
            rhs_b=rhs_b,
            weights=weights,
            lam=opt_lambda,
        )

    def _solve_multiple_b(
        self,
        X: np.ndarray,
        weights: Optional[np.ndarray],
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Solves for the Whittaker-like smoother solution for multiple series when the
        lambda parameter is fixed and the same weights are applied to all series.
        It leverages the ability of LAPACK (not ``pentapy``) to solve multiple linear
        systems of equations at once from the same factorization.

        For the parameters, please refer to the documentation of ``_whittaker_solve``.

        Returns
        -------
        The smoothed series stored as rows and a vector with one entry per series that
        is filled with the fixed penalty weight lambda.

        """

        # the solution of the linear system of equations is computed for the
        # transposed series matrix (expected right-hand side format for the solvers)
        # Case 1: no weights are provided
        if weights is None:
            X_smooth, _, _ = self._solve(
                lam=self._lam_internal_.fixed_lambda,
                rhs_b_weighted=X.transpose(),
                weights=1.0,
            )

        # Case 2: weights are provided
        # NOTE: only the first row of the weights enters the left-hand side matrix
        #       because the same weights are assumed for all series
        else:
            X_smooth, _, _ = self._solve(
                lam=self._lam_internal_.fixed_lambda,
                rhs_b_weighted=(X * weights).transpose(),
                weights=weights[0, ::],
            )

        return (
            X_smooth.transpose(),
            np.full(
                shape=(X.shape[0],),
                fill_value=self._lam_internal_.fixed_lambda,
            ),
        )

    ### Main Solver Entry Point ###

    def _whittaker_solve(
        self,
        X: np.ndarray,
        *,
        weights: Optional[np.ndarray] = None,
        use_same_w_for_all: bool = False,
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Solves for the Whittaker-like smoother solution for Arrays that are stored in
        2D format, i.e., each series is stored as a row.
        Internally it chooses the most appropriate method and solver depending on the
        data dimensionality, the weights, and the system's available packages
        (``pentapy``).

        Parameters
        ----------
        X : ndarray of shape (m, n)
            The series to be smoothed stored as individual rows.
        weights : ndarray of shape(1, n) or shape(m, n) or None
            The weights to be applied for smoothing. If only a single row is provided
            and ``use_same_w_for_all`` is ``True``, the same weights can be applied
            for all series in ``X``, which speeds up the smoothing a lot for fixed
            smoothing parameters ``lam``.
            If ``None``, no weights are applied and each datapoint is assumed to have
            equal importance. This allows for ``use_same_w_for_all`` to be ``True``
            as well.
        use_same_w_for_all
            Whether to use the same weights for all series in ``X``. This is only
            possible if ``weights`` is a single row or ``None``.

        Returns
        -------
        X_smooth : ndarray of shape(m, n)
            The smoothed series stored as individual rows.
        lam : np.ndarray of shape(m, )
            The lambda parameter used for the smoothing of each series. If ``lam`` was
            fixed, this is a vector of length ``m`` with the same value for each series.

        """  # noqa: E501

        # if multiple x with the same weights are to be solved for fixed lambda, this
        # can be done more efficiently by leveraging LAPACK'S (not pentapy's) ability to
        # perform multiple solves from the same factorization at once
        if use_same_w_for_all and not self._lam_internal_.fit_auto:
            return self._solve_multiple_b(X=X, weights=weights)

        # otherwise, the solution of the linear system of equations is computed for
        # each series
        # first, the smoothing method is specified depending on whether the penalty
        # weight lambda is fitted automatically or not
        smooth_method_assignment = {
            _models.WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam,
            _models.WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_logml,
        }
        smooth_method = smooth_method_assignment[self._lam_internal_.method_used]

        # then, the solution is computed for each series by means of a loop
        X_smooth = np.empty_like(X)
        lam = np.empty(shape=(X.shape[0],))
        w_gen = get_weight_generator(weights=weights, num_series=X.shape[0])
        for iter_i, (x_vect, wght) in enumerate(zip(X, w_gen)):
            X_smooth[iter_i], lam[iter_i] = smooth_method(
                rhs_b=x_vect,
                weights=wght,
            )

        return X_smooth, lam
def _iter_weight_rows(
    weights: Any,
    num_series: int,
) -> Generator[Union[float, np.ndarray], None, None]:
    """
    Yields one weight entry per series from already validated weights.

    """

    # Case 1: No weights, i.e., a unit weight for each series
    if weights is None:
        for _ in range(num_series):
            yield 1.0

    # Case 2: a single row of weights that is shared by all series
    elif weights.shape[0] == 1:
        shared_row = weights[0]
        for _ in range(num_series):
            yield shared_row

    # Case 3: one row of weights per series
    else:
        for idx in range(num_series):
            yield weights[idx]


def get_weight_generator(
    weights: Any,
    num_series: int,
) -> Generator[Union[float, np.ndarray], None, None]:
    """
    Creates a generator that yields the weights for each series in a series matrix
    ``X``.

    Parameters
    ----------
    weights : ndarray of shape (1, n) or (m, n) or None
        The weights. ``None`` results in the scalar ``1.0`` being yielded for each
        series. A single row is yielded repeatedly for all series, while otherwise the
        rows are yielded one after another.
    num_series : int
        The number of series, i.e., the number of items to yield.

    Returns
    -------
    weight_generator : generator
        The generator yielding ``num_series`` weight entries.

    Raises
    ------
    TypeError
        If ``weights`` is neither ``None`` nor a NumPy Array.
    ValueError
        If ``weights`` is a NumPy Array that is not 2-dimensional.

    Notes
    -----
    The checks are performed when this function is called and not only when the first
    item is requested from the generator.

    """

    # if the weights are neither None nor a 2D-Array, an error is raised
    if not (weights is None or isinstance(weights, np.ndarray)):
        raise TypeError(
            f"The weights must either be None or a NumPy-2D-Array, but they are of "
            f"type '{type(weights)}'."
        )

    if weights is not None and weights.ndim != 2:
        raise ValueError(
            f"If provided as an Array, the weights must be a 2D-Array, but they are "
            f"{weights.ndim}-dimensional with shape {weights.shape}."
        )

    return _iter_weight_rows(weights=weights, num_series=num_series)
def solve_pentapy(
    lhs_a_banded: np.ndarray,
    rhs_b_weighted: np.ndarray,
) -> np.ndarray:
    """
    Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with the
    ``pentapy`` package, i.e., the system ``A @ x = b`` with the banded left-hand side
    ``lhs_a_banded`` and the right-hand side ``rhs_b_weighted``.

    Notes
    -----
    The solver is called once per right-hand side vector, so for a 2D
    ``rhs_b_weighted`` the solution is computed for each of its columns separately.

    """

    # the settings that are shared by all calls of the pentapy solver
    pentapy_settings = dict(is_flat=True, index_row_wise=False, solver=1)

    # a 1-dimensional right-hand side vector is solved with a single call
    if rhs_b_weighted.ndim == 1:
        return pp.solve(mat=lhs_a_banded, rhs=rhs_b_weighted, **pentapy_settings)

    # for a 2-dimensional right-hand side matrix, each column is solved on its own
    # NOTE: the solutions are written into the rows of a transposed result buffer and
    #       the buffer is transposed back at the very end
    num_rows, num_columns = rhs_b_weighted.shape[0], rhs_b_weighted.shape[1]
    solution_rows = np.empty(shape=(num_columns, num_rows))
    for column_index in range(num_columns):
        solution_rows[column_index, ::] = pp.solve(
            mat=lhs_a_banded,
            rhs=rhs_b_weighted[::, column_index],
            **pentapy_settings,
        )

    return solution_rows.transpose()


def solve_ppivoted_lu(
    l_and_u: LAndUBandCounts,
    lhs_a_banded: np.ndarray,
    rhs_b_weighted: np.ndarray,
) -> tuple[np.ndarray, BandedLUFactorization]:
    """
    Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with a
    partially pivoted banded LU decomposition, i.e., the system ``A @ x = b`` with the
    banded left-hand side ``lhs_a_banded`` and the right-hand side ``rhs_b_weighted``.

    Errors of the decomposition are not handled here; they propagate to the caller.

    Returns
    -------
    The solution and the LU factorization it was computed from.

    """

    # first, the banded left-hand side matrix is factorized ...
    factorization = lu_banded(
        l_and_u=l_and_u,
        ab=lhs_a_banded,
        check_finite=False,
    )

    # ... and afterwards, the factorization is used for the actual solve
    # NOTE: the right-hand side is handed over with ``overwrite_b=True``
    solution = lu_solve_banded(
        lub_factorization=factorization,
        b=rhs_b_weighted,
        check_finite=False,
        overwrite_b=True,
    )

    return solution, factorization
+ rhs_b_weighted : ndarray of shape (m,) or (m, n) + The weighted right-hand side vector or matrix of the linear system of equations + given by ``W @ b``. + weights : float or ndarray of shape (m,) + The weights to use for the linear system of equations given in terms of the main + diagonal of the weight matrix ``W``. + It can either be a vector of weights for each data point or a single scalar - + namely ``1.0`` - if no weights are provided. + pentapy_enabled : bool + Determines whether the ``pentapy`` solver is enabled (``True``) or not + (``False``). + + Returns + ------- + x : np.ndarray of shape (m,) or (m, n) + The solution vector or matrix of the linear system of equations. + decomposition_type : BandedSolvers + The type of decomposition used to solve the linear system of equations. + decomposition : BandedLUFactorization or BandedPentapyFactorization + The decomposition used to solve the linear system of equations which is stored + as a class instance specifying everything required to solve the system with + the ``decomposition_type`` used. + + Raises + ------ + RuntimeError + If all available solvers failed to solve the linear system of equations which + indicates a highly ill-conditioned system. + + Notes + ----- + It might seem more efficient to solve the linear system ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` + because this only requires a multiplication of ``m`` weights with the reciprocal of + the penalty weight whereas the multiplication with ``D.T @ D`` requires roughly + ``m * (1 + 2 * differences)`` multiplications with ``m`` as the number of data + points and ``differences`` as the difference order. On top of that, ``m * differences`` + multiplications - so roughly 50% - would be redundant given that the penalty + ``D.T @ D`` matrix is symmetric.
+ However, NumPy's scalar multiplication is so highly optimized that the + multiplication with ``D.T @ D`` without considering symmetry is almost as fast as + the multiplication with the diagonal matrix ``W``, especially when compared to the + computational load of the banded solvers. + + """ # noqa: E501 + + # the banded storage format for the LAPACK LU decomposition is computed by + # scaling the penalty matrix with the penalty weight lambda and then adding the + # diagonal matrix with the weights + lhs_a_banded = lam * penalty_matrix_banded + lhs_a_banded[differences, ::] += weights + + # the linear system of equations is solved with the most efficient method + # Case 1: Pentapy can be used + if pentapy_enabled: + x = solve_pentapy( + lhs_a_banded=lhs_a_banded, + rhs_b_weighted=rhs_b_weighted, + ) + if np.isfinite(x).all(): + return ( + x, + BandedSolvers.PENTAPY, + BandedPentapyFactorization(), + ) + + # Case 2: LU decomposition (final fallback for pentapy) + try: + x, lub_factorization = solve_ppivoted_lu( + l_and_u=l_and_u, + lhs_a_banded=lhs_a_banded, + rhs_b_weighted=rhs_b_weighted, + ) + return ( + x, + BandedSolvers.PIVOTED_LU, + lub_factorization, + ) + + except np.linalg.LinAlgError: + available_solvers = f"{BandedSolvers.PIVOTED_LU}" + if pentapy_enabled: + available_solvers = f"{BandedSolvers.PENTAPY}, {available_solvers}" + + raise RuntimeError( + f"\nAll available solvers ({available_solvers}) failed to solve the " + f"linear system of equations which indicates a highly ill-conditioned " + f"system.\n" + f"Please consider reducing the number of data points to smooth by, " + f"e.g., binning or lowering the difference order." 
+ ) diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index 88b28293..1854b197 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -1,9 +1,16 @@ +from typing import Literal, Optional, Tuple, Type, Union + +import numpy as np from sklearn.utils.validation import check_array -def check_input(X, y=None): +def check_input( + X, + y=None, + dtype: Union[Type, Literal["numeric"], None] = "numeric", +): # Check that X is a 2D array and has only finite values - X = check_array(X, ensure_2d=True, force_all_finite=True) + X = check_array(X, ensure_2d=True, force_all_finite=True, dtype=dtype) # Check that y is None or a 1D array of the same length as X if y is not None: @@ -12,3 +19,56 @@ def check_input(X, y=None): if len(y) != X.shape[0]: raise ValueError("y must have the same number of samples as X") return X + + +def check_weights( + weights: Optional[np.ndarray], + n_samples: int, + n_features: int, +) -> Tuple[Optional[np.ndarray], bool]: + # if the weights are None, None is returned and a flag that the same weights should + # be applied for all samples + if weights is None: + return None, True + + # if the weights are a 1D array, they are reshaped to a 2D array with one row + if weights.ndim == 1: + weights_checked = weights.reshape((1, -1)) + else: + weights_checked = weights + + # now, the need to be checked for having the right shape + weights_checked = check_array( + weights_checked, + ensure_2d=True, + force_all_finite=True, + ) + + # afterwards, they are checked for having the right shape + if weights_checked.shape[0] not in {1, n_samples}: + raise ValueError( + f"Weights must have either 1 or {n_samples} rows, but they have " + f"{weights_checked.shape[0]} rows." + ) + if weights_checked.shape[1] != n_features: + raise ValueError( + f"Weights must have {n_features} columns, but they have " + f"{weights_checked.shape[1]} columns." 
+ ) + + # finally, it needs to be checked that the weights are all non-negative ... + if (weights_checked < 0.0).any(): + raise ValueError( + f"Weights may not be negative, but {(weights_checked < 0.0).sum(axis=1)} " + f"negative weights were found (one entry per vector)." + ) + # ... and also at least one of them is positive + if (weights_checked.sum(axis=1) <= 0.0).any(): + raise ValueError( + f"At least one weights needs to be > 0, but all weights were 0.0 for " + f"vector index {np.where(weights_checked.sum(axis=1) <= 0.0)[0]}." + ) + + # the weights are returned together with a flag whether to apply the same weights + # for all samples or not + return weights_checked, weights_checked.shape[0] == 1 diff --git a/docs/smooth.md b/docs/smooth.md index f48ac6da..4bdab865 100644 --- a/docs/smooth.md +++ b/docs/smooth.md @@ -44,7 +44,7 @@ It is an automated smoothing algorithm that uses a penalized least squares appro | Argument | Description | Type | Default | | --- | --- | --- | --- | -| ```lam``` | smoothing factor. | ```float``` | ```1e2``` | +| ```lam``` | smoothing factor. | ```float or tuple[float, float, str] or WhittakerSmoothLambda``` | ```1e2``` | | ```differences``` | The number of differences to use. | ```int``` | ```1``` | ### __Usage examples__: diff --git a/pyproject.toml b/pyproject.toml index b5a3c468..c0c5a5c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,4 +3,49 @@ requires = [ "setuptools>=42", "wheel" ] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" + +[tool.coverage.run] +omit = [ + "chemotools/_runtime/*", + ] + +[tool.ruff] +# Enable pycodestyle (`E`), Pyflakes (`F`) checks. +select = ["E", "F"] +ignore = [] + +# Allow autofix for all enabled rules (when `--fix`) is provided. +# "A", "B", "C", "D", "E", "F", "..." 
+fixable = [] +unfixable = [] + +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv*", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + ".vscode", +] + +line-length = 88 +target-version = "py311" + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..2bed0f3a --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --doctest-modules \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..85a1b2d2 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,8 @@ +black +isort +line_profiler +matplotlib +pentapy +pytest +pytest-cov +pytest-xdist[psutil] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 77e39e5b..1b631ca6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ numpy>=1.24.1 pandas>=1.3.4 polars>=0.20.0 pyarrow>=15.0.0 -scikit-learn>=1.4.0 +scikit-learn>=1.4.0 diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 00000000..67343e7d --- /dev/null +++ b/tests/README.md @@ -0,0 +1,243 @@ +# ✅❌ Good practices for testing with ``pytest`` 🤔 + +## 1) ✍️ Type hint your tests +While this might seem tedious because tests are not the actual code after all, this helps catching errors early when writing tests, especially those one did not think of before. A good type checker will help here by underlining type mismatches in red. + +## 2) 🪛 Test your test utility functions +Some tests require utility functions to be written. Since a utility function is also code that can have bugs, it is important to test it as well.
+They can either be tested with a dedicated test + +```python +def utility_function(): + return 2.0 + +def test_utility_function(): + assert utility_function() == 2.0 +``` + +or via a doctest that will be included in ``chemotools``' test suite by ``pytest``. + +```python +def utility_function(): + """ + Doctests + -------- + >>> utility_function() + 2.0 + """ + + return 2.0 +``` + +## 3) 🦾🤖 Don't write the same test twice - use parametrization +If you have a test that is repeated with different inputs, use parametrization to avoid writing the same test twice.
+This will make your test suite more readable and maintainable. With the ``pytest.mark.parametrize`` decorator, you can run the same test with different inputs. In the following example, the test will run 5 times with the inputs 1, 4, 9, 16, and 25. + +```python +import pytest + +@pytest.mark.parametrize("input", [1, 4, 9, 16, 25]) +def test_is_square(input: int) -> None: + assert input ** 0.5 == int(input ** 0.5) +``` + +In case you want to test multiple input combinations, you can use multiple wrappings of `@pytest.mark.parametrize`. The next test will run 5 x 5 = 25 combinations of inputs. + +```python +import pytest + +@pytest.mark.parametrize("input_2", [1, 4, 9, 16, 25]) +@pytest.mark.parametrize("input_1", [1, 4, 9, 16, 25]) +def test_sum_is_positive( + input_1: int, + input_2: int +) -> None: + assert input_1 + input_2 > 0 +``` + +If you need multiple wrappings, but some combinations are not valid, you can use `pytest.skip` to skip the test. The following will run 5 x 5 = 25 tests, but will skip the test when both inputs are 1. + +```python +import pytest + +@pytest.mark.parametrize("input_2", [1, 4, 9, 16, 25]) +@pytest.mark.parametrize("input_1", [1, 4, 9, 16, 25]) +def test_sum_is_positive( + input_1: int, + input_2: int +) -> None: + if input_1 == 1 and input_2 == 1: + pytest.skip("This test is not valid") + + assert input_1 + input_2 > 0 +``` + +Finally, in case your test runs on multiple specific combinations of inputs and expected outputs, you can parametrize the full combination in a ``pytest.mark.parametrize`` decorator. 
+ +```python +import pytest + + +@pytest.mark.parametrize( + "input_1, input_2, expected", + [ + (1, 2, 3), + (2, 3, 5), + (3, 4, 7), + ], +) +def test_sum_is_correct( + input_1: int, + input_2: int, + expected: int, +) -> None: + assert input_1 + input_2 == expected +``` + +## 4) 💣❌ Test your error handling thoroughly based on error messages +Functions that have error handling which is not properly covered by the tests should be classified as not tested at all because all kinds of unexpected behavior can occur.
+If your function raises an error, you should test that it raises the correct error. You can use the ``pytest.raises`` context manager to check that the function raises the expected error. + +```python +import pytest + +def divide(a: int, b: int) -> float: + return a / b + +def test_divide_by_zero_raises_error() -> None: + with pytest.raises(ZeroDivisionError): + divide(1, 0) +``` + +Note that it's crucial to put a ``return`` statement at the end of an error test so that the code that comes after it is not executed as well. + +```python +from typing import List, Union + +import numpy as np +import pytest + + +def function_for_an_array(input: Union[List[int], np.ndarray]) -> np.ndarray: + if isinstance(input, list): + raise TypeError("Input must be a numpy array") + + # Do something with the input + return input + +@pytest.mark.parametrize( + "input", + [ + [1, 2, 3], + np.array([1, 2, 3]), + ], +) +def test_function_for_an_array( + input: Union[List[int], np.ndarray], +) -> None: + if isinstance(input, list): + with pytest.raises(TypeError): + function_for_an_array(input) + + return # without this, the following code would still be executed and fail + + result = function_for_an_array(np.array(input)) + assert result is not None + +``` + +However, this is not reliable enough for functions that can raise the same exception type in different contexts. In this case, you can use the ``match`` argument of ``pytest.raises`` to check the error message. The next function to test raises a ``ValueError`` if either ``a`` or ``b`` is negative. + +```python +import pytest + +def sum_non_negative_values(a: int, b: int) -> int: + if a < 0: + raise ValueError("a must be non-negative") + + if b < 0: + raise ValueError("b must be non-negative") + + return a + b +``` + +Now, the following test will pass but ``b`` being negative is never tested.
+ +```python +@pytest.mark.parametrize( + "a, b, expected", + [ + (-1, 1, ValueError()), + (-1, -1, ValueError()), + ], +) +def test_sum_non_negative_values_raises_error( + a: int, + b: int, + expected: Exception, +) -> None: + with pytest.raises(type(expected)): + sum_non_negative_values(a, b) +``` + +``b`` being negative is never hit because ``a`` is negative in both tests. Yet, the ``ValueError`` is still properly raised. Such a situation can be avoided by using the ``match`` argument of ``pytest.raises`` to catch and check the error message. + +```python +@pytest.mark.parametrize( + "a, b, expected", + [ + (-1, 1, ValueError("a must be non-negative")), + (-1, -1, ValueError("b must be non-negative")), # this test will fail + ], +) +def test_sum_non_negative_values_raises_error( + a: int, + b: int, + expected: Exception, +) -> None: + error_catch_phrase = str(expected) + with pytest.raises(type(expected), match=error_catch_phrase): + sum_non_negative_values(a, b) +``` + +Due to the enhanced test, the test will now fail with the following output: + +```bash + with pytest.raises(ValueError, match=expected): +> sum_non_negative_values(a, b) +E AssertionError: Regex pattern did not match. +E Regex: 'b must be non-negative' +E Input: 'a must be non-negative' +``` + +Of course, the same principles apply for warnings that can be caught with ``pytest.warns``. + +```python +import pytest + +def function_that_warns() -> None: + import warnings + warnings.warn("This is a warning", UserWarning) + +def test_function_that_warns() -> None: + with pytest.warns(UserWarning, match="This is a warning"): + function_that_warns() + + return +``` + +## 5) 🧪🧫 Test edge cases +Edge cases are the limits of the input space. They are often the source of bugs in code.
+Let's say your function starts to misbehave when the input is 0. You should write a test +for that case. + +```python +import pytest + +def divide(a: int, b: int) -> float: + return a / b + +def test_divide_by_zero() -> None: + with pytest.raises(ZeroDivisionError): + divide(1, 0) +``` \ No newline at end of file diff --git a/tests/fixtures.py b/tests/fixtures.py index 54f59a25..757da973 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,89 +1,224 @@ -import numpy as np +### Imports ### + import os +from typing import List + +import numpy as np import pytest +from tests.tests_for_utils.utils_models import ( + NoiseEstimationReference, + RefDifferenceKernel, +) + +### Constants ### test_directory = os.path.dirname(os.path.abspath(__file__)) path_to_resources = os.path.join(test_directory, "resources") +### Fixtures ### + @pytest.fixture -def spectrum() -> np.ndarray: +def spectrum() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "spectrum.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "spectrum.csv"), + delimiter=",", + ) ] @pytest.fixture -def spectrum_arpls() -> np.ndarray: +def spectrum_arpls() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "spectrum_arpls.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "spectrum_arpls.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_airpls() -> np.ndarray: +def reference_airpls() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_airpls.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_airpls.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_arpls() -> np.ndarray: +def reference_arpls() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_arpls.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_arpls.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_msc_mean() -> 
np.ndarray: +def reference_msc_mean() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_msc_mean.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_msc_mean.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_msc_median() -> np.ndarray: +def reference_msc_median() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_msc_median.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_msc_median.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_sg_15_2() -> np.ndarray: +def reference_sg_15_2() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_sg_15_2.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_sg_15_2.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_snv() -> np.ndarray: +def reference_snv() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_snv.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_snv.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_whitakker() -> np.ndarray: +def reference_whittaker() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_whitakker.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_whittaker.csv"), + delimiter=",", + ) ] + + +@pytest.fixture +def spectrum_whittaker_auto_lambda() -> np.ndarray: + spectral_data = np.loadtxt( + os.path.join(path_to_resources, "spectrum_whittaker_auto_lambda.csv"), + delimiter=",", + skiprows=1, + ) + + return spectral_data[::, 2] + + +@pytest.fixture +def noise_level_whittaker_auto_lambda() -> np.ndarray: + spectral_data = np.loadtxt( + os.path.join(path_to_resources, "spectrum_whittaker_auto_lambda.csv"), + delimiter=",", + skiprows=1, + ) + + return spectral_data[::, 3] + + +@pytest.fixture +def reference_finite_differences(kind: str) -> 
List[RefDifferenceKernel]: + fpath = os.path.join( + path_to_resources, + f"./finite_differences/reference_{kind}_differences.csv", + ) + fin_diff_table = np.genfromtxt( + fpath, + skip_header=2, + delimiter=",", + filling_values=np.nan, + dtype=np.float64, + ) + fin_diff_ordered_coeffs = [] + for row_idx in range(0, fin_diff_table.shape[0]): + # the first column is the difference order, the second column is the accuracy, + # and the remaining columns are the coefficients where the trailing NaNs are + # removed + row = fin_diff_table[row_idx, ::] + fin_diff_ordered_coeffs.append( + RefDifferenceKernel( + differences=round(row[0]), + accuracy=round(row[1]), + kernel=row[2:][~np.isnan(row[2:])], + ) + ) + + return fin_diff_ordered_coeffs + + +@pytest.fixture +def noise_level_estimation_signal() -> np.ndarray: + fpath = os.path.join( + path_to_resources, + "noise_level_estimation/noise_estimation_refs.csv", + ) + data = np.genfromtxt( + fpath, + delimiter=",", + skip_header=1, + filling_values=np.nan, + dtype=np.float64, + ) + + # the original signal is indicated by the first 4 columns with metadata being NaN + metadata = data[::, 0:4] + signal_idx = np.where(np.isnan(metadata).all(axis=1))[0][0] + + return data[signal_idx, 4:] + + +@pytest.fixture +def noise_level_estimation_references() -> List[NoiseEstimationReference]: + fpath = os.path.join( + path_to_resources, + "noise_level_estimation/noise_estimation_refs.csv", + ) + data = np.genfromtxt( + fpath, + delimiter=",", + skip_header=1, + filling_values=np.nan, + dtype=np.float64, + ) + + # the original signal is indicated by the first 4 columns with metadata being NaN + # it has to be excluded from the references + metadata = data[::, 0:4] + signal_idx = np.where(np.isnan(metadata).all(axis=1))[0][0] + data = np.delete(data, obj=signal_idx, axis=0) + + # then, all the references are extracted + noise_level_refs = [] + for row_idx in range(0, data.shape[0]): + row = data[row_idx, ::] + # if the window size is 0, it 
is set to None because this indicates that the + # global noise level is to be estimated rather than a local one + window_size = int(row[0]) + window_size = window_size if window_size > 0 else None + noise_level_refs.append( + NoiseEstimationReference( + window_size=window_size, + min_noise_level=row[1], + differences=round(row[2]), + accuracy=round(row[3]), + noise_level=row[4:], + ) + ) + + return noise_level_refs diff --git a/tests/resources/finite_differences/reference_central_differences.csv b/tests/resources/finite_differences/reference_central_differences.csv new file mode 100644 index 00000000..ca0e00a6 --- /dev/null +++ b/tests/resources/finite_differences/reference_central_differences.csv @@ -0,0 +1,22 @@ +From https://en.wikipedia.org/wiki/Finite_difference_coefficient#Central_finite_difference,,,,,,,,,,,, +Difference Order,Accuracy,,,,,,,,,,, +1,2,-0.5,0,0.5,,,,,,,, +1,4,0.0833333333333333,-0.666666666666667,0,0.666666666666667,-0.0833333333333333,,,,,, +1,6,-0.0166666666666667,0.15,-0.75,0,0.75,-0.15,0.0166666666666667,,,, +1,8,0.00357142857142857,-0.0380952380952381,0.2,-0.8,0,0.8,-0.2,0.0380952380952381,-0.00357142857142857,, +2,2,1,-2,1,,,,,,,, +2,4,-0.0833333333333333,1.33333333333333,-2.5,1.33333333333333,-0.0833333333333333,,,,,, +2,6,0.0111111111111111,-0.15,1.5,-2.72222222222222,1.5,-0.15,0.0111111111111111,,,, +2,8,-0.00178571428571429,0.0253968253968254,-0.2,1.6,-2.84722222222222,1.6,-0.2,0.0253968253968254,-0.00178571428571429,, +3,2,-0.5,1,0,-1,0.5,,,,,, +3,4,0.125,-1,1.625,0,-1.625,1,-0.125,,,, +3,6,-0.0291666666666667,0.3,-1.40833333333333,2.03333333333333,0,-2.03333333333333,1.40833333333333,-0.3,0.0291666666666667,, +4,2,1,-4,6,-4,1,,,,,, +4,4,-0.166666666666667,2,-6.5,9.33333333333333,-6.5,2,-0.166666666666667,,,, +4,6,0.0291666666666667,-0.4,2.81666666666667,-8.13333333333333,11.375,-8.13333333333333,2.81666666666667,-0.4,0.0291666666666667,, +5,2,-0.5,2,-2.5,0,2.5,-2,0.5,,,, 
+5,4,0.166666666666667,-1.5,4.33333333333333,-4.83333333333333,0,4.83333333333333,-4.33333333333333,1.5,-0.166666666666667,, +5,6,-0.0451388888888889,0.527777777777778,-2.71875,6.5,-6.72916666666667,0,6.72916666666667,-6.5,2.71875,-0.527777777777778,0.0451388888888889 +6,2,1,-6,15,-20,15,-6,1,,,, +6,4,-0.25,3,-13,29,-37.5,29,-13,3,-0.25,, +6,6,0.0541666666666667,-0.791666666666667,5.4375,-19.5,40.375,-51.15,40.375,-19.5,5.4375,-0.791666666666667,0.0541666666666667 diff --git a/tests/resources/finite_differences/reference_forward_differences.csv b/tests/resources/finite_differences/reference_forward_differences.csv new file mode 100644 index 00000000..6dfea2af --- /dev/null +++ b/tests/resources/finite_differences/reference_forward_differences.csv @@ -0,0 +1,6 @@ +From https://en.wikipedia.org/wiki/Finite_difference_coefficient#Forward_finite_difference,,,,,,,,,, +Difference Order,Accuracy,0,1,2,3,4,5,6,7,8 +1,1,-1,1,,,,,,, +2,1,1,-2,1,,,,,, +3,1,-1,3,-3,1,,,,, +4,1,1,-4,6,-4,1,,,, diff --git a/tests/resources/noise_level_estimation/noise_estimation_refs.csv b/tests/resources/noise_level_estimation/noise_estimation_refs.csv new file mode 100644 index 00000000..65db6a81 --- /dev/null +++ b/tests/resources/noise_level_estimation/noise_estimation_refs.csv @@ -0,0 +1,8 @@ +Window Size,Min Noise Level,Difference Order,Accuracy,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100 
+,,,,-0.0590418038387701,0.0943535834281746,0.1996106967074170,0.2076665665445250,0.2565976378911490,0.3130822235037050,0.4272820436955330,0.4336807688473590,0.5529790761949000,0.6205116979795420,0.6367459615029690,0.6402323907080600,0.7612810390503280,0.7352360002255730,0.7677988900730690,0.8308269827744960,0.8643816497026570,0.8600246311281250,0.9150360580810840,0.8493913653842080,0.9063026644138470,0.9989087664664360,1.0102321479428500,0.9726077451254480,1.0229378673527500,1.0037443170690300,0.9506403941093230,0.8659025013506060,1.0425258859932300,1.0269948414240000,1.0210347447476800,0.9869294147835280,0.9694273518552160,0.8751237973313290,0.8520166643388930,0.8161569956410880,0.7678425167804110,0.7700632777094800,0.7446175573498970,0.6553331368003540,0.7449736813899550,0.6302711853278030,0.6799413262506640,0.7227251574958850,0.8517863323804210,0.8181644158567000,0.9213256152994750,1.2810814194972500,0.8946588499365640,1.3736047892705300,1.2260807298006400,1.0611294804157300,0.9219014587530430,0.5680112835139860,0.5377392990469220,0.3298106843898140,-0.1719686918509980,-0.2819466900079930,-0.1992173465552170,-0.3814389372698620,-0.5068395639674160,-0.5440866677749160,-0.7586247980167080,-0.7157705137539000,-0.7071693108577800,-0.7763499947312900,-0.8502087885332560,-0.9487019031392380,-0.8849128283116610,-1.0185413995359800,-0.9063415914629020,-0.8742798194799050,-1.0850527768695800,-0.9846158086303880,-0.9157723830506820,-0.9821058771097520,-0.9963697347184110,-0.9650347191718880,-1.0039196559098500,-1.0209341512160900,-0.9892918059416240,-0.9840537215786370,-0.8851893179125000,-0.8769338218316060,-0.8940188473463740,-0.8425875126869280,-0.8289408469012870,-0.7532063497603390,-0.7700985647336680,-0.6656819958289950,-0.5902129175975290,-0.5184668194250220,-0.4741418132576440,-0.4164773414535820,-0.3324609887739230,-0.2674577016845110,-0.3231561578354770,-0.1671549465881780,-0.0991688934094777,0.0184878102069366,0.0593230959994008 
+0,0.02,5,2,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795 
+1,0.02,5,2,0.0200000000000000,0.1451029282433400,0.0772047120710864,0.0200000000000000,0.0200000000000000,0.0489717431487879,0.0274495861433574,0.0357637508810199,0.0684993097853416,0.0200000000000000,0.0700522575202647,0.0612439573762328,0.0260638611495695,0.0812309192143508,0.0461588271225360,0.0222450189854631,0.0531891974716099,0.0513633900333628,0.0200000000000000,0.0733367134797685,0.0775479046939276,0.0217871289061783,0.0717740549983404,0.0200000000000000,0.0422277561726533,0.0200000000000000,0.1153667013297750,0.0589669911798556,0.0820261518839881,0.0926552550358730,0.0200000000000000,0.0257106262844677,0.0200000000000000,0.0254223605983094,0.0476623506657087,0.0364680830983335,0.0200000000000000,0.0679620090761974,0.1080690093938710,0.0765226435820176,0.0200000000000000,0.0788703870443346,0.0320053196940122,0.0459419537572654,0.0262818803884840,0.2703113465235740,0.4519676897481760,0.3204245391241220,0.0830564581735304,0.4104779108823870,0.3857155691067090,0.2175026323541390,0.1222975788331630,0.0636683385614897,0.2978745616108900,0.2054137210245730,0.1715374549314800,0.2692057206749080,0.0200000000000000,0.1740928219363430,0.1602392629955660,0.0694865118001325,0.0731196713058297,0.1363617883550300,0.0551904931965811,0.0200000000000000,0.0733696078640926,0.0964238550462272,0.0360082885289664,0.1034773002961070,0.2403755230857390,0.1714901119416400,0.0772462931068471,0.1921249073584610,0.0700341809602307,0.0647348495205131,0.0649691733350237,0.0200000000000000,0.0501078392212950,0.0421229430929182,0.0327442257339355,0.0200000000000000,0.0538442229886795,0.0727395594456153,0.0287270678498866,0.0200000000000000,0.0262830354573370,0.0200000000000000,0.0312480515132131,0.0509995579399268,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0796640661166812,0.0931431553133826,0.0200000000000000,0.0595196003015288,0.0510785917754346,0.0443036193627499,0.0200000000000000 
+11,0.02,5,2,0.0489717431487881,0.0274495861433573,0.0357637508810200,0.0489717431487881,0.0357637508810200,0.0357637508810200,0.0489717431487881,0.0357637508810200,0.0357637508810200,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0513633900333629,0.0513633900333629,0.0513633900333629,0.0531891974716106,0.0513633900333629,0.0513633900333629,0.0513633900333629,0.0461588271225360,0.0422277561726497,0.0513633900333629,0.0513633900333629,0.0589669911798476,0.0717740549983399,0.0589669911798476,0.0422277561726497,0.0422277561726497,0.0257106262844691,0.0422277561726497,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0257106262844691,0.0364680830983336,0.0364680830983336,0.0459419537572652,0.0459419537572652,0.0459419537572652,0.0679620090761976,0.0765226435820174,0.0788703870443343,0.0788703870443343,0.0830564581735283,0.2175026323541430,0.2175026323541430,0.2175026323541430,0.2703113465235760,0.2703113465235760,0.2175026323541430,0.2175026323541430,0.2054137210245730,0.2054137210245730,0.1740928219363430,0.1715374549314810,0.1602392629955660,0.1602392629955660,0.1602392629955660,0.1363617883550290,0.0733696078640913,0.0733696078640913,0.0731196713058293,0.0733696078640913,0.0733696078640913,0.0733696078640913,0.0772462931068475,0.0964238550462242,0.0772462931068475,0.0772462931068475,0.0772462931068475,0.0772462931068475,0.0700341809602293,0.0700341809602293,0.0649691733350243,0.0647348495205135,0.0538442229886796,0.0538442229886796,0.0501078392212965,0.0421229430929192,0.0327442257339349,0.0287270678498867,0.0312480515132135,0.0312480515132135,0.0287270678498867,0.0262830354573363,0.0262830354573363,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0312480515132135,0.0443036193627497,0.0200000000000000,0.0443036193627497,0.0443036193627497,0.0510785917754344,0.0510785917754344,0.0510785917754344 
+25,0.02,5,2,0.0357637508810199,0.0489717431487883,0.0461588271225360,0.0357637508810199,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0489717431487883,0.0489717431487883,0.0489717431487883,0.0513633900333628,0.0489717431487883,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0489717431487883,0.0513633900333628,0.0513633900333628,0.0513633900333628,0.0513633900333628,0.0461588271225360,0.0476623506657080,0.0461588271225360,0.0422277561726498,0.0461588271225360,0.0461588271225360,0.0476623506657080,0.0476623506657080,0.0476623506657080,0.0422277561726498,0.0459419537572654,0.0422277561726498,0.0422277561726498,0.0459419537572654,0.0459419537572654,0.0476623506657080,0.0589669911798475,0.0679620090761978,0.0679620090761978,0.0765226435820176,0.0679620090761978,0.0679620090761978,0.0765226435820176,0.0788703870443342,0.0830564581735285,0.0830564581735285,0.1080690093938710,0.1222975788331700,0.1222975788331700,0.1222975788331700,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1222975788331700,0.1363617883550290,0.1222975788331700,0.1034773002961070,0.0964238550462243,0.0772462931068477,0.0772462931068477,0.0733696078640913,0.0731196713058293,0.0700341809602294,0.0694865118001325,0.0700341809602294,0.0694865118001325,0.0649691733350244,0.0647348495205134,0.0551904931965811,0.0538442229886798,0.0509995579399262,0.0509995579399262,0.0501078392212966,0.0421229430929192,0.0421229430929192,0.0421229430929192,0.0421229430929192,0.0327442257339349,0.0327442257339349,0.0327442257339349,0.0327442257339349,0.0312480515132135,0.0312480515132135,0.0327442257339349,0.0327442257339349,0.0312480515132135,0.0312480515132135,0.0443036193627499,0.0312480515132135,0.0287270678498868,0.0262830354573363,0.0262830354573363,0.0312480515132135,0.0312480515132135 
+11,0.02,5,6,0.0423226649461898,0.0213382604260963,0.0345832777894842,0.0423226649461898,0.0345832777894842,0.0345832777894842,0.0423226649461898,0.0345832777894842,0.0345832777894842,0.0423226649461898,0.0423226649461898,0.0466425139275356,0.0503726990070914,0.0503726990070914,0.0503726990070914,0.0522689341160636,0.0503726990070914,0.0503726990070914,0.0503726990070914,0.0466425139275356,0.0306469700143294,0.0503726990070914,0.0486302113596101,0.0486302113596101,0.0573742018717883,0.0486302113596101,0.0306469700143294,0.0306469700143294,0.0294598485057315,0.0306469700143294,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0294598485057315,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0486731792847783,0.0647624501115769,0.0767373878041499,0.0775063075538756,0.0796945243644425,0.0796945243644425,0.0855298076266875,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2717040293160000,0.2268032219320890,0.1736214700293550,0.1736214700293550,0.1713455406124420,0.1574551250434120,0.1317389301256910,0.1317389301256910,0.1317389301256910,0.1279514761457010,0.0821466649254990,0.0821466649254990,0.0796827022609885,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0932762768253730,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0811093220566633,0.0573316881612044,0.0573316881612044,0.0563031203635952,0.0504775230475304,0.0504775230475304,0.0504775230475304,0.0471399118656961,0.0422763918127036,0.0422763918127036,0.0356466128786073,0.0356466128786073,0.0356466128786073,0.0303400136608894,0.0272555144285884,0.0272555144285884,0.0253196032708275,0.0253196032708275,0.0253196032708275,0.0253196032708275,0.0272555144285884,0.0303400136608894,0.0447277571118028,0.0272555144285884,0.0447277571118028,0.0447277571118028,0.0538429638790465,0.0538429638790465,0.0538429638790465 
+11,0.02,6,2,0.0299189178180329,0.0299189178180329,0.0439481529019891,0.0439481529019891,0.0439481529019891,0.0330467400508825,0.0330467400508825,0.0330467400508825,0.0439481529019891,0.0439481529019891,0.0439481529019891,0.0330467400508825,0.0330467400508825,0.0330467400508825,0.0442014748656228,0.0442014748656228,0.0442014748656228,0.0213260087479265,0.0213260087479265,0.0213260087479265,0.0268493269616991,0.0348197958494532,0.0432342655432602,0.0432342655432602,0.0432342655432602,0.0348197958494532,0.0348197958494532,0.0415996328057206,0.0415996328057206,0.0385091946216850,0.0385091946216850,0.0385091946216850,0.0292728953624526,0.0292728953624526,0.0292728953624526,0.0292728953624526,0.0385091946216850,0.0415996328057206,0.0556937879041626,0.0688174135868027,0.0688174135868027,0.0730194668621024,0.0789472020180212,0.1022476970064910,0.1113502653907550,0.1302253619997060,0.1022476970064910,0.1404897947406030,0.1438409548185280,0.1438409548185280,0.1438409548185280,0.1438409548185280,0.1404897947406030,0.1128033154936850,0.1028090077284430,0.1025892369546820,0.1025892369546820,0.1028090077284430,0.1025892369546820,0.0539406445041053,0.0539406445041053,0.0518727238279349,0.0518727238279349,0.0539406445041053,0.0539406445041053,0.0539406445041053,0.1025742752639520,0.1025742752639520,0.0539406445041053,0.0528933997260198,0.0528933997260198,0.0528933997260198,0.0528933997260198,0.0287996226521029,0.0277483480339819,0.0277483480339819,0.0287996226521029,0.0287996226521029,0.0277483480339819,0.0277483480339819,0.0287996226521029,0.0309753027350675,0.0377844829340734,0.0388385937007103,0.0388385937007103,0.0388385937007103,0.0377844829340734,0.0372670600985638,0.0309753027350675,0.0309753027350675,0.0372670600985638,0.0372670600985638,0.0372670600985638,0.0372670600985638,0.0242780219496502,0.0238192920369165,0.0242780219496502,0.0242780219496502,0.0530368406787244,0.0530368406787244,0.0759226297230423 diff --git 
a/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods b/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods new file mode 100644 index 00000000..b684017c Binary files /dev/null and b/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods differ diff --git a/tests/resources/reference_whitakker.csv b/tests/resources/reference_whittaker.csv similarity index 100% rename from tests/resources/reference_whitakker.csv rename to tests/resources/reference_whittaker.csv diff --git a/tests/resources/spectrum_whittaker_auto_lambda.csv b/tests/resources/spectrum_whittaker_auto_lambda.csv new file mode 100644 index 00000000..87a6d774 --- /dev/null +++ b/tests/resources/spectrum_whittaker_auto_lambda.csv @@ -0,0 +1,502 @@ +# x,y_true, y_noisy,std +-5.000000000000000000e+00,3.917848549326277130e+00,3.913954426305540135e+00,3.096822336193649078e-02 +-4.980000000000000426e+00,3.921217953957871316e+00,3.913295001715116683e+00,4.157579901324756633e-02 +-4.959999999999999964e+00,3.924518194950613825e+00,3.967828933711835759e+00,2.021882259976331628e-02 +-4.940000000000000391e+00,3.927749153496185919e+00,3.913240339328647899e+00,3.283002638476407303e-02 +-4.919999999999999929e+00,3.930910713280428848e+00,3.906660793153114319e+00,2.629869250880701587e-02 +-4.900000000000000355e+00,3.934002760487532058e+00,3.977753591201872219e+00,2.303151065858748819e-02 +-4.879999999999999893e+00,3.937025183804129469e+00,3.918578498160504520e+00,nan +-4.860000000000000320e+00,3.939977874423308712e+00,3.909640759183042213e+00,2.308082479438558487e-02 +-4.839999999999999858e+00,3.942860726048525777e+00,3.928403360537521927e+00,2.120712443290193039e-02 +-4.820000000000000284e+00,3.945673634897434390e+00,3.950831318015709748e+00,2.136998567160089402e-02 +-4.799999999999999822e+00,3.948416499705618588e+00,3.931835980130326647e+00,nan +-4.780000000000000249e+00,3.951089221730241796e+00,3.913960091908467831e+00,3.219054411264235527e-02 
+-4.759999999999999787e+00,3.953691704753600877e+00,3.944102620606957821e+00,4.689772360937648904e-02 +-4.740000000000000213e+00,3.956223855086586916e+00,3.927372355534060500e+00,3.710244685607267529e-02 +-4.719999999999999751e+00,3.958685581572061629e+00,3.935926500645895665e+00,4.757583831925741441e-02 +-4.700000000000000178e+00,3.961076795588137855e+00,3.940090812557987210e+00,2.609806421525888431e-02 +-4.679999999999999716e+00,3.963397411051368557e+00,4.006116813960685086e+00,4.372331764659243697e-02 +-4.660000000000000142e+00,3.965647344419848785e+00,3.981343849480365460e+00,2.459103331165375314e-02 +-4.639999999999999680e+00,3.967826514696219053e+00,3.957171589280387813e+00,3.317473331736146142e-02 +-4.620000000000000107e+00,3.969934843430585225e+00,3.918252069140263139e+00,2.429907714232983768e-02 +-4.599999999999999645e+00,3.971972254723340701e+00,3.989915028726924273e+00,4.662424792287282738e-02 +-4.580000000000000071e+00,3.973938675227898898e+00,3.984671553543385869e+00,3.256017889923769337e-02 +-4.559999999999999609e+00,3.975834034153333807e+00,4.036915022948532439e+00,2.863787788929512274e-02 +-4.540000000000000036e+00,3.977658263266929062e+00,4.018106961074466632e+00,4.080141249468031972e-02 +-4.519999999999999574e+00,3.979411296896634198e+00,3.935916303030416064e+00,4.359886618341140413e-02 +-4.500000000000000000e+00,3.981093071933426764e+00,3.933475657586883667e+00,3.361348387306223318e-02 +-4.480000000000000426e+00,3.982703527833586943e+00,3.943095738054793742e+00,2.516707436675810414e-02 +-4.459999999999999964e+00,3.984242606620877147e+00,4.000466770834566255e+00,3.088943627207259290e-02 +-4.439999999999999503e+00,3.985710252888627458e+00,3.964649483902519478e+00,2.963575571223017041e-02 +-4.419999999999999929e+00,3.987106413801730032e+00,3.984856670488913455e+00,nan +-4.400000000000000355e+00,3.988431039098542463e+00,3.971856089063352968e+00,2.495601165215273129e-02 
+-4.379999999999999893e+00,3.989684081092698342e+00,4.030635978593705815e+00,3.734265590577109939e-02 +-4.360000000000000320e+00,3.990865494674820546e+00,3.981218782347658802e+00,nan +-4.339999999999999858e+00,3.991975237314147051e+00,3.968899147970850194e+00,3.269095491082592980e-02 +-4.320000000000000284e+00,3.993013269060063486e+00,3.982898269224011578e+00,2.946282537533189208e-02 +-4.299999999999999822e+00,3.993979552543539313e+00,3.939100003418123386e+00,2.865522681337081753e-02 +-4.280000000000000249e+00,3.994874052978472534e+00,3.945635266010562692e+00,3.472014240355719705e-02 +-4.259999999999999787e+00,3.995696738162945127e+00,4.000947172556999831e+00,nan +-4.240000000000000213e+00,3.996447578480378571e+00,4.020343398727571405e+00,3.157915029390712114e-02 +-4.219999999999999751e+00,3.997126546900602762e+00,4.063889834011682467e+00,4.463116837108123403e-02 +-4.200000000000000178e+00,3.997733618980828574e+00,4.019044487495743390e+00,2.820994850940350754e-02 +-4.179999999999999716e+00,3.998268772866525378e+00,3.999693957024348379e+00,2.842345038179301675e-02 +-4.160000000000000142e+00,3.998731989292211964e+00,4.014216556726715623e+00,nan +-4.139999999999999680e+00,3.999123251582146654e+00,3.997568023205597232e+00,3.579063885698285929e-02 +-4.120000000000000107e+00,3.999442545650929937e+00,3.969380819221903778e+00,3.710054561456280209e-02 +-4.099999999999999645e+00,3.999689860004008501e+00,3.973378322910303240e+00,2.674569508640989302e-02 +-4.080000000000000071e+00,3.999865185738094020e+00,4.018743284686774331e+00,4.125625535047831166e-02 +-4.059999999999999609e+00,3.999968516541477559e+00,4.002858211233585983e+00,3.722005134771839280e-02 +-4.040000000000000036e+00,3.999999848694262283e+00,4.021668953186641637e+00,2.647815044301944643e-02 +-4.019999999999999574e+00,3.999959181068493574e+00,3.963600307242701959e+00,2.404215547884083759e-02 +-4.000000000000000000e+00,3.999846515128201663e+00,4.001938886864440548e+00,2.316279516014600401e-02 
+-3.979999999999999982e+00,3.999661854929348337e+00,3.961121319087129944e+00,3.165838742547153611e-02 +-3.959999999999999964e+00,3.999405207119680838e+00,3.988298094713540465e+00,2.927789155113174474e-02 +-3.939999999999999947e+00,3.999076580938492498e+00,4.015211499281781116e+00,1.965729088192920068e-02 +-3.919999999999999929e+00,3.998675988216290555e+00,3.996739007072117822e+00,2.217006837373004455e-02 +-3.899999999999999911e+00,3.998203443374369392e+00,4.006967280403993747e+00,3.548421177927273823e-02 +-3.879999999999999893e+00,3.997658963424292722e+00,3.992420505343744708e+00,nan +-3.859999999999999876e+00,3.997042567967279414e+00,3.989833605239252012e+00,1.982777448634700374e-02 +-3.839999999999999858e+00,3.996354279193499615e+00,3.977133776300772361e+00,nan +-3.820000000000000284e+00,3.995594121881275385e+00,3.968534106303187148e+00,4.404511893658814153e-02 +-3.799999999999999822e+00,3.994762123396186304e+00,4.046166805023654334e+00,3.313448396509128396e-02 +-3.780000000000000249e+00,3.993858313690090256e+00,3.981901684829826316e+00,2.802464965497018790e-02 +-3.759999999999999787e+00,3.992882725300037183e+00,3.968265699229948318e+00,3.089639612993963155e-02 +-3.740000000000000213e+00,3.991835393347104688e+00,3.993594708763612111e+00,2.708670552718019792e-02 +-3.719999999999999751e+00,3.990716355535129711e+00,3.978060837482736645e+00,3.452291125902230001e-02 +-3.700000000000000178e+00,3.989525652149353174e+00,3.999624538760653270e+00,3.961449084431505063e-02 +-3.679999999999999716e+00,3.988263326054968250e+00,3.959951862232888420e+00,2.127983196111482220e-02 +-3.660000000000000142e+00,3.986929422695578484e+00,3.992008806185594505e+00,3.730351277723908954e-02 +-3.639999999999999680e+00,3.985523990091560886e+00,3.983111142787472581e+00,nan +-3.620000000000000107e+00,3.984047078838337974e+00,3.997789721210798675e+00,4.189555115844698086e-02 +-3.599999999999999645e+00,3.982498742104557010e+00,3.978584700131533758e+00,3.025928312227217989e-02 
+-3.580000000000000071e+00,3.980879035630175089e+00,3.999259303534389520e+00,2.857730727887747985e-02 +-3.560000000000000053e+00,3.979188017724452298e+00,3.990540510474509084e+00,nan +-3.540000000000000036e+00,3.977425749263855170e+00,4.006843582528653691e+00,3.607893641315891575e-02 +-3.520000000000000018e+00,3.975592293689864665e+00,3.989502366165125213e+00,5.280686333954774658e-02 +-3.500000000000000000e+00,3.973687717006691322e+00,3.971463221592128612e+00,3.195062668002687095e-02 +-3.479999999999999982e+00,3.971712087778907385e+00,3.984236422056169680e+00,3.653447718907179564e-02 +-3.459999999999999964e+00,3.969665477128972331e+00,3.965390902218237645e+00,nan +-3.439999999999999947e+00,3.967547958734682023e+00,3.988691848103080417e+00,3.194360253100081426e-02 +-3.419999999999999929e+00,3.965359608826521054e+00,3.915564628228665800e+00,3.335365548095119148e-02 +-3.399999999999999911e+00,3.963100506184926264e+00,3.933238574213607119e+00,1.955557934578588258e-02 +-3.379999999999999893e+00,3.960770732137463668e+00,3.984902392991356290e+00,3.531213606899333973e-02 +-3.359999999999999876e+00,3.958370370555916118e+00,3.966998602981461008e+00,3.858838304594502944e-02 +-3.339999999999999858e+00,3.955899507853295471e+00,3.980905662625323860e+00,nan +-3.320000000000000284e+00,3.953358232980767717e+00,3.914229362381833166e+00,3.160429335949033886e-02 +-3.299999999999999822e+00,3.950746637424507934e+00,3.994917048087540667e+00,3.132077094697865299e-02 +-3.280000000000000249e+00,3.948064815202489974e+00,3.943874923203797955e+00,2.512446779038992120e-02 +-3.259999999999999787e+00,3.945312862861222403e+00,3.950489243789083726e+00,2.346503295241367942e-02 +-3.240000000000000213e+00,3.942490879472442256e+00,3.927196810974445729e+00,3.915093278749896127e-02 +-3.219999999999999751e+00,3.939598966629797694e+00,3.947917455856836622e+00,2.476543934279373110e-02 +-3.200000000000000178e+00,3.936637228445541759e+00,3.969343297431183970e+00,3.412387431505126134e-02 
+-3.179999999999999716e+00,3.933605771547286523e+00,3.925906099200734189e+00,2.901476964540048928e-02 +-3.160000000000000142e+00,3.930504705074882921e+00,3.923668551812799166e+00,2.540509909929494770e-02 +-3.139999999999999680e+00,3.927334140677494201e+00,3.947226670336628995e+00,2.993884543265358869e-02 +-3.120000000000000107e+00,3.924094192511003776e+00,3.928346744753318465e+00,3.630472353955733339e-02 +-3.099999999999999645e+00,3.920784977235900914e+00,3.900075964511259929e+00,4.332776229755198882e-02 +-3.080000000000000071e+00,3.917406614015876531e+00,3.935772299731806978e+00,2.449157550065346814e-02 +-3.060000000000000053e+00,3.913959224517423507e+00,3.915896784887003879e+00,1.992532293741383978e-02 +-3.040000000000000036e+00,3.910442932910866976e+00,3.904143184923001275e+00,nan +-3.020000000000000018e+00,3.906857865873376578e+00,3.897907823509206615e+00,3.955967022312247006e-02 +-3.000000000000000000e+00,3.903204152594716536e+00,3.866438996497719049e+00,nan +-2.979999999999999982e+00,3.899481924786757148e+00,3.903357248827355352e+00,2.678761408580850739e-02 +-2.959999999999999964e+00,3.895691316698114193e+00,3.867880849871169868e+00,4.056478219242153765e-02 +-2.939999999999999947e+00,3.891832465135722785e+00,3.882397409790404375e+00,3.205584351100022877e-02 +-2.919999999999999929e+00,3.887905509495784173e+00,3.936961090451624568e+00,3.332833371997186156e-02 +-2.899999999999999911e+00,3.883910591807264279e+00,3.866043917754864445e+00,2.340248663650760716e-02 +-2.879999999999999893e+00,3.879847856792183247e+00,3.874818538198867124e+00,3.363872375973592233e-02 +-2.859999999999999876e+00,3.875717451948193837e+00,3.874819534869030413e+00,3.412069939766745885e-02 +-2.839999999999999858e+00,3.871519527660709503e+00,3.852214585211989117e+00,3.848297388549758169e-02 +-2.819999999999999840e+00,3.867254237353940471e+00,3.853548185955963934e+00,nan +-2.799999999999999822e+00,3.862921737693025825e+00,3.854754553304260423e+00,2.534333019107942730e-02 
+-2.779999999999999805e+00,3.858522188852927748e+00,3.841071553729384025e+00,2.160748256322369762e-02 +-2.759999999999999787e+00,3.854055754874210926e+00,3.906409499587252476e+00,3.989927560475871327e-02 +-2.739999999999999769e+00,3.849522604131409675e+00,3.892863359620862163e+00,4.394563528675898834e-02 +-2.719999999999999751e+00,3.844922909946673517e+00,3.865947691256906626e+00,4.331492627779613691e-02 +-2.699999999999999734e+00,3.840256851390052351e+00,3.824190739531758254e+00,2.394046463981249276e-02 +-2.680000000000000160e+00,3.835524614318547076e+00,3.910131192202466188e+00,2.358546713714732707e-02 +-2.660000000000000142e+00,3.830726392719251638e+00,3.842141175582946655e+00,3.143049748680051825e-02 +-2.640000000000000124e+00,3.825862390438136362e+00,3.823047218261981239e+00,3.201948188508568077e-02 +-2.620000000000000107e+00,3.820932823395655653e+00,3.872275415949148147e+00,2.823873364965024529e-02 +-2.600000000000000089e+00,3.815937922414227135e+00,3.805037068723069016e+00,2.923310143617065260e-02 +-2.580000000000000071e+00,3.810877936811229549e+00,3.847414491372637801e+00,4.797550196894023039e-02 +-2.560000000000000053e+00,3.805753138945383807e+00,3.843464708155962750e+00,3.266237082284365739e-02 +-2.540000000000000036e+00,3.800563829944950900e+00,3.798922067708248473e+00,3.133758414632323297e-02 +-2.520000000000000018e+00,3.795310346894044784e+00,3.827628701786526033e+00,2.317422859332170931e-02 +-2.500000000000000000e+00,3.789993071809343306e+00,3.809526646402432881e+00,3.295109809401969292e-02 +-2.479999999999999982e+00,3.784612442804545118e+00,3.747985523101889260e+00,4.050258952135226548e-02 +-2.459999999999999964e+00,3.779168967914883659e+00,3.806809286093004729e+00,1.923621539387884369e-02 +-2.439999999999999947e+00,3.773663242139748686e+00,3.781100587500339838e+00,nan +-2.419999999999999929e+00,3.768095968358509573e+00,3.749411789432546716e+00,3.866553106380841293e-02 
+-2.399999999999999911e+00,3.762467982883645590e+00,3.792726529826150994e+00,5.083314405848265299e-02 +-2.379999999999999893e+00,3.756780286536163693e+00,3.749106715790383237e+00,1.836938560312795232e-02 +-2.359999999999999876e+00,3.751034082261140767e+00,3.757735405285781649e+00,2.169879163528952090e-02 +-2.339999999999999858e+00,3.745230820445019315e+00,3.752632605069845528e+00,3.800387835900527478e-02 +-2.319999999999999840e+00,3.739372253249956035e+00,3.746880786136597141e+00,3.890813293409937923e-02 +-2.299999999999999822e+00,3.733460499441795921e+00,3.750215780850299829e+00,5.729187485296455273e-02 +-2.279999999999999805e+00,3.727498121354196670e+00,3.711741146279374792e+00,3.449917492064095981e-02 +-2.259999999999999787e+00,3.721488215798009147e+00,3.630930613745085189e+00,3.742083549552085997e-02 +-2.239999999999999769e+00,3.715434520886934155e+00,3.713166467526545222e+00,3.498465794338397744e-02 +-2.219999999999999751e+00,3.709341540901138323e+00,3.681110389341025968e+00,3.815477170164736920e-02 +-2.199999999999999734e+00,3.703214691441905515e+00,3.702545598010270389e+00,3.134983031602142645e-02 +-2.180000000000000160e+00,3.697060467232880399e+00,3.715653624123065768e+00,3.811872814876098720e-02 +-2.160000000000000142e+00,3.690886634985821591e+00,3.604065445077755392e+00,4.082881623022253242e-02 +-2.140000000000000124e+00,3.684702453758294638e+00,3.679127871616473033e+00,2.843288863848685011e-02 +-2.120000000000000107e+00,3.678518925173064336e+00,3.723207343503561972e+00,3.295457755380135079e-02 +-2.100000000000000089e+00,3.672349075728456036e+00,3.593677436562992877e+00,nan +-2.080000000000000071e+00,3.666208273188952926e+00,3.730259422314661233e+00,9.533727857204726819e-02 +-2.060000000000000053e+00,3.660114578688402620e+00,3.652676325481325481e+00,4.814528861415776767e-02 +-2.040000000000000036e+00,3.654089135686744871e+00,3.664039414557082708e+00,nan +-2.020000000000000018e+00,3.648156596278114172e+00,3.605572286811380867e+00,7.161423172160229222e-02 
+-2.000000000000000000e+00,3.642345584537503900e+00,3.539623085803978331e+00,6.708869557030820718e-02 +-1.979999999999999982e+00,3.636689195601250812e+00,3.619072317723509347e+00,5.743569593331862566e-02 +-1.959999999999999964e+00,3.631225527992778090e+00,3.711185604571868524e+00,8.364311972260785044e-02 +-1.939999999999999947e+00,3.625998245323204472e+00,3.719738846211896988e+00,7.184953346731948187e-02 +-1.919999999999999929e+00,3.621057161915839373e+00,3.640954715256664809e+00,6.558577658908354424e-02 +-1.899999999999999911e+00,3.616458845130710476e+00,3.606185742205383793e+00,1.060721402286213394e-01 +-1.879999999999999893e+00,3.612267225214248700e+00,3.649108418714726465e+00,9.092475993307447268e-02 +-1.859999999999999876e+00,3.608554201394059913e+00,3.709944524969695401e+00,7.155454639976135955e-02 +-1.839999999999999858e+00,3.605400230713310616e+00,3.419438852172148824e+00,1.228594590938914127e-01 +-1.819999999999999840e+00,3.602894883799263592e+00,3.593174533405012916e+00,1.550942001530101755e-01 +-1.799999999999999822e+00,3.601137349443074065e+00,3.520638523828726107e+00,1.219140064569037563e-01 +-1.779999999999999805e+00,3.600236867602554458e+00,3.608267192342823115e+00,1.205649991710633223e-01 +-1.759999999999999787e+00,3.600313068307190889e+00,3.453201243804023601e+00,1.376335561212327918e-01 +-1.739999999999999769e+00,3.601496192037469957e+00,3.607774685478800425e+00,1.665844544798526783e-01 +-1.719999999999999751e+00,3.603927165569829061e+00,3.640026211733285333e+00,nan +-1.699999999999999734e+00,3.607757506133415060e+00,3.642227320257549028e+00,1.268407969723837903e-01 +-1.679999999999999716e+00,3.613149026128606334e+00,3.803890280567367999e+00,1.576114673042917080e-01 +-1.660000000000000142e+00,3.620273310724677618e+00,3.735897857753192053e+00,2.214995227455958393e-01 +-1.640000000000000124e+00,3.629310941496409093e+00,3.663913595669907153e+00,nan +-1.620000000000000107e+00,3.640450440980700364e+00,3.641401992387191200e+00,1.551600530436650083e-01 
+-1.600000000000000089e+00,3.653886915724804396e+00,3.217048317236007815e+00,2.630495244546968703e-01 +-1.580000000000000071e+00,3.669820379129825394e+00,3.892532742459869688e+00,nan +-1.560000000000000053e+00,3.688453740214230780e+00,3.360368769070483985e+00,1.982329599456646163e-01 +-1.540000000000000036e+00,3.709990450350114877e+00,3.752961584101674131e+00,2.460402241728126826e-01 +-1.520000000000000018e+00,3.734631807042549934e+00,3.745930947639183550e+00,3.309654834926570310e-01 +-1.500000000000000000e+00,3.762573921872505167e+00,3.798556507140971927e+00,2.244332830917858124e-01 +-1.479999999999999982e+00,3.794004368706074271e+00,3.810433536595719151e+00,2.921392931771470414e-01 +-1.459999999999999964e+00,3.829098538041090460e+00,3.916929666201640181e+00,2.653814591464132566e-01 +-1.439999999999999947e+00,3.868015733723898286e+00,4.031456282874307462e+00,3.642239947464401273e-01 +-1.419999999999999929e+00,3.910895058985774497e+00,4.756015559092563905e+00,5.166291421943192130e-01 +-1.399999999999999911e+00,3.957851149539173718e+00,3.594508053563978400e+00,3.764090765633805180e-01 +-1.379999999999999893e+00,4.008969822019845530e+00,3.995552010376214991e+00,4.221795343405287393e-01 +-1.359999999999999876e+00,4.064303716012481082e+00,3.802382220218354369e+00,4.175557607001874061e-01 +-1.339999999999999858e+00,4.123868016884467913e+00,4.654621606587832972e+00,3.786510240332576771e-01 +-1.319999999999999840e+00,4.187636354293537977e+00,4.498210774344039820e+00,3.245373535245575369e-01 +-1.299999999999999822e+00,4.255536977153194123e+00,4.167062990719260895e+00,5.052235057933155415e-01 +-1.279999999999999805e+00,4.327449309673636613e+00,4.216951262431840597e+00,6.532837690567284694e-01 +-1.259999999999999787e+00,4.403200994517432854e+00,4.559110373914525560e+00,3.273162563351011967e-01 +-1.239999999999999769e+00,4.482565527836990249e+00,4.762139581965920243e+00,5.769640282217669558e-01 
+-1.219999999999999751e+00,4.565260586777036167e+00,3.280248253976095540e+00,5.275499350589685221e-01 +-1.199999999999999734e+00,4.650947142787573263e+00,4.276145716520765028e+00,nan +-1.179999999999999716e+00,4.739229443748227411e+00,4.600817916539200247e+00,6.092834368481748752e-01 +-1.160000000000000142e+00,4.829655934498800818e+00,4.534854213018084224e+00,6.756216430995778399e-01 +-1.140000000000000124e+00,4.921721169053698830e+00,5.024019049064923692e+00,5.385345533169113574e-01 +-1.120000000000000107e+00,5.014868748808485321e+00,5.239870345111208749e+00,5.402099371398212391e-01 +-1.100000000000000089e+00,5.108495299791377064e+00,5.485974253161926661e+00,4.018729221391404982e-01 +-1.080000000000000071e+00,5.201955478938754318e+00,5.318244642725155735e+00,6.373361947883031675e-01 +-1.060000000000000053e+00,5.294567975042502539e+00,6.760430236052386910e+00,6.124735590859445855e-01 +-1.040000000000000036e+00,5.385622445066512221e+00,5.752138952060318999e+00,4.249735541418221407e-01 +-1.020000000000000018e+00,5.474387301659097815e+00,4.159619060982223004e+00,8.032549364880220422e-01 +-1.000000000000000000e+00,5.560118243634827095e+00,6.137115336978383517e+00,6.702421100272603072e-01 +-9.799999999999995381e-01,5.642067398715594706e+00,5.872802388731122925e+00,nan +-9.599999999999999645e-01,5.719492927646578906e+00,6.281236755604411748e+00,6.472217891646340204e-01 +-9.399999999999995026e-01,5.791668921639725909e+00,5.565492156114431488e+00,6.959215464098150727e-01 +-9.199999999999999289e-01,5.857895411580281930e+00,6.546745314566285501e+00,9.349172151308963175e-01 +-9.000000000000003553e-01,5.917508298105428999e+00,4.235996624335836458e+00,nan +-8.799999999999998934e-01,5.969889006956252686e+00,5.895946189351668032e+00,1.151543603478050626e+00 +-8.600000000000003197e-01,6.014473674211297194e+00,8.090416456392947708e+00,9.886224714269703506e-01 +-8.399999999999998579e-01,6.050761671276578824e+00,7.099654579905307017e+00,nan 
+-8.200000000000002842e-01,6.078323289820192876e+00,7.783447599187353738e+00,1.563679351082028157e+00 +-7.999999999999998224e-01,6.096806422023442806e+00,5.860696892165741190e+00,1.070756444618480741e+00 +-7.800000000000002487e-01,6.105942091237313463e+00,6.197255583792041556e+00,7.878498533061101261e-01 +-7.599999999999997868e-01,6.105548711894689617e+00,5.755018464355096341e+00,8.295877048549399335e-01 +-7.400000000000002132e-01,6.095534984708045556e+00,7.207746759775751855e+00,4.903793932628179864e-01 +-7.199999999999997513e-01,6.075901363035685065e+00,6.849506693526540069e+00,8.751365338238001890e-01 +-7.000000000000001776e-01,6.046740057990412254e+00,7.063283370964628105e+00,8.139025491327271933e-01 +-6.799999999999997158e-01,6.008233582492343849e+00,3.945646506068891224e+00,1.013587577515847382e+00 +-6.600000000000001421e-01,5.960651867099278967e+00,6.267470566415362043e+00,7.742101094832944952e-01 +-6.399999999999996803e-01,5.904348012150392400e+00,6.558919601455357906e+00,7.859065498006260198e-01 +-6.200000000000001066e-01,5.839752770631418954e+00,7.077991229023441022e+00,6.918590236314428044e-01 +-5.999999999999996447e-01,5.767367883374772575e+00,6.213534208408828086e+00,9.083962692379881076e-01 +-5.800000000000000711e-01,5.687758412002065889e+00,5.040827980153054000e+00,9.659222627816541618e-01 +-5.599999999999996092e-01,5.601544234771209219e+00,6.088292857633700095e+00,nan +-5.400000000000000355e-01,5.509390885716415553e+00,4.933795191345740960e+00,6.901311785896823148e-01 +-5.199999999999995737e-01,5.411999927827702450e+00,4.549334630528049317e+00,5.649737176498387248e-01 +-5.000000000000000000e-01,5.310099056329699252e+00,5.385998717145486836e+00,8.874764677140921654e-01 +-4.799999999999995381e-01,5.204432128371307087e+00,5.833853142262386271e+00,8.402770111488416793e-01 +-4.599999999999999645e-01,5.095749310771889640e+00,5.659937411166305665e+00,8.734890865389354220e-01 
+-4.399999999999995026e-01,4.984797528177232095e+00,3.821608855697117058e+00,6.361529153821200433e-01 +-4.199999999999999289e-01,4.872311380485815135e+00,4.956658156492856016e+00,7.052981973370683333e-01 +-3.999999999999994671e-01,4.759004681251896685e+00,4.579878580113756037e+00,5.913716519320653120e-01 +-3.799999999999998934e-01,4.645562748588058177e+00,5.313046851124894943e+00,6.604572801362433854e-01 +-3.600000000000003197e-01,4.532635557571438767e+00,3.276085072396378806e+00,5.970258352233933641e-01 +-3.399999999999998579e-01,4.420831839040548772e+00,4.945262532692082047e+00,6.546549831258263730e-01 +-3.200000000000002842e-01,4.310714184698730023e+00,5.074998639686343793e+00,5.939270740467554877e-01 +-2.999999999999998224e-01,4.202795193348162428e+00,5.647855412945030906e+00,5.294028271246801198e-01 +-2.800000000000002487e-01,4.097534668557971216e+00,4.105665388254435832e+00,6.313947350577137074e-01 +-2.599999999999997868e-01,3.995337854753744811e+00,4.450308562157782255e+00,nan +-2.400000000000002132e-01,3.896554677160165792e+00,4.071521720834863522e+00,4.594469840608414457e-01 +-2.199999999999997513e-01,3.801479931696105652e+00,2.963422081644037753e+00,7.669847097205949593e-01 +-2.000000000000001776e-01,3.710354354176940817e+00,3.868455002797947184e+00,4.251092352493783255e-01 +-1.799999999999997158e-01,3.623366484278372823e+00,3.536676735169854702e+00,3.809115333435043049e-01 +-1.600000000000001421e-01,3.540655228812251032e+00,3.211077182715823319e+00,3.539182369697171926e-01 +-1.399999999999996803e-01,3.462313021005367197e+00,3.157142724539423462e+00,4.179098049588486075e-01 +-1.200000000000001066e-01,3.388389467611172101e+00,3.253957454666461580e+00,nan +-9.999999999999964473e-02,3.318895373688555139e+00,3.548219807515344382e+00,3.531347515347905142e-01 +-8.000000000000007105e-02,3.253807035547039650e+00,3.507299359211579315e+00,2.648647778152579124e-01 +-5.999999999999960920e-02,3.193070695419219618e+00,2.682990980990552998e+00,4.679258280625686051e-01 
+-4.000000000000003553e-02,3.136607056572494390e+00,2.988681850963338160e+00,4.187317005153019522e-01 +-1.999999999999957367e-02,3.084315764476654120e+00,3.143729582095237340e+00,2.296789574801562372e-01 +0.000000000000000000e+00,3.036079767953566932e+00,2.421701960787510988e+00,3.810571805066982343e-01 +2.000000000000046185e-02,2.991769483600934976e+00,2.880319783599672778e+00,2.221249274397878171e-01 +4.000000000000003553e-02,2.951246696869161745e+00,3.722343570451513095e+00,2.199211530517474755e-01 +6.000000000000049738e-02,2.914368143665852173e+00,2.857603579947825878e+00,2.448851310521519553e-01 +8.000000000000007105e-02,2.880988726986476234e+00,3.095978508429543030e+00,3.840540878242945655e-01 +1.000000000000005329e-01,2.850964333578392740e+00,2.914637263903819164e+00,1.904934648461840685e-01 +1.200000000000001066e-01,2.824154225837062882e+00,2.960170259963005979e+00,1.938639502937708481e-01 +1.399999999999996803e-01,2.800422993847327291e+00,2.949001397061277618e+00,2.123753112352188555e-01 +1.600000000000001421e-01,2.779642061600731573e+00,3.049640755278711701e+00,1.228069598001861884e-01 +1.799999999999997158e-01,2.761690749861532179e+00,2.728754984418608753e+00,nan +2.000000000000001776e-01,2.746456905873486232e+00,2.727849241201167452e+00,2.231426434949308757e-01 +2.199999999999997513e-01,2.733837117079805257e+00,2.992638787130469158e+00,2.191637740227992959e-01 +2.400000000000002132e-01,2.723736532275115696e+00,2.251918265667214758e+00,2.480313535065964692e-01 +2.599999999999997868e-01,2.716068319142801890e+00,2.889134209031075429e+00,2.459585819361506664e-01 +2.800000000000002487e-01,2.710752791984413435e+00,2.424532517989040237e+00,9.053335818983149208e-02 +2.999999999999998224e-01,2.707716247654336161e+00,2.165867195473284212e+00,1.703346508348992372e-01 +3.200000000000002842e-01,2.706889551304180852e+00,2.834687805641520786e+00,2.862061388394843697e-01 
+3.399999999999998579e-01,2.708206516542300157e+00,2.506058393497093029e+00,2.187887655218112293e-01 +3.600000000000003197e-01,2.711602127038729826e+00,2.878502009412273299e+00,3.009170618786285445e-01 +3.799999999999998934e-01,2.717010648456478439e+00,2.538209946113609838e+00,2.513645475901037374e-01 +4.000000000000003553e-01,2.724363680854988701e+00,2.323923192893809109e+00,3.558336096401703008e-01 +4.199999999999999289e-01,2.733588202366449948e+00,2.335711674975006424e+00,2.440863314527701444e-01 +4.400000000000003908e-01,2.744604654955604239e+00,3.188210899075321514e+00,2.757914055844488677e-01 +4.599999999999999645e-01,2.757325122396069972e+00,2.392353676431516529e+00,2.469282079526576301e-01 +4.800000000000004263e-01,2.771651649184879052e+00,2.758761172409790241e+00,3.680833929312681274e-01 +5.000000000000000000e-01,2.787474746926924762e+00,3.169012719153432300e+00,3.436627873706666336e-01 +5.200000000000004619e-01,2.804672131714068595e+00,2.033325531486355864e+00,2.563589488507343472e-01 +5.400000000000000355e-01,2.823107732173131268e+00,2.606195281701388033e+00,2.688410546078896513e-01 +5.600000000000004974e-01,2.842631003153919700e+00,2.581526900163175764e+00,2.837953043520541341e-01 +5.800000000000000711e-01,2.863076574485301506e+00,2.467688230198862964e+00,3.711213884661984097e-01 +6.000000000000005329e-01,2.884264257883038418e+00,2.831073027789893892e+00,3.175362955311629154e-01 +6.200000000000001066e-01,2.905999428015011876e+00,3.161240012035097191e+00,1.997911486567817974e-01 +6.399999999999996803e-01,2.928073786015819024e+00,2.956347334622390743e+00,4.285265589597905822e-01 +6.600000000000001421e-01,2.950266505522154858e+00,2.527608156534642880e+00,nan +6.799999999999997158e-01,2.972345752731148316e+00,2.971776586979084644e+00,5.310266580236164824e-01 +7.000000000000001776e-01,2.994070563250425199e+00,2.877290794515752204e+00,nan +7.199999999999997513e-01,3.015193049817741500e+00,3.389214360428408845e+00,4.743984853334949348e-01 
+7.400000000000002132e-01,3.035460906542356785e+00,2.522099656711926485e+00,4.929410237222000557e-01 +7.599999999999997868e-01,3.054620167391726149e+00,2.155763592687337749e+00,3.841695803658565778e-01 +7.800000000000002487e-01,3.072418169448793535e+00,2.799985290900282031e+00,5.452928167360916456e-01 +7.999999999999998224e-01,3.088606665222755066e+00,3.181999059836660315e+00,nan +8.200000000000002842e-01,3.102945023219247478e+00,3.057996485246090135e+00,nan +8.399999999999998579e-01,3.115203452249224458e+00,2.115576753796697851e+00,3.280525299425075536e-01 +8.600000000000003197e-01,3.125166182731077491e+00,2.721097204259094404e+00,5.254098256952898227e-01 +8.799999999999998934e-01,3.132634537629653160e+00,3.061150513259692563e+00,4.445749637844202407e-01 +9.000000000000003553e-01,3.137429826744533301e+00,2.669388958342580498e+00,4.072730575664818553e-01 +9.199999999999999289e-01,3.139396000823940547e+00,3.627533156198332609e+00,6.026970045505178897e-01 +9.400000000000003908e-01,3.138402006403019140e+00,2.765688681358758227e+00,5.117093613705018251e-01 +9.599999999999999645e-01,3.134343788256103824e+00,3.192183883069087624e+00,3.587640992401981976e-01 +9.800000000000004263e-01,3.127145893770689078e+00,3.968383003330220760e+00,nan +1.000000000000000000e+00,3.116762642207336675e+00,2.338013832820840854e+00,nan +1.020000000000000462e+00,3.103178831473855315e+00,3.313408994267729835e+00,6.526803408414116880e-01 +1.040000000000000036e+00,3.086409965448550174e+00,3.040682318981518861e+00,5.836565795219444963e-01 +1.060000000000000497e+00,3.066501995744913334e+00,2.779849182033996335e+00,4.256561800169474385e-01 +1.080000000000000071e+00,3.043530582812236762e+00,2.904547044921763188e+00,3.041189794730353446e-01 +1.100000000000000533e+00,3.017599892100946590e+00,2.986790168515752075e+00,3.526160436787704744e-01 +1.120000000000000107e+00,2.988840951381988020e+00,3.510619812891737190e+00,4.589691190502168672e-01 
+1.140000000000000568e+00,2.957409604905899325e+00,3.610893289988389654e+00,5.683411129188775668e-01 +1.160000000000000142e+00,2.923484108655963531e+00,2.241228078826833059e+00,3.469867901057105963e-01 +1.179999999999999716e+00,2.887262418262057651e+00,3.814941693212685614e+00,3.599045368114944798e-01 +1.200000000000000178e+00,2.848959227012042739e+00,3.265716891407834943e+00,2.892595048204359420e-01 +1.219999999999999751e+00,2.808802815688143717e+00,3.081223862719026485e+00,nan +1.240000000000000213e+00,2.767031778581812596e+00,2.922621628709752617e+00,nan +1.259999999999999787e+00,2.723891690971223412e+00,2.600552915231134854e+00,5.720317054932907430e-01 +1.280000000000000249e+00,2.679631782603588697e+00,2.295526789533440581e+00,4.703156894625505902e-01 +1.299999999999999822e+00,2.634501679384664286e+00,2.134518269816672920e+00,2.073651949699803210e-01 +1.320000000000000284e+00,2.588748271661790223e+00,2.505688380794310000e+00,3.565010685590284667e-01 +1.339999999999999858e+00,2.542612762358322342e+00,2.657778015600396060e+00,2.054430250094419763e-01 +1.360000000000000320e+00,2.496327941974050724e+00,2.880342521059242600e+00,3.269488151166322631e-01 +1.379999999999999893e+00,2.450115730332577790e+00,2.744123595985678232e+00,3.190639167541055032e-01 +1.400000000000000355e+00,2.404185017174084393e+00,2.834484243824526128e+00,3.600015743496797760e-01 +1.419999999999999929e+00,2.358729825510707645e+00,3.111700384330541080e+00,3.055750774141779980e-01 +1.440000000000000391e+00,2.313927813331210892e+00,1.984596428286149017e+00,2.535374835126482096e-01 +1.459999999999999964e+00,2.269939121002487958e+00,2.184618266171591827e+00,4.125742803545879300e-01 +1.480000000000000426e+00,2.226905563791397480e+00,2.541586847516879466e+00,3.075604621153351093e-01 +1.500000000000000000e+00,2.184950161522252188e+00,1.886697152297341695e+00,3.969470656999851688e-01 +1.520000000000000462e+00,2.144176990664828697e+00,2.042829442663604667e+00,3.025771102881774643e-01 
+1.540000000000000036e+00,2.104671338254583635e+00,2.260314403072414713e+00,2.367490966951061038e-01 +1.560000000000000497e+00,2.066500132085010044e+00,1.967071821134906751e+00,3.089001509687088198e-01 +1.580000000000000071e+00,2.029712617650177631e+00,2.255154594264408896e+00,2.813968051504651124e-01 +1.600000000000000533e+00,1.994341249385676251e+00,1.664795947481280347e+00,2.310721598292219126e-01 +1.620000000000000107e+00,1.960402761857115372e+00,1.898006700231108290e+00,2.653301084336139870e-01 +1.640000000000000568e+00,1.927899385643779162e+00,2.167986247869837069e+00,nan +1.660000000000000142e+00,1.896820172700660834e+00,2.052209084359961633e+00,2.480990399266391477e-01 +1.679999999999999716e+00,1.867142396870883125e+00,2.030829428195580988e+00,nan +1.700000000000000178e+00,1.838832996860484403e+00,2.079538602632515332e+00,nan +1.719999999999999751e+00,1.811850031262943528e+00,1.447949949786532686e+00,1.487681488926025641e-01 +1.740000000000000213e+00,1.786144118008170745e+00,1.984142869518430441e+00,2.126409080700615295e-01 +1.759999999999999787e+00,1.761659833782886153e+00,1.594984196307964508e+00,1.531504013947989629e-01 +1.780000000000000249e+00,1.738337052400659166e+00,1.689744286673886542e+00,nan +1.799999999999999822e+00,1.716112204670008801e+00,1.505327862233904135e+00,1.433670028956879894e-01 +1.820000000000000284e+00,1.694919445906048372e+00,1.657357557672590298e+00,1.936888522083639597e-01 +1.839999999999999858e+00,1.674691720755129953e+00,1.627728676663654062e+00,1.973138182971920507e-01 +1.860000000000000320e+00,1.655361718365914392e+00,1.562003428819664252e+00,1.075696220393412700e-01 +1.879999999999999893e+00,1.636862714072504277e+00,1.495731507212113787e+00,1.246181847495901329e-01 +1.900000000000000355e+00,1.619129296599083778e+00,1.696456855549726805e+00,1.184320222108378451e-01 +1.919999999999999929e+00,1.602097982310095814e+00,1.753621707095358806e+00,1.654974896688882480e-01 
+1.940000000000000391e+00,1.585707720189084124e+00,1.510696371828758267e+00,nan +1.959999999999999964e+00,1.569900293020817461e+00,1.593205776328715517e+00,9.488123223315308996e-02 +1.980000000000000426e+00,1.554620621675399939e+00,1.371252828445131744e+00,9.083416534289066868e-02 +2.000000000000000000e+00,1.539816980460987805e+00,1.631767388510174444e+00,5.605529023766811503e-02 +2.020000000000000462e+00,1.525441132243590348e+00,1.510870281561765172e+00,9.794045190385711197e-02 +2.040000000000000036e+00,1.511448392455632117e+00,1.509917665204379755e+00,5.847087504586767626e-02 +2.060000000000000497e+00,1.497797631262067775e+00,1.457511736987583184e+00,1.026291527197704528e-01 +2.080000000000000071e+00,1.484451223059952474e+00,1.337502930906462328e+00,nan +2.100000000000000533e+00,1.471374952192380681e+00,1.428158853424737362e+00,6.186049157060879988e-02 +2.120000000000000107e+00,1.458537883298729465e+00,1.334146609616660761e+00,nan +2.140000000000000568e+00,1.445912204137405288e+00,1.389119451915253656e+00,6.753635107838724949e-02 +2.160000000000000142e+00,1.433473048039973996e+00,1.411502044605835504e+00,4.818573805837670504e-02 +2.179999999999999716e+00,1.421198302418976933e+00,1.512147721837651781e+00,4.450992438755188574e-02 +2.200000000000000178e+00,1.409068408984782028e+00,1.342215912911993270e+00,5.676913138113579699e-02 +2.219999999999999751e+00,1.397066160554620851e+00,1.533350868768530884e+00,6.970841109848661954e-02 +2.240000000000000213e+00,1.385176498580572568e+00,1.398941725956053928e+00,6.789068920224745896e-02 +2.259999999999999787e+00,1.373386314799874164e+00,1.315483430698168199e+00,4.651874555391048022e-02 +2.280000000000000249e+00,1.361684259733650704e+00,1.373201771977372143e+00,4.002537228954656384e-02 +2.299999999999999822e+00,1.350060560138784949e+00,1.285003032494459685e+00,3.701739336027300153e-02 +2.320000000000000284e+00,1.338506846958248842e+00,1.337936194998987194e+00,4.090671416312927772e-02 
+2.339999999999999858e+00,1.327015994821441502e+00,1.276400380907636301e+00,5.126562293055141051e-02 +2.360000000000000320e+00,1.315581973718558784e+00,1.300022331653995611e+00,5.490064925439376575e-02 +2.379999999999999893e+00,1.304199713110817394e+00,1.333665544228300748e+00,nan +2.400000000000000355e+00,1.292864978438369006e+00,1.293747202699239374e+00,3.450761667350280004e-02 +2.419999999999999929e+00,1.281574259746148581e+00,1.244199007704499316e+00,nan +2.440000000000000391e+00,1.270324671959447871e+00,1.324761412428399510e+00,nan +2.459999999999999964e+00,1.259113866200388898e+00,1.235109059399808418e+00,2.588565954245041975e-02 +2.480000000000000426e+00,1.247939951437686767e+00,1.302786487079408806e+00,nan +2.500000000000000000e+00,1.236801425699423307e+00,1.193293907935522258e+00,4.554179185929631279e-02 +2.520000000000000462e+00,1.225697116046164714e+00,1.166485983069556198e+00,2.969047947990152009e-02 +2.540000000000000036e+00,1.214626126494402270e+00,1.227974018485518126e+00,nan +2.560000000000000497e+00,1.203587793092782743e+00,1.304406572007846821e+00,2.881781570521451882e-02 +2.580000000000000071e+00,1.192581645381629141e+00,1.118044531986680745e+00,5.065123842298295542e-02 +2.600000000000000533e+00,1.181607373505606073e+00,1.176494597509851525e+00,3.111566005871567453e-02 +2.620000000000000107e+00,1.170664800296814967e+00,1.138071003896516276e+00,1.588287971280955349e-02 +2.640000000000000568e+00,1.159753857697950474e+00,1.182284596889840778e+00,3.734954384966652358e-02 +2.660000000000000142e+00,1.148874566950204290e+00,1.142711990112235343e+00,2.222259134544606399e-02 +2.679999999999999716e+00,1.138027022026154800e+00,1.172697850945799924e+00,3.267050585695246284e-02 +2.700000000000000178e+00,1.127211375842620544e+00,1.125106052110490307e+00,2.879231061755915463e-02 +2.719999999999999751e+00,1.116427828841014547e+00,1.063066446130191745e+00,3.453316633484269926e-02 
+2.740000000000000213e+00,1.105676619572390829e+00,1.091014756190950008e+00,3.593049971171793922e-02 +2.759999999999999787e+00,1.094958016970547288e+00,1.117608734824651284e+00,3.620445660684060152e-02 +2.780000000000000249e+00,1.084272314038812191e+00,1.070946674106443952e+00,2.945674602064272604e-02 +2.799999999999999822e+00,1.073619822714509731e+00,1.158073375390544557e+00,4.317271897172083456e-02 +2.820000000000000284e+00,1.063000869709371354e+00,1.068532570468847309e+00,3.608477525373053607e-02 +2.839999999999999858e+00,1.052415793154658274e+00,1.073138054167278588e+00,3.920116041946775098e-02 +2.860000000000000320e+00,1.041864939906452348e+00,1.051324237483512336e+00,3.217497612874499480e-02 +2.879999999999999893e+00,1.031348663389939579e+00,1.048394425887490877e+00,2.863637825214125979e-02 +2.900000000000000355e+00,1.020867321881581136e+00,1.043197653893642496e+00,2.303674148816653097e-02 +2.919999999999999929e+00,1.010421277145405083e+00,9.927415994114875408e-01,2.750601863091974020e-02 +2.940000000000000391e+00,1.000010893354289720e+00,1.041076924330221587e+00,2.705651838895691344e-02 +2.959999999999999964e+00,9.896365362395938003e-01,9.525405867745834199e-01,1.703397553553930369e-02 +2.980000000000000426e+00,9.792985724228823186e-01,9.813458308074753944e-01,3.980445657729349968e-02 +3.000000000000000000e+00,9.689973688922464135e-01,9.788892868312137896e-01,3.501065160424287476e-02 +3.019999999999999574e+00,9.587332925929010763e-01,9.791733570405278808e-01,2.074376790171749904e-02 +3.040000000000000924e+00,9.485067101077330198e-01,9.484615006489022226e-01,3.100365139014824370e-02 +3.060000000000000497e+00,9.383179874083366068e-01,9.351859625054994574e-01,3.481369483420125588e-02 +3.080000000000000071e+00,9.281674896610263570e-01,9.057035218946286603e-01,2.772343988678164020e-02 +3.099999999999999645e+00,9.180555810756014790e-01,8.651708341540602643e-01,3.921165500460101205e-02 
+3.120000000000000995e+00,9.079826247871616296e-01,9.303704135098522787e-01,2.771220111105161463e-02 +3.140000000000000568e+00,8.979489827634143939e-01,9.193051217201706482e-01,nan +3.160000000000000142e+00,8.879550157315235781e-01,9.352467420486022531e-01,3.588952176393950949e-02 +3.179999999999999716e+00,8.780010831199275279e-01,8.813652204922443056e-01,nan +3.199999999999999289e+00,8.680875430115327918e-01,9.390011623128490248e-01,1.901648106737680380e-02 +3.220000000000000639e+00,8.582147521055579764e-01,8.600902219662046599e-01,4.474787744605149969e-02 +3.240000000000000213e+00,8.483830656859036035e-01,8.523875153073585675e-01,3.323721427792761945e-02 +3.259999999999999787e+00,8.385928375944491364e-01,8.644647000854875918e-01,2.951322030495437398e-02 +3.279999999999999361e+00,8.288444202080517131e-01,8.690682320477158829e-01,2.307464008203872000e-02 +3.300000000000000711e+00,8.191381644183171051e-01,8.079756093381198490e-01,2.212612398486654697e-02 +3.320000000000000284e+00,8.094744196134425751e-01,8.294698933376599692e-01,3.349877650881752378e-02 +3.339999999999999858e+00,7.998535336616054980e-01,7.773164878496438002e-01,2.792524629966190025e-02 +3.359999999999999432e+00,7.902758528955147188e-01,7.987031000048978591e-01,2.519442004395512635e-02 +3.380000000000000782e+00,7.807417220978202232e-01,7.915291710156696636e-01,2.941592650294502787e-02 +3.400000000000000355e+00,7.712514844871715125e-01,7.742913156876776171e-01,3.018585662763769925e-02 +3.419999999999999929e+00,7.618054817047594796e-01,8.052117292761523659e-01,3.534820799363974642e-02 +3.439999999999999503e+00,7.524040538012307655e-01,6.975738328763312346e-01,nan +3.460000000000000853e+00,7.430475392238857779e-01,7.456754448582639805e-01,5.177506811051349467e-02 +3.480000000000000426e+00,7.337362748040932026e-01,7.971989053927358571e-01,nan +3.500000000000000000e+00,7.244705957448805966e-01,7.500214809794597537e-01,2.973325203337600503e-02 
+3.519999999999999574e+00,7.152508356086680896e-01,7.405704628922389343e-01,2.644933588288904402e-02 +3.540000000000000924e+00,7.060773263051204340e-01,7.010520360337462176e-01,2.751384680887634590e-02 +3.560000000000000497e+00,6.969503980791000863e-01,7.748798508620088343e-01,2.238844522514950308e-02 +3.580000000000000071e+00,6.878703794987069964e-01,7.037393802241858065e-01,3.022398023282772683e-02 +3.599999999999999645e+00,6.788375974434043281e-01,6.853447274267203682e-01,3.659059286972034947e-02 +3.620000000000000995e+00,6.698523770922192311e-01,6.404736362845638853e-01,4.159673361489631821e-02 +3.640000000000000568e+00,6.609150419120116693e-01,5.796863065161470541e-01,2.310965550118417033e-02 +3.660000000000000142e+00,6.520259136458131932e-01,6.706090484334317203e-01,2.150927154134406327e-02 +3.679999999999999716e+00,6.431853123012337692e-01,6.590807197035827292e-01,nan +3.700000000000001066e+00,6.343935561389366651e-01,6.579262974794020113e-01,3.664203532810380443e-02 +3.720000000000000639e+00,6.256509616611743985e-01,6.362810821527531413e-01,nan +3.740000000000000213e+00,6.169578436003895217e-01,6.329963281254368246e-01,3.069766501312837931e-02 +3.759999999999999787e+00,6.083145149078831304e-01,5.674844485932917237e-01,2.929212140056279556e-02 +3.779999999999999361e+00,5.997212867425509852e-01,6.020347859780290634e-01,nan +3.800000000000000711e+00,5.911784684596717021e-01,5.979345896309717912e-01,nan +3.820000000000000284e+00,5.826863675997813186e-01,5.921153801598458832e-01,2.472956858646184128e-02 +3.839999999999999858e+00,5.742452898775902703e-01,5.168250498864187525e-01,3.932557515162073830e-02 +3.859999999999999432e+00,5.658555391709854110e-01,5.804153801121019196e-01,2.396882167584396886e-02 +3.880000000000000782e+00,5.575174175100824359e-01,5.121237847876588534e-01,2.891589906210544070e-02 +3.900000000000000355e+00,5.492312250663633488e-01,5.784753349303056735e-01,3.652093289156662509e-02 
+3.919999999999999929e+00,5.409972601418588933e-01,5.514372689831463781e-01,2.860219042728869357e-02 +3.939999999999999503e+00,5.328158191584188019e-01,4.913097559383847734e-01,2.105444236715212428e-02 +3.960000000000000853e+00,5.246871966470317838e-01,5.158491350566534184e-01,4.172547955572703665e-02 +3.980000000000000426e+00,5.166116852372329982e-01,5.006790115005972375e-01,nan +4.000000000000000000e+00,5.085895756465597106e-01,5.296558230138243006e-01,2.904212449707989363e-02 +4.019999999999999574e+00,5.006211566700918825e-01,5.527501560920484724e-01,3.635397445862977722e-02 +4.040000000000000924e+00,4.927067151700498249e-01,5.121606975311043808e-01,2.941529899783447422e-02 +4.060000000000000497e+00,4.848465360654752865e-01,4.620786835801224401e-01,3.957298503403856166e-02 +4.080000000000000071e+00,4.770409023219653322e-01,5.631392727992968750e-01,3.838219534881322920e-02 +4.099999999999999645e+00,4.692900949414935408e-01,5.001731340426340822e-01,2.855744716291427898e-02 +4.120000000000000995e+00,4.615943929522858813e-01,4.759932526974401945e-01,nan +4.140000000000000568e+00,4.539540733987852406e-01,4.408134938285349635e-01,2.613790641807950052e-02 +4.160000000000000142e+00,4.463694113316689638e-01,4.661151650812371816e-01,nan +4.179999999999999716e+00,4.388406797979540475e-01,4.170876717713623005e-01,3.040067984335515991e-02 +4.200000000000001066e+00,4.313681498311596751e-01,4.673591396611029847e-01,3.570914616284417281e-02 +4.220000000000000639e+00,4.239520904415605695e-01,4.763519606919671134e-01,2.539380830284991911e-02 +4.240000000000000213e+00,4.165927686064918589e-01,4.092221112259533666e-01,2.153629549044803679e-02 +4.259999999999999787e+00,4.092904492607467581e-01,3.932138318109248964e-01,2.659889927443474106e-02 +4.279999999999999361e+00,4.020453952870306491e-01,4.201976468779460294e-01,3.705805557891760083e-02 +4.300000000000000711e+00,3.948578675065055332e-01,3.600548568063514643e-01,2.534162187361805110e-02 
+4.320000000000000284e+00,3.877281246693948802e-01,4.174107488585798165e-01,3.201413680890515673e-02 +4.339999999999999858e+00,3.806564234456719653e-01,4.200430502438344815e-01,3.407525487923236673e-02 +4.359999999999999432e+00,3.736430184158152645e-01,3.398977450477670414e-01,4.296195288458046213e-02 +4.380000000000000782e+00,3.666881620616491144e-01,3.616536429337198211e-01,2.686004203200916332e-02 +4.400000000000000355e+00,3.597921047572512077e-01,3.430360370501685519e-01,nan +4.419999999999999929e+00,3.529550947599409128e-01,3.413386186647102138e-01,4.121900071809197347e-02 +4.439999999999999503e+00,3.461773782013377598e-01,2.999581533858771132e-01,nan +4.460000000000000853e+00,3.394591990785065239e-01,4.195723359185571377e-01,2.022926550192588255e-02 +4.480000000000000426e+00,3.328007992451702535e-01,3.306936459388154503e-01,3.731986554227401803e-02 +4.500000000000000000e+00,3.262024184030050122e-01,3.418964395084210794e-01,4.555894651088994440e-02 +4.519999999999999574e+00,3.196642940930061183e-01,3.042222781899822093e-01,nan +4.540000000000000924e+00,3.131866616869440900e-01,2.945166790367058640e-01,3.255860667849282825e-02 +4.560000000000000497e+00,3.067697543788856507e-01,3.353764562249985648e-01,4.859846603864388659e-02 +4.580000000000000071e+00,3.004138031768017747e-01,2.817826789133563992e-01,2.650893947918774030e-02 +4.599999999999999645e+00,2.941190368942476763e-01,3.504444880940863483e-01,3.514300648032542274e-02 +4.620000000000000995e+00,2.878856821421302836e-01,2.864676182506220581e-01,3.684391263051964294e-02 +4.640000000000000568e+00,2.817139633205476557e-01,2.634071544168652967e-01,2.308952122403817517e-02 +4.660000000000000142e+00,2.756041026107118874e-01,2.388318962249805499e-01,2.924970611005074661e-02 +4.679999999999999716e+00,2.695563199669466226e-01,2.575495264893400416e-01,3.474081006829046747e-02 +4.700000000000001066e+00,2.635708331087747158e-01,2.032770265411950605e-01,4.046234879416068381e-02 
+4.720000000000000639e+00,2.576478575130769499e-01,2.754515805332317657e-01,nan +4.740000000000000213e+00,2.517876064063357955e-01,2.510993908751023018e-01,3.621635024533232522e-02 +4.759999999999999787e+00,2.459902907569562203e-01,2.865785354726087530e-01,3.795087884160799202e-02 +4.779999999999999361e+00,2.402561192676773150e-01,3.109668724655918481e-01,4.300319067191369499e-02 +4.800000000000000711e+00,2.345852983680514203e-01,1.785131240197480318e-01,3.270046984938442675e-02 +4.820000000000000284e+00,2.289780322070213980e-01,2.571486778006862317e-01,2.718603598796379486e-02 +4.839999999999999858e+00,2.234345226455607403e-01,2.159567786508689025e-01,2.758167160029438916e-02 +4.859999999999999432e+00,2.179549692494162638e-01,1.759826199015482873e-01,3.354081722595050885e-02 +4.880000000000000782e+00,2.125395692819176396e-01,2.354648132723803911e-01,3.584108267447292523e-02 +4.900000000000000355e+00,2.071885176968804032e-01,2.306952276361795284e-01,3.216812284508516889e-02 +4.919999999999999929e+00,2.019020071315820175e-01,1.881273852558159876e-01,2.551945124320626965e-02 +4.939999999999999503e+00,1.966802278998327491e-01,1.949066051071076111e-01,nan +4.960000000000000853e+00,1.915233679851200410e-01,2.230855359433417673e-01,2.520107939712273176e-02 +4.980000000000000426e+00,1.864316130338459221e-01,1.271495425341684082e-01,3.617754646046948686e-02 +5.000000000000000000e+00,1.814051463486365812e-01,1.665385684665513233e-01,2.618489741967522377e-02 diff --git a/tests/test_functionality.py b/tests/test_functionality.py index e17a6c3b..e6a0f82f 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -3,15 +3,15 @@ import polars as pl import pytest +from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.augmentation import ( BaselineShift, - ExponentialNoise, + ExponentialNoise, IndexShift, - NormalNoise, + NormalNoise, SpectrumScale, UniformNoise, ) - from chemotools.baseline import ( AirPls, ArPls, @@ -21,6 +21,7 @@ 
SubtractReference, ) from chemotools.derivative import NorrisWilliams, SavitzkyGolay +from chemotools.feature_selection import IndexSelector, RangeCut from chemotools.scale import MinMaxScaler, NormScaler, PointScaler from chemotools.scatter import ( ExtendedMultiplicativeScatterCorrection, @@ -29,41 +30,87 @@ StandardNormalVariate, ) from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth -from chemotools.feature_selection import IndexSelector, RangeCut -from tests.fixtures import ( +from chemotools.utils._models import BandedSolvers +from tests.fixtures import reference_airpls # noqa: F401 +from tests.fixtures import reference_arpls # noqa: F401 +from tests.fixtures import reference_msc_mean # noqa: F401 +from tests.fixtures import reference_msc_median # noqa: F401 +from tests.fixtures import reference_sg_15_2 # noqa: F401 +from tests.fixtures import reference_snv # noqa: F401 +from tests.fixtures import reference_whittaker # noqa: F401 +from tests.fixtures import spectrum_arpls # noqa: F401 +from tests.fixtures import spectrum + + +@pytest.mark.parametrize("num_series", [1, 5]) +def test_air_pls( spectrum, - spectrum_arpls, - reference_airpls, - reference_arpls, - reference_msc_mean, - reference_msc_median, - reference_sg_15_2, - reference_snv, - reference_whitakker, -) - - -def test_air_pls(spectrum, reference_airpls): + reference_airpls, # noqa: F811 + num_series: int, +) -> None: # Arrange - air_pls = AirPls() + repetitions = (num_series, 1) + air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) # Act spectrum_corrected = air_pls.fit_transform(spectrum) # Assert - assert np.allclose(spectrum_corrected[0], reference_airpls[0], atol=1e-7) + assert np.allclose( + spectrum_corrected[0], np.tile(reference_airpls, reps=repetitions), atol=1e-7 + ) + + +# FIXME: Deactivated because it fails; Issue created: +# @pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) +# @pytest.mark.parametrize("size", [5_000]) +# def 
test_air_pls_constant_signal(size: int, fill_value: float) -> None: +# # Arrange +# spectrum = np.full(shape=(size,), fill_value=fill_value).reshape((1, -1)) +# air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) +# # Act +# spectrum_corrected = air_pls.fit_transform(spectrum) -def test_ar_pls(spectrum_arpls, reference_arpls): +# # Assert +# assert np.allclose(spectrum_corrected[0], spectrum[0]) + + +# FIXME: working with such a high ``atol`` indicates that the reference is not up to +# date anymore +@pytest.mark.parametrize("num_series", [1, 5]) +def test_ar_pls( + spectrum_arpls, # noqa: F811 + reference_arpls, # noqa: F811 + num_series: int, +) -> None: # Arrange - arpls = ArPls(1e2, 0.0001) + repetitions = (num_series, 1) + arpls = ArPls(lam=1e2, differences=2, ratio=0.0001) reference = np.array(spectrum_arpls) - np.array(reference_arpls) # Act spectrum_corrected = arpls.fit_transform(spectrum_arpls) # Assert - assert np.allclose(spectrum_corrected[0], reference[0], atol=1e-4) + assert np.allclose( + spectrum_corrected[0], np.tile(reference, reps=repetitions), atol=1e-4 + ) + + +# FIXME: Deactivated because it fails; Issue created: +# @pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) +# @pytest.mark.parametrize("size", [5_000]) +# def test_ar_pls_constant_signal(size: int, fill_value: float) -> None: +# # Arrange +# spectrum = np.full(shape=(size,), fill_value=fill_value).reshape((1, -1)) +# ar_pls = ArPls(lam=1e2, differences=2, ratio=0.0001) + +# # Act +# spectrum_corrected = ar_pls.fit_transform(spectrum) + +# # Assert +# assert np.allclose(spectrum_corrected[0], spectrum[0]) def test_baseline_shift(): @@ -76,9 +123,11 @@ def test_baseline_shift(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.mean(spectrum_corrected[0]) > np.mean(spectrum[0]) + assert np.mean(spectrum_corrected[0]) > np.mean(spectrum[0]) assert np.isclose(np.std(spectrum_corrected[0]), 0.0, atol=1e-8) - assert 
np.isclose(np.mean(spectrum_corrected[0]) - np.mean(spectrum[0]), 0.77395605, atol=1e-8) + assert np.isclose( + np.mean(spectrum_corrected[0]) - np.mean(spectrum[0]), 0.77395605, atol=1e-8 + ) def test_constant_baseline_correction(): @@ -120,8 +169,7 @@ def test_exponential_noise(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.allclose(np.mean(spectrum_corrected[0])-1, 0.1, atol=1e-2) - + assert np.allclose(np.mean(spectrum_corrected[0]) - 1, 0.1, atol=1e-2) def test_extended_baseline_correction(): @@ -167,7 +215,6 @@ def test_extended_baseline_correction_with_no_reference(): emsc.fit_transform(spectrum) - def test_extended_baseline_correction_with_wrong_reference(): # Arrange spectrum = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]).reshape( @@ -222,7 +269,7 @@ def test_extended_baseline_correction_through_msc(spectrum): def test_extended_baseline_correction_through_msc_median(spectrum): - # EMSC of 0 order should be equivalient to MSC + # EMSC of 0 order should be equivalent to MSC # Arrange msc = MultiplicativeScatterCorrection(use_median=True) emsc = ExtendedMultiplicativeScatterCorrection(order=0, use_median=True) @@ -233,7 +280,6 @@ def test_extended_baseline_correction_through_msc_median(spectrum): # Assert assert np.allclose(spectrum_emsc[0], spectrum_msc, atol=1e-8) - def test_index_selector(): @@ -280,13 +326,16 @@ def test_index_selector_with_wavenumbers(): def test_index_selector_with_wavenumbers_and_dataframe(): # Arrange wavenumbers = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]) - spectrum = pd.DataFrame(np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]])) + spectrum = pd.DataFrame( + np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]]) + ) + # FIXME: this is not used expected = np.array([[1.0, 2.0, 3.0, 34.0, 55.0, 89.0]]) # Act select_features = IndexSelector( features=np.array([1, 2, 3, 8, 9, 10]), wavenumbers=wavenumbers - ).set_output(transform='pandas') + 
).set_output(transform="pandas") spectrum_corrected = select_features.fit_transform(spectrum) @@ -524,7 +573,7 @@ def test_normal_noise(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.allclose(np.mean(spectrum_corrected[0])-1, 0, atol=1e-2) + assert np.allclose(np.mean(spectrum_corrected[0]) - 1, 0, atol=1e-2) assert np.allclose(np.std(spectrum_corrected[0]), 0.5, atol=1e-2) @@ -630,7 +679,9 @@ def test_range_cut_by_wavenumber_with_pandas_dataframe(): # Arrange wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]])) - range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='pandas') + range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output( + transform="pandas" + ) # Act spectrum_corrected = range_cut.fit_transform(spectrum) @@ -643,7 +694,9 @@ def test_range_cut_by_wavenumber_with_polars_dataframe(): # Arrange wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] spectrum = pl.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]])) - range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='polars') + range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output( + transform="polars" + ) # Act spectrum_corrected = range_cut.fit_transform(spectrum) @@ -762,16 +815,125 @@ def test_uniform_noise(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.allclose(np.mean(spectrum_corrected[0])-1, 0, atol=1e-2) - assert np.allclose(np.std(spectrum_corrected[0]), np.sqrt(1/3), atol=1e-2) + assert np.allclose(np.mean(spectrum_corrected[0]) - 1, 0, atol=1e-2) + assert np.allclose(np.std(spectrum_corrected[0]), np.sqrt(1 / 3), atol=1e-2) -def test_whitakker_smooth(spectrum, reference_whitakker): +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("with_weights", [True, False]) +@pytest.mark.parametrize("num_series", [1, 5]) +def 
test_whittaker_smooth( + spectrum, + reference_whittaker, # noqa: F811 + num_series: int, + with_weights: bool, + same_weights_for_all: bool, +) -> None: + # Arrange + repetitions = (num_series, 1) + whittaker_smooth = WhittakerSmooth() + if with_weights and not same_weights_for_all: + weights = np.ones(shape=(num_series, len(spectrum[0]))) + elif with_weights and same_weights_for_all: + weights = np.ones(shape=(len(spectrum[0]),)) + else: + weights = None + + spectrum_to_fit_original = np.tile(spectrum, reps=repetitions) + spectrum_to_fit = spectrum_to_fit_original.copy() + + # Act + spectrum_corrected = whittaker_smooth.fit_transform( + X=spectrum_to_fit, sample_weight=weights + ) + + # Assert + # NOTE: the following test makes sure nothing was overwritten + assert np.array_equal(spectrum_to_fit, spectrum_to_fit_original) + assert np.allclose( + spectrum_corrected, np.tile(reference_whittaker, reps=repetitions), atol=1e-8 + ) + + +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("with_weights", [True, False]) +@pytest.mark.parametrize("num_series", [1, 5]) +def test_whittaker_with_pentapy( + num_series: int, + with_weights: bool, + same_weights_for_all: bool, +): + # this test is skipped with a warning if pentapy is not installed + if not PENTAPY_AVAILABLE: + pytest.skip("Pentapy is not installed, test cannot be performed") + + # Arrange + np.random.seed(42) + spectrum = np.random.rand(num_series, 1000) + whittaker_smooth = WhittakerSmooth(lam=100.0, differences=2) + + weights = None + if with_weights and not same_weights_for_all: + weights = np.ones(shape=(num_series, len(spectrum[0]))) + elif with_weights and same_weights_for_all: + weights = np.ones(shape=(len(spectrum[0]),)) + + # Act with pentapy + spectrum_corr_pentapy = whittaker_smooth.fit_transform( + X=spectrum, sample_weight=weights + ) + + # Assert with pentapy + # NOTE: the weight is not correct since the test only checks the method + solve_method = 
whittaker_smooth._solve( + lam=whittaker_smooth._lam_internal_.fixed_lambda, + rhs_b_weighted=spectrum.transpose(), + weights=1.0, + )[1] + assert solve_method == BandedSolvers.PENTAPY + + # Act without pentapy + whittaker_smooth._WhittakerLikeSolver__allow_pentapy = False # type: ignore + spectrum_corr_factorized_solve = whittaker_smooth.fit_transform( + spectrum, sample_weight=weights + ) + + # Assert without pentapy + # NOTE: the weight is not correct since the test only checks the method + solve_method = whittaker_smooth._solve( + lam=whittaker_smooth._lam_internal_.fixed_lambda, + rhs_b_weighted=spectrum.transpose(), + weights=1.0, + )[1] + assert solve_method == BandedSolvers.PIVOTED_LU + assert np.allclose(spectrum_corr_pentapy[0], spectrum_corr_factorized_solve[0]) + + +@pytest.mark.parametrize( + "log10_lam", np.arange(start=-25.0, stop=15.0, step=5.0).tolist() +) +@pytest.mark.parametrize("difference", [1, 2]) +@pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) +@pytest.mark.parametrize("num_data", [5_000]) +def test_whittaker_constant_signal( + num_data: int, + fill_value: float, + difference: int, + log10_lam: float, +) -> None: + # Arrange - whitakker_smooth = WhittakerSmooth() + spectrum = np.full(shape=(num_data,), fill_value=fill_value).reshape((1, -1)) + whittaker_smooth = WhittakerSmooth(lam=10.0**log10_lam, differences=difference) # Act - spectrum_corrected = whitakker_smooth.fit_transform(spectrum) + spectrum_corrected = whittaker_smooth.fit_transform(spectrum) # Assert - assert np.allclose(spectrum_corrected[0], reference_whitakker[0], atol=1e-8) + # this test needs to be as strict as possible because the result has to be exact + assert np.allclose( + spectrum_corrected[0], + spectrum[0], + atol=num_data * np.finfo(np.float64).eps, # type: ignore + rtol=1e-6, + ) diff --git a/tests/test_sklearn_compliance.py b/tests/test_sklearn_compliance.py index a4a192b4..b3accf67 100644 --- a/tests/test_sklearn_compliance.py +++ 
b/tests/test_sklearn_compliance.py @@ -2,13 +2,12 @@ from chemotools.augmentation import ( BaselineShift, - ExponentialNoise, - NormalNoise, + ExponentialNoise, IndexShift, - SpectrumScale, + NormalNoise, + SpectrumScale, UniformNoise, ) - from chemotools.baseline import ( AirPls, ArPls, @@ -20,6 +19,7 @@ SubtractReference, ) from chemotools.derivative import NorrisWilliams, SavitzkyGolay +from chemotools.feature_selection import IndexSelector, RangeCut from chemotools.scale import MinMaxScaler, NormScaler, PointScaler from chemotools.scatter import ( ExtendedMultiplicativeScatterCorrection, @@ -33,9 +33,6 @@ SavitzkyGolayFilter, WhittakerSmooth, ) -from chemotools.feature_selection import RangeCut, IndexSelector - -from tests.fixtures import spectrum # AirPls @@ -49,7 +46,7 @@ def test_compliance_air_pls(): # ArPls def test_compliance_ar_pls(): # Arrange - transformer = ArPls() + transformer = ArPls(differences=1) # Act & Assert check_estimator(transformer) @@ -60,7 +57,7 @@ def test_compliance_baseline_shift(): transformer = BaselineShift() # Act & Assert check_estimator(transformer) - + # ConstantBaselineCorrection def test_compliance_constant_baseline_correction(): @@ -91,7 +88,7 @@ def test_compliance_extended_multiplicative_scatter_correction(): # Arrange transformer = ExtendedMultiplicativeScatterCorrection() # Act & Assert - check_estimator(transformer) + check_estimator(transformer) # IndexSelector @@ -109,6 +106,7 @@ def test_compliance_spectrum_shift(): # Act & Assert check_estimator(transformer) + # LinearCorrection def test_compliance_linear_correction(): # Arrange @@ -196,7 +194,7 @@ def test_compliance_point_scaler(): # Act & Assert check_estimator(transformer) - + # PolynomialCorrection def test_compliance_polynomial_correction(): # Arrange diff --git a/tests/tests_for_utils/__init__.py b/tests/tests_for_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/tests_for_utils/test_banded_linalg.py 
b/tests/tests_for_utils/test_banded_linalg.py new file mode 100644 index 00000000..049fc813 --- /dev/null +++ b/tests/tests_for_utils/test_banded_linalg.py @@ -0,0 +1,306 @@ +""" +Test suite for the utility functions in the :mod:`chemotools.utils.banded_linalg` +module. + +""" + +### Imports ### + +from typing import List, Union + +import numpy as np +import pytest +from scipy.linalg import solve_banded as scipy_solve_banded + +from chemotools.utils._banded_linalg import ( + _datacopied, + convert_upper_chol_banded_to_lu_banded_storage, + lu_banded, + lu_solve_banded, + slogdet_lu_banded, +) +from tests.tests_for_utils.utils_funcs import get_banded_slogdet + +### Constants ### + +_ARRAY_TO_VIEW: np.ndarray = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) +_VIEW = _ARRAY_TO_VIEW[::] + +### Test Suite ### + + +@pytest.mark.parametrize( + "arr, original, expected", + [ + ( # Number 0 Different arrays + np.array([1, 2, 3]), + np.array([1, 2, 3]), + True, + ), + ( # Number 1 Array and list + np.array([1, 2, 3]), + [1, 2, 3], + True, + ), + ( # Number 2 Different data types + np.array([1, 2, 3]), + np.array([1, 2, 3], dtype=np.float64), + True, + ), + ( # Number 3 Different view and array + _ARRAY_TO_VIEW[0:3], + np.array([1, 2, 3]), + False, + ), + ( # Number 4 Same array + _ARRAY_TO_VIEW, + _ARRAY_TO_VIEW, + False, + ), + ( # Number 5 Same view and array + _VIEW, + _ARRAY_TO_VIEW, + False, + ), + ], +) +def test_datacopied( + arr: np.ndarray, + original: Union[np.ndarray, List], + expected: bool, +) -> None: + """ + Tests the function that checks if a NumPy array has been copied from another array + or list. 
+ + """ + + assert _datacopied(arr, original) == expected + + +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("overwrite_b", [True, False]) +@pytest.mark.parametrize("num_right_hand_sides", [0, 1, 2]) +@pytest.mark.parametrize("num_superdiagonals", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize("num_subdiagonals", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize( + "num_rows", + [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 50, + 51, + 100, + 101, + 500, + 501, + 1_000, + 1001, + 5_000, + 5001, + ], +) +def test_lu_banded_solve( + num_rows: int, + num_subdiagonals: int, + num_superdiagonals: int, + num_right_hand_sides: int, + overwrite_b: bool, + with_finite_check: bool, +) -> None: + """ + Tests the separate LU decomposition followed by solving a system of linear equations + for banded matrices against the SciPy solution. + + NOTE: A number of 0 right-hand sides are used for making the vector to solve a + NOTE: 1D-Array. + + """ + + # if the matrix cannot exist with the given shape, the test is skipped + num_rows_min = num_subdiagonals + num_superdiagonals + 1 + if num_rows < num_rows_min: + pytest.skip( + f"Test skipped because the number of rows {num_rows} is smaller than the " + f"minimum number of rows {num_rows_min} required by the number of sub- " + f"{num_subdiagonals} and superdiagonals {num_superdiagonals}." 
+ ) + + # a random banded matrix and right-hand-side-vector/-matrix are generated + np.random.seed(seed=42) + ab = -1.0 + 2.0 * np.random.rand( + num_subdiagonals + num_superdiagonals + 1, num_rows + ) + b = ( + np.random.rand(num_rows) + if num_right_hand_sides == 0 + else np.random.rand(num_rows, num_right_hand_sides) + ) + + # first, the Scipy solution is computed because if this fails due to singularity, + # the test has to not test for equivalent results, but for failure + # NOTE: failure is indicated by the solution being ``None`` + # NOTE: this order of evaluation is also better for testing if the overwrite flag + # is working correctly because otherwise SciPy would get the overwritten b + l_and_u = (num_subdiagonals, num_superdiagonals) + x_reference = None + try: + x_reference = scipy_solve_banded( + l_and_u=l_and_u, + ab=ab, + b=b, + check_finite=True, + ) + + # NOTE: even if SciPy computes the solution "successfully", there might be NaNs + # NOTE: in the result, so the test has to check for that as well + if np.any(np.isnan(x_reference)): + x_reference = None + + except np.linalg.LinAlgError: + pass + + # the banded matrix is LU decomposed with the respective Chemotools function + lu_factorization = lu_banded( + l_and_u=l_and_u, + ab=ab, + check_finite=with_finite_check, + ) + + # the linear system is solved with the respective Chemotools function + # Case 1: Scipy failed + if x_reference is None: + # in this case, the Chemotools function has to raise an exception as well + with pytest.raises(np.linalg.LinAlgError): + x_chemotools = lu_solve_banded( + lub_factorization=lu_factorization, + b=b, + overwrite_b=overwrite_b, + check_finite=with_finite_check, + ) + return + + # Case 2: Scipy succeeded + # in this case, the Chemotools function has to return the same result as Scipy + x_chemotools = lu_solve_banded( + lub_factorization=lu_factorization, + b=b, + overwrite_b=overwrite_b, + check_finite=with_finite_check, + ) + + # NOTE: the following check has to 
be fairly strict when it comes to equivalence + # NOTE: since the SciPy and Chemotools are basically doing the same under the hood + # NOTE: when it comes to the solution process (first LU, then triangular solve) + assert np.allclose( + x_chemotools, + x_reference, + atol=1e-10, + rtol=1e-10, + ) + + +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("ensure_positive_definite", [True, False]) +@pytest.mark.parametrize("num_sub_and_superdiagonals", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize( + "num_rows", + [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 50, + 51, + 100, + 101, + 500, + 501, + 1_000, + 1_001, + 5_000, + 5_001, + ], +) +def test_lu_banded_slogdet( + num_rows: int, + num_sub_and_superdiagonals: int, + ensure_positive_definite: bool, + with_finite_check: bool, +) -> None: + """ + Tests the computation of the sign and log determinant of a banded matrix from + its LU decomposition by comparing it to NumPy's ``slogdet``. + + """ + + # if the matrix cannot exist with the given shape, the test is skipped + n_rows_min = 2 * num_sub_and_superdiagonals + 1 + if num_rows < n_rows_min: + pytest.skip( + f"Test skipped because the number of rows {num_rows} is smaller than the " + f"minimum number of rows {n_rows_min} required by the number of sub- " + f"{num_sub_and_superdiagonals} and superdiagonals " + f"{num_sub_and_superdiagonals}." 
+ ) + + # a random banded matrix is generated in the upper banded storage used for Cholesky + # decomposition + np.random.seed(seed=42) + # NOTE: the diagonal lifting ensures that the matrix is positive and diagonally + # dominant, which makes it positive definite, but this is only done if the + # flag is set + # NOTE: for an indefinite matrix, the matrix is shifted and scaled to be in the + # interval [-1, 1] + ab_for_chol = np.random.rand(num_sub_and_superdiagonals + 1, num_rows) + if ensure_positive_definite: + ab_for_chol[num_sub_and_superdiagonals, ::] += 1.0 + 2.0 * float( + num_sub_and_superdiagonals + ) + else: + ab_for_chol = -1.0 + 2.0 * ab_for_chol + + l_and_u, ab_for_lu = convert_upper_chol_banded_to_lu_banded_storage(ab=ab_for_chol) + + # first, the log determinant is computed with the literal definition as the sum of + # the logarithms of the eigenvalues of the matrix + sign_reference, logabsdet_reference = get_banded_slogdet(ab=ab_for_chol) + + # the banded matrix is LU decomposed with the respective Chemotools function ... + lu_fact = lu_banded( + l_and_u=l_and_u, + ab=ab_for_lu, + check_finite=with_finite_check, + ) + # ... and the sign and log determinant are computed + sign_chemotools, logabsdet_chemotools = slogdet_lu_banded(lub_factorization=lu_fact) + + # the results are compared + assert np.isclose( + sign_chemotools, + sign_reference, + atol=1e-5, + rtol=1e-5, + ) + assert np.isclose( + logabsdet_chemotools, + logabsdet_reference, + atol=1e-5, + rtol=1e-5, + ) diff --git a/tests/tests_for_utils/test_check_inputs.py b/tests/tests_for_utils/test_check_inputs.py new file mode 100644 index 00000000..749e4399 --- /dev/null +++ b/tests/tests_for_utils/test_check_inputs.py @@ -0,0 +1,209 @@ +""" +Test suite for the utility models in the :mod:`chemotools.utils.check_inputs` module. 
+ +""" + +### Imports ### + +from typing import Optional, Tuple, Union + +import numpy as np +import pytest + +from chemotools.utils.check_inputs import check_weights + +### Test Suite ### + + +@pytest.mark.parametrize( + "weights, expected_result", + [ + ( # Number 0 (no weights; for all) + None, + (None, True), + ), + ( # Number 1 (valid 1D-weights; for all) + np.array([1.0, 2.0, 3.0]), + (np.array([[1.0, 2.0, 3.0]]), True), + ), + ( # Number 2 (valid 2D-weights; for all) + np.array([[1.0, 2.0, 3.0]]), + (np.array([[1.0, 2.0, 3.0]]), True), + ), + ( # Number 3 (valid 2D-weights; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]), + (np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]), False), + ), + ( # Number 4 (invalid 1D-weights with wrong column number; for all) + np.array([1.0, 2.0, 3.0, 4.0]), + ValueError("Weights must have 3 columns, but they have"), + ), + ( # Number 5 (invalid 1D-weights with wrong column number; for all) + np.array([1.0, 2.0]), + ValueError("Weights must have 3 columns, but they have"), + ), + ( # Number 6 (invalid 2D-weights with wrong column number; for all) + np.array([[1.0, 2.0, 3.0, 4.0]]), + ValueError("Weights must have 3 columns, but they have"), + ), + ( # Number 7 (invalid 2D-weights with wrong column number; for all) + np.array([[1.0, 2.0]]), + ValueError("Weights must have 3 columns, but they have"), + ), + ( # Number 8 (invalid 2D-weights with wrong row number; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), + ValueError("Weights must have either 1 or 3 rows, but they have"), + ), + ( # Number 9 (invalid 2D-weights with wrong row number; individual) + np.array( + [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], [10.0, 11.0, 12.0]] + ), + ValueError("Weights must have either 1 or 3 rows, but they have"), + ), + ( + # Number 10 (invalid 2D-weights with wrong row and column number; + # individual) + np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), + ValueError("Weights 
must have either 1 or 3 rows, but they have"), + ), + ( + # Number 10 (invalid 2D-weights with wrong row and column number; + # individual) + np.array([[1.0, 2.0], [3.0, 4.0]]), + ValueError("Weights must have either 1 or 3 rows, but they have"), + ), + ( + # Number 11 (invalid 2D-weights with wrong row and column number; + # individual) + np.array( + [ + [1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0], + [13.0, 14.0, 15.0, 16.0], + ] + ), + ValueError("Weights must have either 1 or 3 rows, but they have"), + ), + ( + # Number 12 (invalid 2D-weights with wrong row and column number; + # individual) + np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]), + ValueError("Weights must have either 1 or 3 rows, but they have"), + ), + ( # Number 13 (invalid 1D-weights with negative entry; for all) + np.array([1.0, 2.0, -1_000.0]), + ValueError("Weights may not be negative, but"), + ), + ( # Number 14 (invalid 2D-weights with negative entry; for all) + np.array([[1.0, 2.0, -1_000.0]]), + ValueError("Weights may not be negative, but"), + ), + ( # Number 15 (invalid 2D-weights with negative entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, -1_000.0], [7.0, 8.0, 9.0]]), + ValueError("Weights may not be negative, but"), + ), + ( # Number 16 (invalid 1D-weights with NaN entry; for all) + np.array([1.0, 2.0, np.nan]), + ValueError("Input contains NaN"), + ), + ( # Number 17 (invalid 2D-weights with NaN entry; for all) + np.array([[1.0, 2.0, np.nan]]), + ValueError("Input contains NaN"), + ), + ( # Number 18 (invalid 2D-weights with NaN entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, np.nan], [7.0, 8.0, 9.0]]), + ValueError("Input contains NaN"), + ), + ( # Number 19 (invalid 1D-weights with inf entry; for all) + np.array([1.0, 2.0, np.inf]), + ValueError("Input contains infinity or a value too large"), + ), + ( # Number 20 (invalid 2D-weights with inf entry; for all) + np.array([[1.0, 2.0, np.inf]]), + ValueError("Input contains 
infinity or a value too large"), + ), + ( # Number 21 (invalid 2D-weights with inf entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, np.inf], [7.0, 8.0, 9.0]]), + ValueError("Input contains infinity or a value too large"), + ), + ( # Number 22 (invalid 1D-weights with -inf entry; for all) + np.array([1.0, 2.0, -np.inf]), + ValueError("Input contains infinity or a value too large"), + ), + ( # Number 23 (invalid 2D-weights with -inf entry; for all) + np.array([[1.0, 2.0, -np.inf]]), + ValueError("Input contains infinity or a value too large"), + ), + ( # Number 24 (invalid 2D-weights with -inf entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, -np.inf], [7.0, 8.0, 9.0]]), + ValueError("Input contains infinity or a value too large"), + ), + ( # Number 25 (invalid 1D-weights with all zero entries; for all) + np.array([0.0, 0.0, 0.0]), + ValueError("At least one weights needs to be > 0, but"), + ), + ( # Number 26 (invalid 2D-weights with all zero entries; for all) + np.array([[0.0, 0.0, 0.0]]), + ValueError("At least one weights needs to be > 0, but"), + ), + ( # Number 27 (invalid 2D-weights with all zero entries; individual) + np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), + ValueError("At least one weights needs to be > 0, but"), + ), + ], +) +def test_weight_checks( + weights: Optional[np.ndarray], + expected_result: Union[Tuple[Optional[np.ndarray], bool], Exception], +) -> None: + """ + Tests the function :func:`chemotools.utils.check_inputs.check_weights` for different + valid and invalid input combinations. 
+ + """ + + # the size of the matrix against which the weights are checked is set + n_samples, n_features = 3, 3 + + # if the expected output is an exception, the test is run in a context manager to + # check if the respective exception is raised + if isinstance(expected_result, Exception): + error_catch_phrase = str(expected_result) + with pytest.raises( + type(expected_result), + match=error_catch_phrase, + ): + check_weights( + weights=weights, + n_samples=n_samples, + n_features=n_features, + ) + + return + + # otherwise, the output is compared to the expected output + ref_weights, ref_same_weights_for_all = expected_result + checked_weights, same_weights_for_all = check_weights( + weights=weights, + n_samples=n_samples, + n_features=n_features, + ) + + # Case 1: the reference weights are None + if ref_weights is None: + assert checked_weights is None + assert same_weights_for_all is ref_same_weights_for_all + + return + + # Case 2: the reference weights are an Array and the checked weights are as well + if isinstance(ref_weights, np.ndarray) and isinstance(checked_weights, np.ndarray): + assert np.array_equal(checked_weights, ref_weights) + assert same_weights_for_all is ref_same_weights_for_all + + return + + raise AssertionError( + "The weights could not be checked correctly due to a type mismatch." + ) diff --git a/tests/tests_for_utils/test_finite_differences.py b/tests/tests_for_utils/test_finite_differences.py new file mode 100644 index 00000000..ccc3ded7 --- /dev/null +++ b/tests/tests_for_utils/test_finite_differences.py @@ -0,0 +1,453 @@ +""" +Test suite for the utility functions in the :mod:`chemotools.utils.finite_differences` +module. 
+ +""" + +### Imports ### + +from typing import List, Optional + +import numpy as np +import pytest + +from chemotools.utils._finite_differences import ( + central_finite_difference_coefficients, + estimate_noise_stddev, + forward_finite_difference_kernel, + squared_forward_difference_matrix_banded, +) +from tests.fixtures import noise_level_estimation_references # noqa: F401 +from tests.fixtures import noise_level_estimation_signal # noqa: F401 +from tests.fixtures import reference_finite_differences # noqa: F401 +from tests.tests_for_utils.utils_funcs import ( + conv_upper_cho_banded_storage_to_sparse, + multiply_vect_with_squared_forward_finite_differences_original_first, + multiply_vect_with_squared_forward_finite_differences_transpose_first, +) +from tests.tests_for_utils.utils_models import ( + NoiseEstimationReference, + RefDifferenceKernel, +) + +### Test Suite ### + + +# parametrizes the fixture ``reference_finite_differences`` +@pytest.mark.parametrize("kind", ["forward"]) +def test_forward_diff_kernel( + reference_finite_differences: List[RefDifferenceKernel], # noqa: F811 +) -> None: + """ + Tests the calculation of the forward finite difference kernel. + + """ + + # each kernel is calculated and compared to the reference + for kernel_reference in reference_finite_differences: + kernel_chemotools = forward_finite_difference_kernel( + differences=kernel_reference.differences + ) + + # first, the size of the kernel is checked ... + assert kernel_chemotools.size == kernel_reference.size, ( + f"Difference order {kernel_reference.differences} with accuracy 1 - " + f"Expected kernel size {kernel_reference.size} but got " + f"{kernel_chemotools.size}" + ) + # ... 
followed by the comparison of the kernel itself + assert np.allclose(kernel_chemotools, kernel_reference.kernel, atol=1e-8), ( + f"Difference order {kernel_reference.differences} with accuracy 1 - " + f"Expected kernel {kernel_reference.kernel.tolist()} but got " + f"{kernel_chemotools.tolist()}" + ) + + +# parametrizes the fixture ``reference_finite_differences`` +@pytest.mark.parametrize("kind", ["central"]) +def test_central_diff_kernel( + reference_finite_differences: List[RefDifferenceKernel], # noqa: F811 +) -> None: + """ + Tests the calculation of the central finite difference kernel. + + """ + + # each kernel is calculated and compared to the reference + for kernel_reference in reference_finite_differences: + kernel_chemotools = central_finite_difference_coefficients( + differences=kernel_reference.differences, + accuracy=kernel_reference.accuracy, + ) + + # first, the size of the kernel is checked ... + assert kernel_chemotools.size == kernel_reference.size, ( + f"Difference order {kernel_reference.differences} with accuracy " + f"{kernel_reference.accuracy} - Expected kernel size " + f"{kernel_reference.size} but got {kernel_chemotools.size}" + ) + # ... followed by the comparison of the kernel itself + assert np.allclose(kernel_chemotools, kernel_reference.kernel, atol=1e-8), ( + f"Difference order {kernel_reference.differences} with accuracy " + f"{kernel_reference.accuracy} - Expected kernel " + f"{kernel_reference.kernel.tolist()} but got {kernel_chemotools.tolist()}" + ) + + +@pytest.mark.parametrize( + "num_additional_values", + list(range(0, 11)) + list(range(20, 101, 10)) + list(range(200, 1001, 100)), +) +@pytest.mark.parametrize("differences", list(range(1, 11))) +def test_squ_fw_fin_diff_mat_cho_banded_orig_first( + differences: int, + num_additional_values: int, +) -> None: + """ + Tests the generation of the squared forward finite difference matrix ``D @ D.T`` + where ``D`` is the forward finite difference matrix. 
+ Here, the original matrix ``D`` and not its transpose is used first. + + It can be effectively tested by means of a convolution of the matrix with a vector + after it was converted from the banded storage to a sparse matrix. + + """ + + # first, the finite difference kernel is calculated + kernel = forward_finite_difference_kernel(differences=differences) + + # then, the banded matrix D @ D.T is generated ... + num_data = kernel.size + num_additional_values + d_dot_dt_banded = squared_forward_difference_matrix_banded( + num_data=num_data, + differences=differences, + original_first=True, + ) + # ... and converted to a sparse matrix + d_dot_dt_sparse = conv_upper_cho_banded_storage_to_sparse(ab=d_dot_dt_banded) + + # a random vector is created + np.random.seed(42) + vector = np.random.rand(num_additional_values + 1) + + # this vector is multiplied with the matrix + result_chemotools = d_dot_dt_sparse @ vector + + # afterwards, the result is compared to the result of the convolution + result_reference = ( + multiply_vect_with_squared_forward_finite_differences_original_first( + differences=differences, + kernel=kernel, + vector=vector, + ) + ) + + # the results are compared + # NOTE: the following check has to be fairly strict when it comes to equivalence + # since the NumPy and Chemotools are basically doing the same under the hood + assert np.allclose( + result_chemotools, + result_reference, + atol=1e-10, + rtol=1e-10, + ) + + +@pytest.mark.parametrize( + "num_additional_values", + list(range(0, 11)) + list(range(20, 101, 10)) + list(range(200, 1001, 100)), +) +@pytest.mark.parametrize("differences", list(range(1, 11))) +def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( + differences: int, + num_additional_values: int, +) -> None: + """ + Tests the generation of the squared forward finite difference matrix ``D.T @ D`` + where ``D`` is the forward finite difference matrix. + Here, the transpose matrix ``D.T`` and not the original matrix is used first. 
+ + It can be effectively tested by means of a convolution of the matrix with a vector + after it was converted from the banded storage to a sparse matrix. + + """ + + # first, the finite difference kernel is calculated + kernel = forward_finite_difference_kernel(differences=differences) + + # then, the banded matrix D.T @ D is generated ... + num_data = kernel.size + num_additional_values + dt_dot_d_banded = squared_forward_difference_matrix_banded( + num_data=num_data, + differences=differences, + original_first=False, + ) + # ... and converted to a sparse matrix + dt_dot_d_sparse = conv_upper_cho_banded_storage_to_sparse(ab=dt_dot_d_banded) + + # a random vector is created + np.random.seed(42) + vector = np.random.rand(num_data) + + # this vector is multiplied with the matrix + result_chemotools = dt_dot_d_sparse @ vector + + # afterwards, the result is compared to the result of the convolution + result_reference = ( + multiply_vect_with_squared_forward_finite_differences_transpose_first( + differences=differences, + kernel=kernel, + vector=vector, + ) + ) + + # the results are compared + # NOTE: the following check has to be fairly strict when it comes to equivalence + # since the NumPy and Chemotools are basically doing the same under the hood + assert np.allclose(result_chemotools, result_reference, atol=1e-10, rtol=1e-10) + + +@pytest.mark.parametrize( + "series, differences, accuracy, window_size, stddev_power, stddev_min", + [ + ( # Number 0 series is too small for difference kernel + np.arange(start=0, stop=5), + 10, + 2, + 3, + 1, + 1e-10, + ), + ( # Number 1 series is too small for difference kernel + np.arange(start=0, stop=5), + 10, + 2, + None, + 1, + 1e-10, + ), + ( # Number 2 series is too small for window size + np.arange(start=0, stop=5), + 1, + 2, + 11, + 1, + 1e-10, + ), + ( # Number 3 the difference order is 0 + np.arange(start=0, stop=10), + 0, + 2, + 3, + 1, + 1e-10, + ), + ( # Number 4 the difference order is negative + np.arange(start=0, 
stop=10), + -1, + 2, + 3, + 1, + 1e-10, + ), + ( # Number 5 the accuracy is odd + np.arange(start=0, stop=10), + 2, + 3, + 3, + 1, + 1e-10, + ), + ( # Number 6 the accuracy is odd + np.arange(start=0, stop=10), + 2, + 5, + 3, + 1, + 1e-10, + ), + ( # Number 7 the accuracy is 1 + np.arange(start=0, stop=10), + 2, + 1, + 3, + 1, + 1e-10, + ), + ( # Number 8 the accuracy is 0 + np.arange(start=0, stop=10), + 2, + 0, + 3, + 1, + 1e-10, + ), + ( # Number 9 the accuracy is negative + np.arange(start=0, stop=10), + 2, + -1, + 3, + 1, + 1e-10, + ), + ( # Number 10 the window size is even + np.arange(start=0, stop=10), + 1, + 2, + 6, + 1, + 1e-10, + ), + ( # Number 11 the window size is 0 + np.arange(start=0, stop=10), + 1, + 2, + 0, + 1, + 1e-10, + ), + ( # Number 12 the window size is negative + np.arange(start=0, stop=10), + 1, + 2, + -1, + 1, + 1e-10, + ), + ( # Number 13 the power is -3 + np.arange(start=0, stop=10), + 1, + 2, + 3, + -3, + 1e-10, + ), + ( # Number 14 the power is 3 + np.arange(start=0, stop=10), + 1, + 2, + 3, + 3, + 1e-10, + ), + ( # Number 15 the minimum standard deviation is zero + np.arange(start=0, stop=5), + 1, + 2, + 3, + 1, + 0.0, + ), + ( # Number 16 the minimum standard deviation is negative + np.arange(start=0, stop=5), + 1, + 2, + 3, + 1, + -10.0, + ), + ], +) +def test_estimate_noise_stddev_invalid_input( + series: np.ndarray, + differences: int, + accuracy: int, + window_size: Optional[int], + stddev_power: int, + stddev_min: float, +) -> None: + """ + Tests the input validation of the function :func:`estimate_noise_stddev`. + + The combinations of + + - the series length, + - the difference order, + - the accuracy, + - the window size, + - the power to which the noise level is raised, and + - the minimum standard deviation + + are chosen such that the input is invalid. 
+ + """ + + with pytest.raises(ValueError): + estimate_noise_stddev( + series=series, + differences=differences, + differences_accuracy=accuracy, + window_size=window_size, + stddev_power=stddev_power, # type: ignore + stddev_min=stddev_min, + ) + + return + + +def test_noise_level_estimation( + noise_level_estimation_signal: np.ndarray, # noqa: F811 + noise_level_estimation_references: List[NoiseEstimationReference], # noqa: F811 +) -> None: + """ + Tests the noise level estimation function :func:`estimate_noise_stddev`. + + The function is tested for all the reference noise levels. + + """ + + for noise_level_reference in noise_level_estimation_references: + # the noise level is estimated + noise_level_chemotools = estimate_noise_stddev( + series=noise_level_estimation_signal, + differences=noise_level_reference.differences, + differences_accuracy=noise_level_reference.accuracy, + window_size=noise_level_reference.window_size, + stddev_min=noise_level_reference.min_noise_level, + ) + # then, the noise level itself is compared to the reference in a quite strict + # way because both results were computed in the same way with the only + # difference being that Chemotools uses Python and the reference uses + # LibreOffice Calc + assert np.allclose( + noise_level_chemotools, + noise_level_reference.noise_level, + rtol=1e-12, + ), ( + f"Original noise level differs from reference noise for differences " + f"{noise_level_reference.differences} with accuracy " + f"{noise_level_reference.accuracy} and window size " + f"{noise_level_reference.window_size} given a minimum standard deviation " + f"of {noise_level_reference.min_noise_level}." 
+ ) + + # then, all the available powers to which the noise level can be raised are + # compared to the reference + for ( + stddev_power, + raised_noise_level_ref, + ) in noise_level_reference.raised_noise_levels.items(): + raised_noise_level = estimate_noise_stddev( + series=noise_level_estimation_signal, + differences=noise_level_reference.differences, + differences_accuracy=noise_level_reference.accuracy, + window_size=noise_level_reference.window_size, + stddev_min=noise_level_reference.min_noise_level, + stddev_power=stddev_power, + ) + + # again, the comparison is quite strict + assert np.allclose( + raised_noise_level, + raised_noise_level_ref, + atol=1e-12, + ), ( + f"Raised noise level differs from reference noise for differences " + f"{noise_level_reference.differences} with accuracy " + f"{noise_level_reference.accuracy} and window size " + f"{noise_level_reference.window_size} given a minimum standard " + f"deviation of {noise_level_reference.min_noise_level} and a power of " + f"{stddev_power}." + ) + + return diff --git a/tests/tests_for_utils/test_models.py b/tests/tests_for_utils/test_models.py new file mode 100644 index 00000000..69f308ca --- /dev/null +++ b/tests/tests_for_utils/test_models.py @@ -0,0 +1,294 @@ +""" +Test suite for the utility models in the :mod:`chemotools.utils.models` module. 
+ +""" + +### Imports ### + +from math import log +from typing import List, Tuple, Union + +import pytest + +from chemotools.utils import _models +from tests.tests_for_utils.utils_models import ExpectedWhittakerSmoothLambda + +### Type aliases ### + +_RealNumeric = Union[float, int] +_LambdaValueNumeric = Union[_RealNumeric, Tuple[_RealNumeric, _RealNumeric]] +_LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] +_WhittakerMethod = Union[str, _models.WhittakerSmoothMethods] +_WhittakerMethodSequence = List[_WhittakerMethod] + +### Constants ### + +_NAN: float = float("nan") +_FIXED_WHITTAKER_METHODS: _WhittakerMethodSequence = [ + "fixed", + _models.WhittakerSmoothMethods.FIXED, +] +_LOGML_WHITTAKER_METHODS: _WhittakerMethodSequence = [ + "logml", + _models.WhittakerSmoothMethods.LOGML, +] +# NOTE: "aauto" is not a typo, but helps to not confuse it with "all" +_aauto_whittaker_methods: _WhittakerMethodSequence = _LOGML_WHITTAKER_METHODS + [] +_all_whittaker_methods: _WhittakerMethodSequence = ( + _FIXED_WHITTAKER_METHODS + _aauto_whittaker_methods +) + + +### Test Suite ### + + +@pytest.mark.parametrize( + "lam, methods, expected", + [ + ( # Number 0 (fixed float; fixed method) + 100.0, + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 1 (fixed integer; fixed method) + 100, + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 2 (coinciding floats; fixed method) + (100.0, 100.0), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + 
), + ( # Number 3 (coinciding integers; fixed method) + (100, 100), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 4 (virtually coinciding floats; fixed method) + (100.0, 100.000001), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 5 (virtually coinciding floats; automated methods) + (100.0, 100.000001), + _aauto_whittaker_methods, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 6 (flipped virtually coinciding floats; fixed method) + (100.000001, 100.0), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 7 (flipped virtually coinciding floats; automated methods) + (100.000001, 100.0), + _aauto_whittaker_methods, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 8 (search space floats; logml method) + (100.0, 10_000.0), + _LOGML_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 9 (search space integers; logml method) + (100, 10_000), + _LOGML_WHITTAKER_METHODS, + 
ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 10 (flipped search space floats; logml method) + (10_000.0, 100.0), + _LOGML_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 11 (flipped search space integers; logml method) + (10_000, 100), + _LOGML_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 12 (fixed zero float; fixed method) + 0.0, + _FIXED_WHITTAKER_METHODS, + ValueError("has to be greater than or equal to the zero tolerance"), + ), + ( # Number 13 (fixed zero integer; fixed method) + 0, + _FIXED_WHITTAKER_METHODS, + ValueError("has to be greater than or equal to the zero tolerance"), + ), + ( # Number 14 (search space floats; fixed method) + (100.0, 10_000.0), + _FIXED_WHITTAKER_METHODS, + ValueError("for the penalty weight lambda are a search space"), + ), + ( + # Number 15 (search space integers; fixed method) + (100, 10_000), + _FIXED_WHITTAKER_METHODS, + ValueError("for the penalty weight lambda are a search space"), + ), + ( # Number 16 (fixed float; automated method) + 100.0, + _aauto_whittaker_methods, + ValueError("was selected for a fixed penalty weight"), + ), + ( + # Number 17 (fixed integer; automated method) + 100, + _aauto_whittaker_methods, + ValueError("was selected for a fixed penalty weight"), + ), + ( # Number 18 (search space floats with zero; all methods) + (0.0, 100.0), + _all_whittaker_methods, + ValueError("have to be greater than or equal to the zero tolerance"), + ), + ( # 
Number 19 (search space integers with zero; all methods) + (0, 100), + _all_whittaker_methods, + ValueError("have to be greater than or equal to the zero tolerance"), + ), + ( # Number 20 (flipped search space floats with zero; all methods) + (100.0, 0.0), + _all_whittaker_methods, + ValueError("have to be greater than or equal to the zero tolerance"), + ), + ( # Number 21 (flipped search space integers with zero; all methods) + (100, 0), + _all_whittaker_methods, + ValueError("have to be greater than or equal to the zero tolerance"), + ), + ( # Number 22 (all float zeros; all methods) + (0.0, 0.0), + _all_whittaker_methods, + ValueError("have to be greater than or equal to the zero tolerance"), + ), + ( # Number 23 (all integer zeros; all methods) + (0, 0), + _all_whittaker_methods, + ValueError("have to be greater than or equal to the zero tolerance"), + ), + ( # Number 24 (wrong type; all methods) + "error", + _all_whittaker_methods, + TypeError("have to be either a scalar or a tuple of two values"), + ), + ( # Number 25 (fixed float; wrong method) + 100.0, + "error", + ValueError("is not valid. Please choose one of the following"), + ), + ( # Number 26 (fixed integer; wrong method) + 100, + "error", + ValueError("is not valid. Please choose one of the following"), + ), + ], +) +def test_whittaker_smooth_lambda_model( + lam: _LambdaValueNumericOrFlawed, + methods: _WhittakerMethodSequence, + expected: Union[ExpectedWhittakerSmoothLambda, Exception], +) -> None: + """ + Tests the class :class:`WhittakerSmoothLambda` for the correct behavior of its + ``__post_init__`` method.
+ + """ + + # if the expected result is an exception, it is tested whether the correct exception + # is raised + if isinstance(expected, Exception): + error_catch_phrase = str(expected) + for meth in methods: + with pytest.raises( + type(expected), + match=error_catch_phrase, + ): + _models.WhittakerSmoothLambda( + bounds=lam, # type: ignore + method=meth, # type: ignore + ) + + return + + # if the expected result is a valid result, the class is instantiated and the + # generated object is compared to the expected result + for meth in methods: + lambda_model = _models.WhittakerSmoothLambda( + bounds=lam, # type: ignore + method=meth, # type: ignore + ) + + expected.assert_is_equal_to(other=lambda_model) diff --git a/tests/tests_for_utils/test_whittaker_base.py b/tests/tests_for_utils/test_whittaker_base.py new file mode 100644 index 00000000..0c0592a7 --- /dev/null +++ b/tests/tests_for_utils/test_whittaker_base.py @@ -0,0 +1,603 @@ +""" +Test suite for the utility functions in the :mod:`chemotools.utils.whittaker_base` +module. 
+ +""" + +### Imports ### + +from math import log +from typing import Any, Tuple, Union + +import numpy as np +import pytest + +from chemotools.utils import _models +from chemotools.utils._whittaker_base.auto_lambda.shared import ( + smooth_weighted_sum_of_squared_residuals, +) +from chemotools.utils._whittaker_base.initialisation import ( + get_checked_lambda, + get_penalty_log_pseudo_determinant, +) +from chemotools.utils._whittaker_base.main import WhittakerLikeSolver +from chemotools.utils._whittaker_base.misc import get_weight_generator +from chemotools.utils._whittaker_base.solvers import solve_normal_equations +from tests.fixtures import noise_level_whittaker_auto_lambda # noqa: F401 +from tests.fixtures import spectrum_whittaker_auto_lambda # noqa: F401 +from tests.tests_for_utils.utils_funcs import ( + find_whittaker_smooth_opt_lambda_log_marginal_likelihood, +) +from tests.tests_for_utils.utils_models import ExpectedWhittakerSmoothLambda + +### Type Aliases ### + +_RealNumeric = Union[float, int] +_WhittakerMethod = Union[str, _models.WhittakerSmoothMethods] +_LambdaSpecs = Union[_RealNumeric, Tuple[_RealNumeric, _RealNumeric, _WhittakerMethod]] +_LambdaSpecsOrFlawed = Union[_LambdaSpecs, str] + +### Constants ### + +_NAN: float = float("nan") + +### Test Suite ### + + +@pytest.mark.parametrize( + "lam, expected_result", + [ + ( # Number 0 (fixed float) + 100.0, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 1 (fixed integer) + 100, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 2 (float search space, log marginal likelihood method enum) + (100.0, 10_000.0, _models.WhittakerSmoothMethods.LOGML), + ExpectedWhittakerSmoothLambda( + 
fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 3 (float search space, log marginal likelihood method string) + (100.0, 10_000.0, "logml"), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 4 (integer search space, log marginal likelihood method enum) + (100, 10_000, _models.WhittakerSmoothMethods.LOGML), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 5 (integer search space, log marginal likelihood method string) + (100, 10_000, "logml"), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 6 (dataclass float specification; fixed method) + _models.WhittakerSmoothLambda( + bounds=100.0, + method=_models.WhittakerSmoothMethods.FIXED, + ), + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 7 (dataclass integer specification; fixed method) + _models.WhittakerSmoothLambda( + bounds=100, + method=_models.WhittakerSmoothMethods.FIXED, + ), + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=_models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 8 (dataclass float specification; log marginal likelihood method) + _models.WhittakerSmoothLambda( + bounds=(100.0, 
10_000.0), + method=_models.WhittakerSmoothMethods.LOGML, + ), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 9 (dataclass integer specification; log marginal likelihood method) + _models.WhittakerSmoothLambda( + bounds=(100, 10_000), + method=_models.WhittakerSmoothMethods.LOGML, + ), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=_models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 10 (wrong length tuple) + (100.0, 10_000.0), + ValueError("must be a tuple of three elements"), + ), + ( # Number 11 (wrong type) + "error", + TypeError("must be an integer, a float, a tuple of"), + ), + ], +) +def test_get_checked_lambda( + lam: _LambdaSpecsOrFlawed, + expected_result: Union[ExpectedWhittakerSmoothLambda, Exception], +) -> None: + """ + Tests the function that casts a penalty weight lambda to the respective dataclass. + + """ + + # if the expected output is an exception, the test is run in a context manager + if isinstance(expected_result, Exception): + error_catch_phrase = str(expected_result) + with pytest.raises(type(expected_result), match=error_catch_phrase): + get_checked_lambda(lam=lam) + + return + + # otherwise, the output dataclass is compared to the expected output + lambda_model = get_checked_lambda(lam=lam) + if isinstance(lambda_model, _models.WhittakerSmoothLambda): + expected_result.assert_is_equal_to(other=lambda_model) + + return + + raise AssertionError( + "The lambda value could not be checked correctly since the returned value is " + "not an instance of the class 'WhittakerSmoothLambda'." 
+ ) + + +@pytest.mark.parametrize( + "weights, expected_output", + [ + (None, 1.0), # Number 0 (no weights) + ( # Number 1 (2D weights) + np.ones(shape=(10, 1_000), dtype=np.float64), + np.ones(shape=(1_000), dtype=np.float64), + ), + ( # Number 2 (2D weights out of bounds) + np.ones(shape=(5, 1_000), dtype=np.float64), + IndexError("is out of bounds for axis 0 with size"), + ), + ( # Number 3 (1D weights) + np.ones(shape=(1_000), dtype=np.float64), + ValueError("If provided as an Array, the weights must be a 2D-Array"), + ), + ( # Number 4 (3D weights) + np.ones(shape=(1, 5, 1_000), dtype=np.float64), + ValueError("If provided as an Array, the weights must be a 2D-Array"), + ), + ( # Number 5 (wrong type) + "error", + TypeError("must either be None or a NumPy-2D-Array"), + ), + ], +) +def test_weight_generator_identical_weights( + weights: Any, + expected_output: Union[np.ndarray, float, Exception], +) -> None: + """ + Tests the weight generator when provided with weights that are identical for all + signals. 
+ + """ + + # the number of series is defined + num_series = 10 + + # if the expected output is an exception, the test is run in a context manager + if isinstance(expected_output, Exception): + error_catch_phrase = str(expected_output) + with pytest.raises(type(expected_output), match=error_catch_phrase): + for _ in get_weight_generator(weights=weights, num_series=num_series): + pass + + return + + # otherwise, the output is compared to the expected output + # Case 1: the expected output is a scalar + if isinstance(expected_output, (float, int)): + for wght in get_weight_generator(weights=weights, num_series=num_series): + assert isinstance(wght, (float, int)) + assert wght == expected_output + + return + + # Case 2: the expected output is an array + for wght in get_weight_generator(weights=weights, num_series=num_series): + assert isinstance(wght, np.ndarray) + assert np.array_equal(wght, expected_output) + + +def test_weight_generator_different_weights() -> None: + """ + Tests the weight generator when provided with weights that are different for each + signal. + + """ + + # the weights are defined + weights = np.array( + [ + [1.0, 2.0, 3.0, 4.0, 5.0], + [6.0, 7.0, 8.0, 9.0, 10.0], + [11.0, 12.0, 13.0, 14.0, 15.0], + ] + ) + weights_reference = weights.copy() + + # the generator is tested + for index, wght in enumerate( + get_weight_generator(weights=weights, num_series=weights.shape[0]) + ): + assert np.array_equal(wght, weights_reference[index, ::]) + + +@pytest.mark.parametrize( + "with_weights, weighted_residual_sum_of_squared_expected", + [ + (True, 244_9755_000.0), + (False, 490_000.0), + ], +) +def test_smooth_weighted_residual_sum_of_squares( + with_weights: bool, + weighted_residual_sum_of_squared_expected: float, +) -> None: + """ + Tests the weighted residual sum of squares calculation. 
+ + """ + + # two series are generated where the difference between the elements is 7.0 + np.random.seed(42) + num_data = 10_000 + a_signs = np.random.choice([-1.0, 1.0], size=(num_data,), replace=True) + a_series = a_signs * 4.5 + b_series = (-1.0) * a_signs * 2.5 + + # the weights are generated + weights = ( + np.arange(start=0, stop=num_data, step=1.0, dtype=np.float64) + if with_weights + else 1.0 + ) + + # the wrss is calculated ... + weighted_sum_of_squared_residuals_chemotools = ( + smooth_weighted_sum_of_squared_residuals( + rhs_b=a_series, + rhs_b_smooth=b_series, + weights=weights, + ) + ) + + # ... and compared to the expected value with a very strict tolerance + assert np.isclose( + weighted_sum_of_squared_residuals_chemotools, + weighted_residual_sum_of_squared_expected, + atol=1e-13, + rtol=0.0, + ) + + +# TODO: due to ill-conditioning, this is highly limited in the differences and number +# of data points; in the future, this should be tackled by QR-decomposition for +# extra numerical stability +@pytest.mark.parametrize( + "differences, num_data_from, num_data_to", + [ + (1, 0, 2_000), + (1, 2_001, 4_000), + (1, 4_001, 6_000), + (1, 6_001, 8_000), + (1, 8_001, 10_000), + (2, 0, 2_000), + (2, 2_001, 4_000), + (2, 4_001, 6_000), + (2, 6_001, 8_000), + (2, 8_001, 10_000), + ], +) +def test_penalty_log_pseudo_det_can_compute( + differences: int, + num_data_from: int, + num_data_to: int, +) -> None: + """ + Tests the log pseudo-determinant of the penalty matrix for all the difference orders + and number of data points. + The test is successful if the function does not raise an exception. 
+ + """ + + for num_data in range(max(differences + 1, num_data_from), num_data_to + 1): + get_penalty_log_pseudo_determinant( + num_data=num_data, + differences=differences, + dtype=np.float64, + ) + + +# TODO: this test will not 100% reflect reality as intended; in the future this should +# be tested with the LAPACK function ``dgbcon`` to check the condition number; +# right now, it is set to a number of data points that causes the intended +# failure, but in the future, the condition number has to be used to detect +# ill-conditioning +def test_penalty_log_pseudo_determinant_breaks_ill_conditioned() -> None: + """ + Tests that the log pseudo-determinant of the penalty matrix breaks when the matrix + is ill-conditioned. + + """ + + # the difference order and number of data points are set so high that the matrix + # becomes ill-conditioned + num_data = 1_000 + differences = 10 + + # the function is tested for breaking + with pytest.raises( + RuntimeError, + match="The pseudo-determinant of the penalty D.T @ D matrix is negative", + ): + get_penalty_log_pseudo_determinant( + num_data=num_data, + differences=differences, + dtype=np.float64, + ) + + return + + +# TODO: this test will not 100% reflect reality as intended; in the future this should +# be tested with the LAPACK function ``dgbcon`` to check the condition number; +# right now, the matrix is heavily altered to cause the intended failure, but in +# the future, the condition number has to be used to detect ill-conditioning +@pytest.mark.parametrize("with_pentapy", [True, False]) +def test_normal_condition_solve_breaks_ill_conditioned(with_pentapy: bool) -> None: + """ + Tests that the normal equations solver breaks when the matrix is ill-conditioned. + + Note that the conditions for the solver to break will never be met in practice.
+ + """ + + # if pentapy is not installed but required, the test is skipped + if with_pentapy: + try: + import pentapy # noqa: F401 + except ImportError: + pytest.skip("Pentapy is not installed.") + + # a banded ill-conditioned matrix is created that has zeros on the diagonal + num_data = 10_000 + differences = 2 + a_banded = np.ones(shape=(2 * differences + 1, num_data), dtype=np.float64) + a_banded[differences, :] = 0.0 + + # some further required variables are initialised + lam = 1e100 + b_vect = np.ones(shape=(num_data,), dtype=np.float64) + weights = 0.0 + + # Test that the solver breaks + with pytest.raises( + RuntimeError, match="failed to solve the linear system of equations" + ): + solve_normal_equations( + lam=lam, + differences=differences, + l_and_u=(differences, differences), + penalty_matrix_banded=a_banded, + rhs_b_weighted=b_vect, + weights=weights, + pentapy_enabled=with_pentapy, + ) + + +def test_whittakerlike_issues_warning_difference_order_too_high() -> None: + """ + Tests that the class :class:`WhittakerLikeSolver` issues a warning when the + difference order is greater than 2. 
+ + """ + + with pytest.warns( + UserWarning, + match="WARNING: With the current implementation, the numerical stability", + ): + whittaker_base = WhittakerLikeSolver() + whittaker_base._setup_for_fit( + num_data=500, + differences=3, + lam=_models.WhittakerSmoothLambda( + bounds=(100.0, 10_000.0), + method=_models.WhittakerSmoothMethods.LOGML, + ), + child_class_name="pytest_run", + ) + + return + + +# TODO: this can only go to differences 2 due to ill-conditioning; in the future, this +# should be tackled by QR-decomposition for extra numerical stability +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("differences", [1, 2]) +def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( + differences: int, + same_weights_for_all: bool, +) -> None: + """ + Tests that the automatic lambda calculation using the log marginal likelihood method + refuses to work with no weights. + + """ + + # the smoother is initialised ... + num_data = 500 + whittaker_base = WhittakerLikeSolver() + whittaker_base._setup_for_fit( + num_data=num_data, + differences=differences, + lam=_models.WhittakerSmoothLambda( + bounds=(100.0, 10_000.0), + method=_models.WhittakerSmoothMethods.LOGML, + ), + child_class_name="pytest_run", + ) + + # ... 
and the log marginal likelihood method is called without weights + np.random.seed(42) + X = np.random.rand(num_data) + with pytest.raises( + ValueError, + match="is only possible if weights are provided", + ): + whittaker_base._whittaker_solve( + X=X, + weights=None, + use_same_w_for_all=same_weights_for_all, + ) + + +@pytest.mark.parametrize("with_zero_weights", [True, False]) +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("differences", [1, 2]) +@pytest.mark.parametrize("num_series", [1, 5]) +def test_auto_lambda_log_marginal_likelihood( + spectrum_whittaker_auto_lambda: np.ndarray, # noqa: F811 + noise_level_whittaker_auto_lambda: np.ndarray, # noqa: F811 + num_series: int, + differences: int, + same_weights_for_all: bool, + with_zero_weights: bool, +) -> None: + """ + Tests the automatic lambda calculation using the log marginal likelihood method. + + Some of the noise standard deviations in the respective fixture are set to NaN which + allows for two different ways of handling them: + + - with zero weights, which will set the weights of the NaN values to zero, or + - interpolated weights, which will replace the NaN values with linearly interpolated + values which cannot be zero. + + This has slightly different effects on the log marginal likelihood calculation. + + Everything is tested against a from-scratch implementation based on SciPy to ensure + that the test is decoupled from the actual implementation used in Chemotools. 
+ + """ + + # first of all, the NaN values in the noise level are handled + noise_level = noise_level_whittaker_auto_lambda.copy() + + # Case 1: Zero weights + if with_zero_weights: + # this can be achieved by replacing the NaN-values with +inf + noise_level = np.where(np.isnan(noise_level), np.inf, noise_level) + + # Case 2: Interpolated weights + else: + # the NaN-values are replaced by linearly interpolated values + nan_flags = np.isnan(noise_level) + noise_level[nan_flags] = np.interp( + x=np.where(nan_flags)[0], + xp=np.where(~nan_flags)[0], + fp=noise_level[~nan_flags], + ) + + # then, the weights are computed as the square of the inverse noise level ... + weights = (1.0 / np.square(noise_level))[np.newaxis, ::] + # ... and stacked as many times as required + weights = np.tile(weights, reps=(num_series, 1)) + + # then, the spectrum is repeated as many times as required + X = np.tile(spectrum_whittaker_auto_lambda[np.newaxis, ::], reps=(num_series, 1)) + + # the smoothing is performed using the chemotools implementation + lambda_bounds = (1e-15, 1e10) + whittaker_base = WhittakerLikeSolver() + whittaker_base._setup_for_fit( + num_data=X.shape[1], + differences=differences, + lam=_models.WhittakerSmoothLambda( + bounds=lambda_bounds, + method=_models.WhittakerSmoothMethods.LOGML, + ), + child_class_name="pytest_run", + ) + _, lambda_opts = whittaker_base._whittaker_solve( + X=X, + weights=weights, + use_same_w_for_all=same_weights_for_all, + ) + + # the reference optimum lambda is found by a from-scratch implementation that relies + # on dense matrices + lambda_opt_ref, _, _ = find_whittaker_smooth_opt_lambda_log_marginal_likelihood( + b_vect=X[0, ::], + weight_vect=weights[0, ::], + differences=differences, + log_lambda_bounds=(log(lambda_bounds[0]), log(lambda_bounds[1])), + num_optimizations=100, + ) + + # the results are compared with 1% relative tolerance + for lam_opts in lambda_opts: + assert np.isclose(lam_opts, lambda_opt_ref, rtol=1e-2) diff --git
a/tests/tests_for_utils/utils_funcs.py b/tests/tests_for_utils/utils_funcs.py new file mode 100644 index 00000000..ddc5dcb0 --- /dev/null +++ b/tests/tests_for_utils/utils_funcs.py @@ -0,0 +1,1301 @@ +""" +This script implements utility functions required for testing the +:mod:`chemotools.utils` module. + +It contains doctests itself that are executed when running the script, but they are +automatically tested when running the whole test suite as well. This ensures that the +test utilities are working as expected as well. + +""" + +### Imports ### + +from math import exp, isnan +from typing import Tuple, Union + +import numpy as np +from scipy.linalg import eigvals_banded +from scipy.optimize import brute, minimize_scalar +from scipy.sparse import csc_matrix, csr_matrix +from scipy.sparse import diags as sp_diags +from scipy.sparse import linalg as spla + +from chemotools.utils._finite_differences import forward_finite_difference_kernel +from chemotools.utils._whittaker_base import WhittakerLikeSolver + +### Utility Functions ### + + +def float_is_bit_equal(value: float, reference: float) -> bool: + """ + Checks if two floating-point numbers are equal up to the last bit and handles the + case of NaN values as well. 
+ + Doctests + -------- + >>> # Imports + >>> from tests.tests_for_utils.utils_funcs import float_is_bit_equal + + >>> # Test 1 + >>> float_is_bit_equal(value=1.0, reference=1.0) + True + + >>> # Test 2 + >>> float_is_bit_equal(value=1.0, reference=10.0) + False + + >>> # Test 3 + >>> float_is_bit_equal(value=1.0, reference=float("nan")) + False + + >>> # Test 4 + >>> float_is_bit_equal(value=float("nan"), reference=float("nan")) + True + + >>> # Test 5 + >>> float_is_bit_equal(value=float("nan"), reference=1.0) + False + + """ + + if isnan(reference): + return isnan(value) + + return value == reference + + +def conv_upper_cho_banded_storage_to_sparse(ab: np.ndarray) -> csr_matrix: + """ + Converts a banded matrix stored in the upper banded storage used for LAPACK's banded + Cholesky decomposition to a sparse ``CSR`` matrix. + For more information on the banded storage, please see the documentation of + :func:`chemotools.utils.banded_linalg.conv_upper_chol_banded_to_lu_banded_storage`. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from numpy import nan + >>> from tests.tests_for_utils.utils_funcs import ( + ... conv_upper_cho_banded_storage_to_sparse, + ... ) + + >>> # Generating a set of test matrices + >>> # Matrix 1 + >>> ab = np.array( + ... [ + ... [nan, nan, 1., 2., 3.], + ... [nan, 4., 5., 6., 7.], + ... [ 8., 9., 10., 11., 12.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[ 8., 4., 1., 0., 0.], + [ 4., 9., 5., 2., 0.], + [ 1., 5., 10., 6., 3.], + [ 0., 2., 6., 11., 7.], + [ 0., 0., 3., 7., 12.]]) + + >>> # Matrix 2 + >>> ab = np.array( + ... [ + ... [nan, nan, nan, 1.], + ... [nan, nan, 2., 3.], + ... [nan, 4., 5., 6.], + ... [ 7., 8., 9., 10.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[ 7., 4., 2., 1.], + [ 4., 8., 5., 3.], + [ 2., 5., 9., 6.], + [ 1., 3., 6., 10.]]) + + >>> # Matrix 3 + >>> ab = np.array( + ... [ + ... 
[1., 2., 3., 4., 5.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[1., 0., 0., 0., 0.], + [0., 2., 0., 0., 0.], + [0., 0., 3., 0., 0.], + [0., 0., 0., 4., 0.], + [0., 0., 0., 0., 5.]]) + + >>> # Matrix 4 + >>> ab = np.array( + ... [ + ... [nan, 1.], + ... [ 2., 3.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[2., 1.], + [1., 3.]]) + + >>> # Matrix 5 + >>> ab = np.array( + ... [ + ... [nan, nan, nan, nan, nan, nan, nan, nan, nan, 1.], + ... [nan, nan, nan, nan, nan, nan, nan, nan, 2., 3.], + ... [nan, nan, nan, nan, nan, nan, nan, 4., 5., 6.], + ... [nan, nan, nan, nan, nan, nan, 7., 8., 9., 10.], + ... [nan, nan, nan, nan, nan, 11., 12., 13., 14., 15.], + ... [nan, nan, nan, nan, 16., 17., 18., 19., 20., 21.], + ... [nan, nan, nan, 22., 23., 24., 25., 26., 27., 28.], + ... [nan, nan, 29., 30., 31., 32., 33., 34., 35., 36.], + ... [nan, 37., 38., 39., 40., 41., 42., 43., 44., 45.], + ... [46., 47., 48., 49., 50., 51., 52., 53., 54., 55.], + ... ] + ... 
) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[46., 37., 29., 22., 16., 11., 7., 4., 2., 1.], + [37., 47., 38., 30., 23., 17., 12., 8., 5., 3.], + [29., 38., 48., 39., 31., 24., 18., 13., 9., 6.], + [22., 30., 39., 49., 40., 32., 25., 19., 14., 10.], + [16., 23., 31., 40., 50., 41., 33., 26., 20., 15.], + [11., 17., 24., 32., 41., 51., 42., 34., 27., 21.], + [ 7., 12., 18., 25., 33., 42., 52., 43., 35., 28.], + [ 4., 8., 13., 19., 26., 34., 43., 53., 44., 36.], + [ 2., 5., 9., 14., 20., 27., 35., 44., 54., 45.], + [ 1., 3., 6., 10., 15., 21., 28., 36., 45., 55.]]) + + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab[6::]).toarray() + array([[46., 37., 29., 22., 0., 0., 0., 0., 0., 0.], + [37., 47., 38., 30., 23., 0., 0., 0., 0., 0.], + [29., 38., 48., 39., 31., 24., 0., 0., 0., 0.], + [22., 30., 39., 49., 40., 32., 25., 0., 0., 0.], + [ 0., 23., 31., 40., 50., 41., 33., 26., 0., 0.], + [ 0., 0., 24., 32., 41., 51., 42., 34., 27., 0.], + [ 0., 0., 0., 25., 33., 42., 52., 43., 35., 28.], + [ 0., 0., 0., 0., 26., 34., 43., 53., 44., 36.], + [ 0., 0., 0., 0., 0., 27., 35., 44., 54., 45.], + [ 0., 0., 0., 0., 0., 0., 28., 36., 45., 55.]]) + + """ + + # the offset vector is initialised + num_diagonals, num_columns = ab.shape + num_diagonals -= 1 + main_diagonal_index = num_diagonals + offsets = np.arange( + start=-num_diagonals, + stop=num_diagonals + 1, + step=1, + dtype=np.int64, + ) + + # then, the list of diagonals is created + diagonals = [] + # the subdiagonals are added first ... + for offset in range(num_diagonals, 0, -1): + diagonals.append(ab[main_diagonal_index - offset, offset:num_columns]) + + # ... followed by the main diagonal ... + diagonals.append(ab[main_diagonal_index, ::]) + + # ... 
and finally the superdiagonals + for offset in range(1, num_diagonals + 1): + diagonals.append(ab[main_diagonal_index - offset, offset:num_columns]) + + # the sparse matrix is created + return sp_diags( # type: ignore + diagonals=diagonals, + offsets=offsets, # type: ignore + shape=(num_columns, num_columns), + format="csr", + ) + + +def conv_lu_banded_storage_to_sparse( + ab: np.ndarray, + l_and_u: Tuple[int, int], +) -> csr_matrix: + """ + Converts a banded matrix stored in the banded storage used for LAPACK's banded LU + decomposition into a sparse ``CSR`` matrix. + For more information on the banded storage, please see the documentation of + :func:`chemotools.utils.banded_linalg.conv_upper_chol_banded_to_lu_banded_storage`. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from numpy import nan + >>> from tests.tests_for_utils.utils_funcs import ( + ... conv_lu_banded_storage_to_sparse, + ... ) + + >>> # Generating a set of test matrices + >>> # Matrix 1 + >>> l_and_u = (1, 2) + >>> ab = np.array( + ... [ + ... [nan, nan, 1., 2., 3.], + ... [nan, 4., 5., 6., 7.], + ... [ 8., 9., 10., 11., 12.], + ... [13., 14., 15., 16., nan], + ... ] + ... ) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[ 8., 4., 1., 0., 0.], + [13., 9., 5., 2., 0.], + [ 0., 14., 10., 6., 3.], + [ 0., 0., 15., 11., 7.], + [ 0., 0., 0., 16., 12.]]) + + >>> # Matrix 2 + >>> l_and_u = (2, 1) + >>> ab = np.array( + ... [ + ... [nan, 1., 2., 3., 4.], + ... [ 5., 6., 7., 8., 9.], + ... [10., 11., 12., 13., nan], + ... [14., 15., 16., nan, nan], + ... ] + ... ) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[ 5., 1., 0., 0., 0.], + [10., 6., 2., 0., 0.], + [14., 11., 7., 3., 0.], + [ 0., 15., 12., 8., 4.], + [ 0., 0., 16., 13., 9.]]) + + >>> # Matrix 3 + >>> l_and_u = (0, 0) + >>> ab = np.array( + ... [ + ... [1., 2., 3., 4., 5.], + ... ] + ... 
) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[1., 0., 0., 0., 0.], + [0., 2., 0., 0., 0.], + [0., 0., 3., 0., 0.], + [0., 0., 0., 4., 0.], + [0., 0., 0., 0., 5.]]) + + >>> # Matrix 5 + >>> l_and_u = (5, 4) + >>> ab = np.array( + ... [ + ... [nan, nan, nan, nan, 1., 2., 3., 4., 5.], + ... [nan, nan, nan, 6., 7., 8., 9., 10., 11.], + ... [nan, nan, 12., 13., 14., 15., 16., 17., 18.], + ... [nan, 19., 20., 21., 22., 23., 24., 25., 26.], + ... [27., 28., 29., 30., 31., 32., 33., 34., 35.], + ... [36., 37., 38., 39., 40., 41., 42., 43., nan], + ... [44., 45., 46., 47., 48., 49., 50., nan, nan], + ... [51., 52., 53., 54., 55., 56., nan, nan, nan], + ... [57., 58., 59., 60., 61., nan, nan, nan, nan], + ... [62., 63., 64., 65., nan, nan, nan, nan, nan], + ... ] + ... ) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[27., 19., 12., 6., 1., 0., 0., 0., 0.], + [36., 28., 20., 13., 7., 2., 0., 0., 0.], + [44., 37., 29., 21., 14., 8., 3., 0., 0.], + [51., 45., 38., 30., 22., 15., 9., 4., 0.], + [57., 52., 46., 39., 31., 23., 16., 10., 5.], + [62., 58., 53., 47., 40., 32., 24., 17., 11.], + [ 0., 63., 59., 54., 48., 41., 33., 25., 18.], + [ 0., 0., 64., 60., 55., 49., 42., 34., 26.], + [ 0., 0., 0., 65., 61., 56., 50., 43., 35.]]) + + >>> l_and_u = (1, 4) + >>> conv_lu_banded_storage_to_sparse(ab=ab[0:6, ::], l_and_u=l_and_u).toarray() + array([[27., 19., 12., 6., 1., 0., 0., 0., 0.], + [36., 28., 20., 13., 7., 2., 0., 0., 0.], + [ 0., 37., 29., 21., 14., 8., 3., 0., 0.], + [ 0., 0., 38., 30., 22., 15., 9., 4., 0.], + [ 0., 0., 0., 39., 31., 23., 16., 10., 5.], + [ 0., 0., 0., 0., 40., 32., 24., 17., 11.], + [ 0., 0., 0., 0., 0., 41., 33., 25., 18.], + [ 0., 0., 0., 0., 0., 0., 42., 34., 26.], + [ 0., 0., 0., 0., 0., 0., 0., 43., 35.]]) + + >>> l_and_u = (2, 1) + >>> conv_lu_banded_storage_to_sparse(ab=ab[3:7, ::], l_and_u=l_and_u).toarray() + array([[27., 19., 0., 0., 0., 0., 0., 0., 0.], + [36., 28., 
20., 0., 0., 0., 0., 0., 0.], + [44., 37., 29., 21., 0., 0., 0., 0., 0.], + [ 0., 45., 38., 30., 22., 0., 0., 0., 0.], + [ 0., 0., 46., 39., 31., 23., 0., 0., 0.], + [ 0., 0., 0., 47., 40., 32., 24., 0., 0.], + [ 0., 0., 0., 0., 48., 41., 33., 25., 0.], + [ 0., 0., 0., 0., 0., 49., 42., 34., 26.], + [ 0., 0., 0., 0., 0., 0., 50., 43., 35.]]) + + """ + + # the offset vector is initialised + num_subdiagonals, num_superdiagonals = l_and_u + main_diagonal_index = num_superdiagonals + num_columns = ab.shape[1] + offsets = np.arange( + start=-num_subdiagonals, + stop=num_superdiagonals + 1, + step=1, + dtype=np.int64, + ) + + # then, the list of diagonals is created + diagonals = [] + # the subdiagonals are added first ... + for offset in range(num_subdiagonals, 0, -1): + diagonals.append(ab[main_diagonal_index + offset, 0 : num_columns - offset]) + + # ... followed by the main diagonal ... + diagonals.append(ab[main_diagonal_index, ::]) + + # ... and finally the superdiagonals + for offset in range(1, num_superdiagonals + 1): + diagonals.append(ab[main_diagonal_index - offset, offset:num_columns]) + + # the matrix is created from the diagonals + return sp_diags( # type: ignore + diagonals=diagonals, + offsets=offsets, # type: ignore + shape=(num_columns, num_columns), + format="csr", + ) + + +def multiply_vect_with_squared_forward_finite_differences_original_first( + differences: int, + kernel: np.ndarray, + vector: np.ndarray, +) -> np.ndarray: + """ + Multiplies a vector with the squared forward finite difference matrix ``D @ D.T`` + where ``D`` is the forward finite difference matrix. + Here, the original matrix ``D`` and not its transpose is used first. + + This is the same operation as a convolution with the flipped kernel after zero- + padding the vector. Then, the result is again convolved with the kernel, but this + time there is neither zero-padding nor flipping involved. 
+ ``y = D.T @ x`` is the zero-padding and flipping operation, and ``D @ y`` is the + convolution without zero-padding and flipping. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from tests.tests_for_utils.utils_funcs import ( + ... multiply_vect_with_squared_forward_finite_differences_original_first, + ... ) + + >>> # All the following tests were checked using LibreOffice Calc + + >>> # Test 1 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([1, 2]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([0, 3]) + + >>> # Test 2 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([-10, 3, 11]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-23, 5, 19]) + + >>> # Test 3 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([ 25, 17, -13, -12]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 33, 22, -31, -11]) + + >>> # Test 4 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([1, 2, 3]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 1, -4, 11]) + + >>> # Test 5 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([-10, 3, 11, 27]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... 
) + array([-61, 41, -64, 121]) + + >>> # Test 6 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 69, 42, -35, -155, 263]) + + >>> # Test 7 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([1, 2, 3, 4]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 4, 4, -24, 46]) + + >>> # Test 8 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([-10, 3, 11, 27, -5]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-206, 212, -320, 478, -442]) + + >>> # Test 9 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38, -8]) + >>> multiply_vect_with_squared_forward_finite_differences_original_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... 
) + array([ 179, 50, 51, -586, 965, -789]) + + + """ + + # first, the zero-padded vector is convolved with the flipped kernel + vector_padded = np.pad( + vector, + pad_width=(differences, differences), + mode="constant", + constant_values=0, + ) + # NOTE: since NumPy already flips the kernel internally, there is no need to flip it + vector_convolved = np.convolve(vector_padded, kernel, mode="valid") + + # then, the result is convolved with the kernel + # NOTE: here, the kernel has to be flipped to counteract NumPy's internal flipping + return np.convolve(vector_convolved, np.flip(kernel), mode="valid") + + +def multiply_vect_with_squared_forward_finite_differences_transpose_first( + differences: int, + kernel: np.ndarray, + vector: np.ndarray, +) -> np.ndarray: + """ + Multiplies a vector with the squared forward finite difference matrix ``D.T @ D`` + where ``D`` is the forward finite difference matrix. + Here, the transpose matrix ``D.T`` and not the original matrix is used first. + + This is the same operation as a convolution with the kernel followed by another + convolution with the flipped kernel with an intermediate zero-padding. + ``y = D @ x`` is the convolution with the kernel, and ``D.T @ y`` is the convolution + with the flipped kernel and zero-padding. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from tests.tests_for_utils.utils_funcs import ( + ... multiply_vect_with_squared_forward_finite_differences_transpose_first, + ... ) + + >>> # Test 1 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([1, 2]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-1, 1]) + + >>> # Test 2 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([-10, 3, 11]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... 
kernel=kernel, + ... vector=vector, + ... ) + array([-13, 5, 8]) + + >>> # Test 3 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([ 25, 17, -13, -12]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 8, 22, -31, 1]) + + >>> # Test 4 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([1, 2, 3]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([0, 0, 0]) + + >>> # Test 5 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([-10, 3, 11, 27]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ -5, 18, -21, 8]) + + >>> # Test 6 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-22, 75, -35, -67, 49]) + + >>> # Test 7 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([1, 2, 3, 4]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([0, 0, 0, 0]) + + >>> # Test 8 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([-10, 3, 11, 27, -5]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... 
) + array([ -13, 95, -207, 181, -56]) + + >>> # Test 9 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38, -8]) + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ -53, 141, 40, -436, 453, -145]) + + """ + + # first, the vector is convolved with the kernel + # NOTE: here, the kernel has to be flipped to counteract NumPy's internal flipping + vector_convolved = np.convolve(vector, np.flip(kernel), mode="valid") + + # then, the result is convolved with the flipped kernel and zero-padded + vector_padded = np.pad( + vector_convolved, + pad_width=(differences, differences), + mode="constant", + constant_values=0, + ) + # NOTE: since NumPy already flips the kernel internally, there is no need to flip it + return np.convolve(vector_padded, kernel, mode="valid") + + +def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: + """ + Computes the sign and the logarithm of the determinant of a banded matrix stored + in the upper banded storage used for LAPACK's banded Cholesky decomposition. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from tests.tests_for_utils.utils_funcs import ( + ... conv_upper_cho_banded_storage_to_sparse, + ... get_banded_slogdet, + ... ) + + >>> # Generating a set of test matrices + >>> np.random.seed(42) + + >>> # Matrix 1 (positive definite) + >>> semi_bw_plus_one = 3 + >>> # NOTE: the diagonal lifting makes the matrix positive definite + >>> ab_for_chol = np.random.rand(semi_bw_plus_one, 100) + >>> ab_for_chol[semi_bw_plus_one - 1, ::] += 1.0 + 2.0 * float(semi_bw_plus_one) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, 200.55218150013826) + >>> # ... 
and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, 200.55218150013826) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 2 (positive definite) + >>> semi_bw_plus_one = 5 + >>> ab_for_chol = np.random.rand(semi_bw_plus_one, 1000) + >>> ab_for_chol[semi_bw_plus_one - 1, ::] += 1.0 + 2.0 * float(semi_bw_plus_one) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, 2432.2672133727287) + >>> # ... and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, 2432.267213372733) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 3 (positive definite) + >>> semi_bw_plus_one = 1 + >>> ab_for_chol = np.random.rand(semi_bw_plus_one, 5000) + >>> ab_for_chol[semi_bw_plus_one - 1, ::] += 1.0 + 2.0 * float(semi_bw_plus_one) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, 6234.8131295042585) + >>> # ... 
and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, 6234.8131295042585) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 4 (indefinite) + >>> semi_bw_plus_one = 2 + >>> ab_for_chol = -1.0 + 2.0 * np.random.rand(semi_bw_plus_one, 1000) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (-1.0, -437.7731132082764) + >>> # ... and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (-1.0, -437.7731132082757) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 5 (indefinite) + >>> semi_bw_plus_one = 1 + >>> ab_for_chol = -1.0 + 2.0 * np.random.rand(semi_bw_plus_one, 5000) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, -5001.0078551404185) + >>> # ... 
and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, -5001.007855140422) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + """ + # since the log determinant can be expressed as the sum of the logarithms of the + # absolute eigenvalues, an eigenvalue evaluation is sufficient to determine the + # sign and the log determinant + eigenvalues = eigvals_banded( + a_band=ab, + lower=False, + select="a", + ) + if np.count_nonzero(eigenvalues < 0.0) % 2 == 0: # type: ignore + sign = 1.0 + else: + sign = -1.0 + + with np.errstate(divide="ignore", over="ignore"): + logabsdet = np.log(np.abs(eigenvalues)).sum() # type: ignore + + return sign, logabsdet + + +def get_sparse_forward_finite_difference_matrix( + num_data: int, + differences: int, +) -> csc_matrix: + """ + Creates a dense forward finite difference matrix ``D`` of a given difference order. + + Doctests + -------- + >>> # Imports + >>> from tests.tests_for_utils.utils_funcs import ( + ... get_sparse_forward_finite_difference_matrix, + ... ) + + >>> # Matrix 1 + >>> num_data, differences = 5, 1 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() + array([[-1., 1., 0., 0., 0.], + [ 0., -1., 1., 0., 0.], + [ 0., 0., -1., 1., 0.], + [ 0., 0., 0., -1., 1.]]) + + >>> # Matrix 2 + >>> num_data, differences = 10, 1 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... 
).toarray() + array([[-1., 1., 0., 0., 0., 0., 0., 0., 0., 0.], + [ 0., -1., 1., 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., -1., 1., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., -1., 1., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., -1., 1., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., -1., 1., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0., -1., 1., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0., -1., 1., 0.], + [ 0., 0., 0., 0., 0., 0., 0., 0., -1., 1.]]) + + >>> # Matrix 3 + >>> num_data, differences = 5, 2 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() + array([[ 1., -2., 1., 0., 0.], + [ 0., 1., -2., 1., 0.], + [ 0., 0., 1., -2., 1.]]) + + >>> # Matrix 4 + >>> num_data, differences = 10, 2 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() + array([[ 1., -2., 1., 0., 0., 0., 0., 0., 0., 0.], + [ 0., 1., -2., 1., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 1., -2., 1., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 1., -2., 1., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 1., -2., 1., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 1., -2., 1., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 1., -2., 1., 0.], + [ 0., 0., 0., 0., 0., 0., 0., 1., -2., 1.]]) + + >>> # Matrix 4 + >>> num_data, differences = 5, 3 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() + array([[-1., 3., -3., 1., 0.], + [ 0., -1., 3., -3., 1.]]) + + >>> # Matrix 5 + >>> num_data, differences = 10, 3 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... 
).toarray() + array([[-1., 3., -3., 1., 0., 0., 0., 0., 0., 0.], + [ 0., -1., 3., -3., 1., 0., 0., 0., 0., 0.], + [ 0., 0., -1., 3., -3., 1., 0., 0., 0., 0.], + [ 0., 0., 0., -1., 3., -3., 1., 0., 0., 0.], + [ 0., 0., 0., 0., -1., 3., -3., 1., 0., 0.], + [ 0., 0., 0., 0., 0., -1., 3., -3., 1., 0.], + [ 0., 0., 0., 0., 0., 0., -1., 3., -3., 1.]]) + + """ + + # first, the required constants are obtained from the ``WhittakerLikeSolver``-class + dtype = WhittakerLikeSolver._WhittakerLikeSolver__dtype # type: ignore + + # then, the dense finite difference matrix D is created from the forward difference + # kernel + diff_kernel = forward_finite_difference_kernel(differences=differences) + offsets = np.arange(start=0, stop=diff_kernel.size, step=1, dtype=np.int64) + return sp_diags( + diagonals=diff_kernel, + offsets=offsets, # type: ignore + shape=(num_data - diff_kernel.size + 1, num_data), + dtype=dtype, + format="csc", + ) + + +def sparse_slogdet_from_superlu(splu: spla.SuperLU) -> Tuple[float, float]: + """ + Computes the sign and the logarithm of the determinant of a sparse matrix from its + SuperLU decomposition. + + References + ---------- + This function is based on the following GIST and its discussion: + https://gist.github.com/luizfelippesr/5965a536d202b913beda9878a2f8ef3e + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> import scipy.sparse as sprs + + >>> from tests.tests_for_utils.utils_funcs import ( + ... sparse_slogdet_from_superlu, + ... ) + + >>> # Setup of a test with random matrices + >>> np.random.seed(42) + >>> n_rows = np.random.randint(low=10, high=1_001, size=20) + >>> density = 0.5 # chosen to have a high probability of a solvable system + >>> n_rows + array([112, 445, 870, 280, 116, 81, 710, 30, 624, 131, 476, 224, 340, + 468, 97, 382, 109, 881, 673, 140]) + + >>> # Running the tests in a loop + >>> for m in n_rows: + ... iter_i = 0 + ... attempts = 10 + ... failed = False + ... while iter_i < 10: + ... 
# a random matrix is generated and if the LU decomposition fails, the + ... # test is repeated (this test is not there to test the LU decomposition) + ... attempts += 1 + ... matrix = sprs.random(m=m, n=m, density=density, format="csc") + ... try: + ... splu = sprs.linalg.splu(matrix) + ... except RuntimeError: + ... continue + ... + ... # first, the utility function is used to compute the sign and the log + ... # determinant of the matrix + ... sign, logabsdet = sparse_slogdet_from_superlu(splu=splu) + ... + ... # then, the sign and the log determinant are computed by NumPy's dense + ... # log determinant function for comparison + ... sign_ref, logabsdet_ref = np.linalg.slogdet(matrix.toarray()) + ... + ... # the results are compared and if they differ, the test is stopped + ... # with a diagnostic message + ... if not ( + ... np.isclose(sign, sign_ref) + ... and np.isclose(logabsdet, logabsdet_ref) + ... ): + ... print( + ... f"Failed for matrix with shape {m}x{m}: " + ... f"sign: {sign} vs. {sign_ref} and " + ... f"logabsdet: {logabsdet} vs. {logabsdet_ref}" + ... ) + ... failed = True + ... break + ... + ... # if the test is successful, the loop is continued if the number of + ... # attempts is less than 100 + ... del splu + ... iter_i += 1 + ... if attempts >= 100: + ... print( + ... f"Could not generate a solvable system for matrix with shape " + ... f"{m}x{m}" + ... ) + ... + ... if failed: + ... break + + """ + + ### Auxiliary Function ### + + def find_min_num_swaps(arr: np.ndarray): + """ + Minimum number of swaps needed to order a permutation array. 
+ + """ + # from https://www.thepoorcoder.com/hackerrank-minimum-swaps-2-solution/ + a = dict(enumerate(arr)) + b = {v: k for k, v in a.items()} + count = 0 + for i in a: + x = a[i] + if x != i: + y = b[i] + a[y] = x + b[x] = y + count += 1 + + return count + + ### Main Part ### + + # the logarithm of the determinant is the sum of the logarithms of the diagonal + # elements of the LU decomposition, but since L is unit lower triangular, only the + # diagonal elements of U are considered + u_diagonal = splu.U.diagonal() + logabsdet = np.log(np.abs(u_diagonal)).sum() + + # then, the sign is determined from the diagonal elements of U as well as the row + # and column permutations + # NOTE: odd number of negative elements/swaps leads to a negative sign + factorization_sign = -1 if np.count_nonzero(u_diagonal < 0.0) % 2 == 1 else 1 + row_permutation_sign = -1 if find_min_num_swaps(splu.perm_r) % 2 == 1 else 1 + column_permutation_sign = -1 if find_min_num_swaps(splu.perm_c) % 2 == 1 else 1 + total_sign = ( + -1.0 + if factorization_sign * row_permutation_sign * column_permutation_sign < 0 + else 1.0 + ) + + return total_sign, logabsdet + + +def calc_whittaker_smooth_log_marginal_likelihood_const_term( + differences: int, + difference_matrix: csc_matrix, + weight_vect: np.ndarray, +) -> float: + """ + Calculates the constant term of the log marginal likelihood of a Whittaker smoother + with a given set of parameters. + + It is given by + + ``(n^ - d) * ln(2 * pi) - ln(pseudo_det(W)) - ln(pseudo_det(D.T @ D))`` + + or better + + ``(n^ - d) * ln(2 * pi) - ln(pseudo_det(W)) - ln(det(D @ D.T))`` + + For further details, please see the documentation of the function :func:`get_log_marginal_likelihood_constant_term` + from the module :mod:`chemotools.utils.whittaker_base.logml`. + + Doctest + ------- + >>> # Imports + >>> import numpy as np + >>> from tests.tests_for_utils.utils_funcs import ( + ... calc_whittaker_smooth_log_marginal_likelihood_const_term, + ... 
get_sparse_forward_finite_difference_matrix, + ... ) + + >>> # Generation of the weight matrix W and the finite difference matrix D + >>> weights = np.array([0.5, 1.0, 0.5, 1.0, 0.5]) + >>> num_data, differences = weights.size, 1 + >>> difference_matrix_sparse = get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ) + >>> difference_matrix_dense = difference_matrix_sparse.toarray() + + >>> # Test 1 with all weights being non-zero + + >>> # Calculation of the log pseudo-determinant of the weight matrix W + >>> # since it is diagonal, the log-determinant is the sum of the logarithms of the + >>> # diagonal elements + >>> log_pseudo_determinant_w = np.log(weights).sum() + >>> log_pseudo_determinant_w + -2.0794415416798357 + + >>> # Calculation of the log pseudo-determinant via the Cholesky decomposition of + >>> # the product D @ D.T + >>> squared_difference_matrix_chol = np.linalg.cholesky( + ... difference_matrix_dense @ difference_matrix_dense.T + ... ) + >>> squared_difference_matrix_chol + array([[ 1.41421356, 0. , 0. , 0. ], + [-0.70710678, 1.22474487, 0. , 0. ], + [ 0. , -0.81649658, 1.15470054, 0. ], + [ 0. , 0. , -0.8660254 , 1.11803399]]) + >>> # the sum of the doubled logarithms of the main diagonal elements is the log + >>> # pseudo-determinant of the matrix D.T @ D + >>> log_pseudo_determinant_dt_dot_d = ( + ... 2.0 * np.log(np.diag(squared_difference_matrix_chol)).sum() + ... ) + >>> log_pseudo_determinant_dt_dot_d + 1.6094379124341003 + + >>> # Calculation of the theoretical constant term + >>> logml_theoretical = ( + ... (num_data - differences) * np.log(2.0 * np.pi) + ... - log_pseudo_determinant_w + ... - log_pseudo_determinant_dt_dot_d + ... ) + + >>> # Calculation of the constant term via the utility function + >>> logml_via_function = calc_whittaker_smooth_log_marginal_likelihood_const_term( + ... differences=differences, + ... difference_matrix=difference_matrix_sparse, + ... 
weight_vect=weights, + ... ) + >>> logml_via_function + 7.821511894883117 + >>> np.isclose(logml_via_function, logml_theoretical) + True + + >>> # Test 2 with 2 weights being zero + >>> weights[1] = 0.0 + >>> weights[3] = 0.0 + >>> nonzero_weights_flags = weights > 0.0 + >>> log_pseudo_determinant_w = np.log(weights[nonzero_weights_flags]).sum() + + >>> # Calculation of the theoretical constant term + >>> logml_theoretical = ( + ... (nonzero_weights_flags.sum() - differences) * np.log(2.0 * np.pi) + ... - log_pseudo_determinant_w + ... - log_pseudo_determinant_dt_dot_d + ... ) + + >>> # Calculation of the constant term via the utility function + >>> logml_via_function = calc_whittaker_smooth_log_marginal_likelihood_const_term( + ... differences=differences, + ... difference_matrix=difference_matrix_sparse, + ... weight_vect=weights, + ... ) + >>> logml_via_function + 4.145757762064426 + >>> np.isclose(logml_via_function, logml_theoretical) + True + + """ # noqa: E501 + + ### Pre-computation of the constant term ### + + # first, the required constants are obtained from the ``WhittakerLikeSolver``-class + zero_weight_tol = WhittakerLikeSolver._WhittakerLikeSolver__zero_weight_tol # type: ignore + + # for W, the log pseudo-determinant is calculated ... + w_nonzero_indices = weight_vect > weight_vect.max() * zero_weight_tol + num_nonzero_w = w_nonzero_indices.sum() + w_log_pseudo_determinant = np.log(weight_vect[w_nonzero_indices]).sum() + + # ... 
followed by the log pseudo-determinant of the penalty matrix D.T @ D which is + # equivalent to the determinant of the flipped matrix D @ D.T which is not + # rank-deficient + _, penalty_log_pseudo_determinant = sparse_slogdet_from_superlu( + splu=spla.splu(A=difference_matrix @ difference_matrix.transpose()) + ) + + # from all of this, the constant term is computed + return ( + (num_nonzero_w - differences) * np.log(2.0 * np.pi) + - w_log_pseudo_determinant + - penalty_log_pseudo_determinant + ) + + +def find_whittaker_smooth_opt_lambda_log_marginal_likelihood( + b_vect: np.ndarray, + weight_vect: np.ndarray, + differences: int, + log_lambda_bounds: Tuple[float, float], + num_optimizations: int, +) -> Tuple[float, float, np.ndarray]: + """ + Finds the optimal lambda value for a Whittaker smoother by maximising the log + marginal likelihood via a nested brute-force optimisation followed by a bounded + scalar minimisation. + + Since it relies purely on dense linear algebra for highly sparse matrices, this + utility function is only suitable for small to medium-sized datasets (n < 500 ... + 1000). + + """ + + ### Definition of the target function ### + + def get_smooth_solution( + log_lam: Union[np.ndarray, float] + ) -> Tuple[np.ndarray, spla.SuperLU, float, float]: + """ + Computes the smooth solution for the Whittaker smoother. 
+ + """ + + # first, the linear system (left hand side) has to be set up for calculating the + # smooth solution + if isinstance(log_lam, np.ndarray): + log_lam = log_lam[0] + + lam = exp(log_lam) + + # NOTE: lhs is "left-hand side" + lhs_matrix = lam * penalty_matrix + lhs_matrix += sp_diags( + diagonals=weight_vect, + offsets=0, + shape=(b_vect.size, b_vect.size), + format="csc", + ) + + # then, the solution is obtained + lhs_splu = spla.splu(A=lhs_matrix) + smooth_solution = lhs_splu.solve(rhs=weight_vect * b_vect) + + return ( + smooth_solution, + lhs_splu, + lam, + log_lam, # type: ignore + ) + + def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: + """ + The target function to minimize for maximizing the log marginal likelihood. + + """ + + # first, the smooth solution is calculated together with the left-hand side + # matrix and the lambda value + smooth_solution, lhs_splu, lam, log_lam = get_smooth_solution(log_lam=log_lam) + + # the log-determinant of the left-hand-side matrix is calculated + # NOTE: lhs is "left-hand side" + _, log_determinant_lhs = sparse_slogdet_from_superlu(splu=lhs_splu) + + # finally, the log marginal likelihood is computed from: + # 1) the weighted residual sum of squares + weighted_sum_of_squared_residuals = ( + weight_vect * np.square(b_vect - smooth_solution) + ).sum() + + # 2) the sum of squared penalties + # NOTE: the order of multiplications for the following term is important because + # the last multiplication is a matrix-vector resulting in another vector; + # the other way around would result in another matrix followed by + # a matrix-vector multiplication + sum_of_squared_penalties = lam * ( + smooth_solution @ (penalty_matrix @ smooth_solution) + ) + + # 3) the log-determinant of the lhs matrix and the constant term + # NOTE: the sign is positive because the log marginal likelihood is maximised + # and not minimised + return 0.5 * ( + weighted_sum_of_squared_residuals + + sum_of_squared_penalties + - 
(b_vect.size - differences) * log_lam + + log_determinant_lhs + + logml_constant_term + ) + + ### Pre-computations ### + + # then, some pre-computations are made + num_data = b_vect.size + log_lambda_min, log_lambda_max = log_lambda_bounds + difference_matrix_sparse = get_sparse_forward_finite_difference_matrix( + num_data=num_data, + differences=differences, + ) + penalty_matrix = ( + difference_matrix_sparse.transpose() @ difference_matrix_sparse + ).tocsc() # type: ignore + logml_constant_term = calc_whittaker_smooth_log_marginal_likelihood_const_term( + differences=differences, + difference_matrix=difference_matrix_sparse, + weight_vect=weight_vect, + ) + + ### Running the optimisation ### + + # the first optimisation is run with the target function to narrow down the + # search space + opt_log_lam = brute( + func=logml_target_func, + ranges=((log_lambda_min, log_lambda_max),), + Ns=num_optimizations, + finish=None, + full_output=False, + ) + + # the search space is narrowed down for the second optimisation to roughly one + # decade in the natural log space + log_lambda_min = opt_log_lam - 1.2 # type: ignore + log_lambda_max = opt_log_lam + 1.2 # type: ignore + + # the second optimisation is run with the target function to find the optimal lambda + opt_log_lam = brute( + func=logml_target_func, + ranges=((log_lambda_min, log_lambda_max),), + Ns=num_optimizations, + finish=None, + full_output=False, + ) + + # one more optimisation is run to ensure that the optimal lambda is found + log_lambda_min = opt_log_lam - 0.1 # type: ignore + log_lambda_max = opt_log_lam + 0.1 # type: ignore + opt_log_lam = minimize_scalar( + fun=logml_target_func, + bounds=(log_lambda_min, log_lambda_max), + method="bounded", + ).x + + # finally, the solutions for the optimal lambda are returned + return ( + exp(opt_log_lam), + (-1.0) * logml_target_func(log_lam=opt_log_lam), + get_smooth_solution(log_lam=opt_log_lam)[0], + ) + + +### Doctests ### + +if __name__ == "__main__": # pragma: 
no cover + + import doctest + + doctest.testmod() diff --git a/tests/tests_for_utils/utils_models.py b/tests/tests_for_utils/utils_models.py new file mode 100644 index 00000000..9dbcfdec --- /dev/null +++ b/tests/tests_for_utils/utils_models.py @@ -0,0 +1,100 @@ +""" +This script implements utility models required for testing the +:mod:`chemotools.utils` module. + +""" + +### Imports ### + +from dataclasses import dataclass, field +from typing import Dict, Literal, Optional, Tuple + +import numpy as np + +from chemotools.utils import _models +from tests.tests_for_utils.utils_funcs import float_is_bit_equal + +### Dataclasses ### + + +@dataclass +class RefDifferenceKernel: + """ + Reference for the difference kernel validity check (``size`` is ``kernel.size``). + + """ + + differences: int + accuracy: int + kernel: np.ndarray + + size: int = field(init=False) + + def __post_init__(self) -> None: + self.size = self.kernel.size + + +@dataclass +class NoiseEstimationReference: + """ + Noise estimation reference; also stores ``noise_level`` raised to -2, -1, 1, and 2. + + """ + + window_size: Optional[int] + min_noise_level: float + differences: int + accuracy: int + noise_level: np.ndarray + + raised_noise_levels: Dict[Literal[-2, -1, 1, 2], np.ndarray] = field(init=False) + + def __post_init__(self) -> None: + self.raised_noise_levels = { + power: self.noise_level**power for power in (-2, -1, 1, 2) + } + + +@dataclass +class ExpectedWhittakerSmoothLambda: + """ + Dataclass for checking the expected results for the class :class:`WhittakerSmoothLambda` + from the module :mod:`chemotools.utils._models`. + + """ # noqa: E501 + + fixed_lambda: float + auto_bounds: Tuple[float, float] + fit_auto: bool + method_used: _models.WhittakerSmoothMethods + log_auto_bounds: Tuple[float, float] = (0.0, 0.0) + + def assert_is_equal_to(self, other: _models.WhittakerSmoothLambda) -> None: + """ + Asserts that ``other`` holds the values expected by the current instance.
+ + """ + + assert other.fit_auto is self.fit_auto + assert other.method_used == self.method_used + # NOTE: since NAN-values are used, the comparison is split into two parts for + # the fixed lambda value and each of the bounds + assert float_is_bit_equal( + value=other.fixed_lambda, + reference=self.fixed_lambda, + ) + assert float_is_bit_equal( + value=other.auto_bounds[0], reference=self.auto_bounds[0] + ) + assert float_is_bit_equal( + value=other.auto_bounds[1], + reference=self.auto_bounds[1], + ) + assert float_is_bit_equal( + value=other.log_auto_bounds[0], + reference=self.log_auto_bounds[0], + ) + assert float_is_bit_equal( + value=other.log_auto_bounds[1], + reference=self.log_auto_bounds[1], + )