From 3882236985ec246709e8ac8976d4349e8cba7fc1 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sun, 17 Dec 2023 16:36:14 +0100 Subject: [PATCH 001/118] wip: [44] test push: untracked VSCode settings; added requirements and dev-requirements --- .vscode/settings.json | 8 -------- requirements-dev.txt | 3 +++ requirements.txt | 3 ++- 3 files changed, 5 insertions(+), 9 deletions(-) delete mode 100644 .vscode/settings.json create mode 100644 requirements-dev.txt diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index b276816b..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "python.formatting.provider": "black", - "python.testing.pytestArgs": [ - "tests" - ], - "python.testing.unittestEnabled": false, - "python.testing.pytestEnabled": true -} \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..b534c25d --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,3 @@ +matplotlib +pentapy +pytest \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f18ef2b6..c9045e8a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ numpy>=1.24.1 pandas>=1.3.4 -scikit-learn>=1.2.0 +scikit-learn>=1.2.0 +scipy>=1.11.0 From 14d274855fbd945ed75e76eff2c0210e05313c53 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 14:39:33 +0100 Subject: [PATCH 002/118] wip: [44] attempt to remove `settings.json` from project --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 4d866453..94a95d2f 100644 --- a/.gitignore +++ b/.gitignore @@ -103,7 +103,7 @@ celerybeat.pid # Environments .env -.venv +.venv* env/ venv/ ENV/ @@ -129,7 +129,7 @@ dmypy.json .pyre/ # VScode -.vscode/ +# .vscode/ # Notebooks *.ipynb \ No newline at end of file From 9c10929dfcf50e9179608cd900b110caebac1212 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 14:44:44 +0100 
Subject: [PATCH 003/118] feat: [44] added `settings.json`-template for also including `ruff` (linting) --- .vscode/settings_template.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .vscode/settings_template.json diff --git a/.vscode/settings_template.json b/.vscode/settings_template.json new file mode 100644 index 00000000..eafd7db3 --- /dev/null +++ b/.vscode/settings_template.json @@ -0,0 +1,14 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "python.defaultInterpreterPath": "Enter interpreter path here for debugging", + "ruff.interpreter": [ + "Enter interpreter path here for linting" + ] +} \ No newline at end of file From 4db90e477e9159fd901b4a0ecf22bc65063df067 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 14:47:23 +0100 Subject: [PATCH 004/118] wip: [44] git-ignored `.vscode`-folder again to decouple user settings from repo --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 94a95d2f..4a198cc3 100644 --- a/.gitignore +++ b/.gitignore @@ -129,7 +129,7 @@ dmypy.json .pyre/ # VScode -# .vscode/ +.vscode/ # Notebooks *.ipynb \ No newline at end of file From 935062c87905beed70832dc4f521392c92049ece Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 14:50:15 +0100 Subject: [PATCH 005/118] fix: [44] fixed wrong type hint; isort; black formatting; trim trailing whitespaces --- chemotools/augmentation/spectrum_scale.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/chemotools/augmentation/spectrum_scale.py b/chemotools/augmentation/spectrum_scale.py index ce105659..a3249dab 100644 --- a/chemotools/augmentation/spectrum_scale.py +++ b/chemotools/augmentation/spectrum_scale.py @@ -1,5 +1,5 @@ import numpy as np -from sklearn.base import 
BaseEstimator, TransformerMixin, OneToOneFeatureMixin +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted from chemotools.utils.check_inputs import check_input @@ -17,7 +17,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): random_state : int, default=None The random state to use for the random number generator. - + Attributes ---------- n_features_in_ : int @@ -25,7 +25,7 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): _is_fitted : bool Whether the transformer has been fitted to data. - + Methods ------- fit(X, y=None) @@ -35,15 +35,14 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): Transform the input data by scaling the spectrum. """ - - def __init__(self, scale: int = 0.0, random_state: int = None): + def __init__(self, scale: float = 0.0, random_state: int | None = None): self.scale = scale self.random_state = random_state def fit(self, X: np.ndarray, y=None) -> "SpectrumScale": """ Fit the transformer to the input data. 
- + Parameters ---------- X : np.ndarray of shape (n_samples, n_features) @@ -97,7 +96,9 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: # Check that the number of features is the same as the fitted data if X_.shape[1] != self.n_features_in_: - raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}") + raise ValueError( + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + ) # Calculate the scaled spectrum for i, x in enumerate(X_): @@ -106,6 +107,5 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: return X_.reshape(-1, 1) if X_.ndim == 1 else X_ def _scale_spectrum(self, x) -> np.ndarray: - scaling_factor = self._rng.uniform(low=1-self.scale, high=1+self.scale) + scaling_factor = self._rng.uniform(low=1 - self.scale, high=1 + self.scale) return np.multiply(x, scaling_factor) - \ No newline at end of file From ae352b3dba66f4bb4ac0095a93c34ebb9342e080 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 17:24:00 +0100 Subject: [PATCH 006/118] feat: [44] added ruff linting checks; added requirements for development --- pyproject.toml | 42 +++++++++++++++++++++++++++++++++++++++++- requirements-dev.txt | 2 ++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b5a3c468..6c046a18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,4 +3,44 @@ requires = [ "setuptools>=42", "wheel" ] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" + +[tool.ruff] +# Enable pycodestyle (`E`), Pyflakes (`F`) checks. +select = ["E", "F"] +ignore = [] + +# Allow autofix for all enabled rules (when `--fix`) is provided. +# "A", "B", "C", "D", "E", "F", "..." 
+fixable = [] +unfixable = [] + +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv*", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + ".vscode", +] + +line-length = 88 +target-version = "py311" + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index b534c25d..febecc49 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,5 @@ +black +isort matplotlib pentapy pytest \ No newline at end of file From 67faaf369fe9558b63e4dc050390f2bfe5ad778d Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 17:38:50 +0100 Subject: [PATCH 007/118] feat: [44] added finite difference computation for Whittaker smoothers --- chemotools/utils/finite_differences.py | 182 +++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 chemotools/utils/finite_differences.py diff --git a/chemotools/utils/finite_differences.py b/chemotools/utils/finite_differences.py new file mode 100644 index 00000000..bce324ba --- /dev/null +++ b/chemotools/utils/finite_differences.py @@ -0,0 +1,182 @@ +from math import factorial +from numbers import Integral + +import numpy as np +from scipy.sparse import dia_matrix +from scipy.sparse import diags as spdiags +from sklearn.utils import check_scalar + + +def _calc_arbitrary_fin_diff_kernel( + *, + grid_points: np.ndarray, + differences: int, +) -> np.ndarray: + """Computes the kernel for finite differences with arbitrary grid points.""" + # the number of grid points is counted + num_grid_points = grid_points.size + + # if the grid points cannot support the respective difference, an error is raised + if differences >= num_grid_points: + raise ValueError( + f"\n{num_grid_points} grid points cannot support a 
{differences}-th order " + f"difference." + ) + # else nothing + + # then, the system of linear equations to solve is set up as A@x = b where x is + # the kernel vector + lhs_mat_a = np.vander(x=grid_points, N=num_grid_points, increasing=True).T + rhs_vect_b = np.zeros(shape=(num_grid_points,), dtype=np.float64) + rhs_vect_b[differences] = factorial(differences) + + # the kernel is computed and returned + return np.linalg.solve(a=lhs_mat_a, b=rhs_vect_b) + + +def calc_forward_diff_kernel( + *, + differences: int, + accuracy: int = 1, +) -> np.ndarray: + """Computes the kernel for forward finite differences which can be applied to a + series by means of a convolution, e.g., + + ```python + kernel = calc_forward_fin_diff_kernel(differences=2, accuracy=1) + differences = np.convolve(series, np.flip(kernel), mode="valid") + # NOTE: NumPy flips the kernel internally due to the definition of convolution + ``` + + Parameters + ---------- + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 0 are not allowed. + accuracy : int, default=1 + The accuracy of the approximation which must be a positive integer starting + from 1. + + Returns + ------- + fin_diff_kernel : np.ndarray of shape (differences + accuracy,) + A NumPy-1D-vector resembling the kernel from the code example above. + + Raises + ------ + ValueError + If the difference order is below 0, the accuracy is below 1, or the number of + grid points is not sufficient to support the respective difference order. 
+ + """ + # the input is validated + check_scalar( + differences, + name="differences", + target_type=Integral, + min_val=0, + include_boundaries="left", + ) + check_scalar( + accuracy, + name="accuracy", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + + # afterwards, the number of grid points is evaluated, which is simply the sum of the + # difference order and the accuracy + num_grid_points = differences + accuracy + + # then, the system of linear equations is solved for the x in A@x = b since x is + # the kernel vector + grid_points = np.arange( + start=0, + stop=num_grid_points, + step=1, + dtype=np.float64, + ) + fin_diff_kernel = _calc_arbitrary_fin_diff_kernel( + grid_points=grid_points, differences=differences + ) + + return fin_diff_kernel + + +def forward_finite_diff_conv_matrix( + *, + differences: int, + accuracy: int = 1, + series_size: int, +) -> dia_matrix: + """Computes the convolution matrix for forward finite differences which can be + applied to a series by means of a matrix multiplication, e.g., + + ```python + conv_mat = finite_diff_conv_matrix(differences=2, accuracy=1, series_size=10) + differences = conv_mat @ series # boundaries require special care + ``` + + this is equivalent to + + ```python + kernel = calc_forward_fin_diff_kernel(differences=2, accuracy=1) + differences = np.convolve(series, np.flip(kernel), mode="valid") + # NOTE: NumPy flips the kernel internally due to the definition of convolution + ``` + + Parameters + ---------- + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 0 are not allowed. + accuracy : int, default=1 + The accuracy of the approximation which must be a positive integer starting + from 1. + series_size : int + The number of data points in the series to which the convolution matrix is + applied. 
+ + Returns + ------- + conv_mat : dia_matrix of shape (series_size - differences, series_size) + A sparse matrix resembling the convolution matrix from the code example above. + + Raises + ------ + ValueError + If the difference order is below 0, the accuracy is below 1, or the number of + grid points is not sufficient to support the respective difference order. + ValueError + If ``series_size`` is not enough to support the respective ``differences`` and + ``accuracy``. + + """ + # the input is validated (``differences`` and ``accuracy`` are validated in the + # function ``calc_forward_diff_kernel``) + kernel_size = differences + accuracy + try: + check_scalar( + series_size, + name="n_features", # for compatibility with sklearn + target_type=Integral, + min_val=kernel_size, + include_boundaries="left", + ) + except ValueError: + raise ValueError(f"Got n_features = {series_size}, must be >= {kernel_size}.") + + # afterwards, the kernel is computed ... + kernel = calc_forward_diff_kernel(differences=differences, accuracy=accuracy) + # ... 
and the convolution matrix is created + return spdiags( + diagonals=kernel, + offsets=np.arange(start=0, stop=kernel_size, step=1), # type: ignore + shape=(series_size - kernel_size + 1, series_size), + format="dia", + ) From d13c5ce05b31f46f541a19099e6ff98d0bed8bd3 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 17:40:54 +0100 Subject: [PATCH 008/118] feat: [44] added more banded linear algebra functions with `scipy`s LAPACK wrappers for Whittaker smoothers --- chemotools/utils/banded_linalg.py | 614 ++++++++++++++++++++++++++++++ 1 file changed, 614 insertions(+) create mode 100644 chemotools/utils/banded_linalg.py diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py new file mode 100644 index 00000000..8f8a2915 --- /dev/null +++ b/chemotools/utils/banded_linalg.py @@ -0,0 +1,614 @@ +from numbers import Integral +from typing import Optional, Union +from warnings import warn + +import numpy as np +from numpy.typing import ArrayLike +from scipy.linalg import eigvals_banded, lapack +from scipy.sparse import eye as speye +from scipy.sparse import spmatrix +from sklearn.utils import check_array, check_scalar + + +def _check_full_arr_n_diag_counts_for_lu_banded( + a_shape: tuple[int, int], + l_and_u: tuple[int, int], +) -> None: + """Validates the shape of a full array and the number of sub- and superdiagonals + for LU-decomposition of a banded (sparse) matrix. 
+ """ + num_rows, num_cols = a_shape + num_low_diags, num_upp_diags = l_and_u + + check_scalar( + x=num_rows, + name="num_rows", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + check_scalar( + x=num_cols, + name="num_cols", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + check_scalar( + x=num_low_diags, + name="num_low_diags", + target_type=Integral, + min_val=0, + max_val=num_rows - 1, + include_boundaries="both", + ) + check_scalar( + x=num_upp_diags, + name="num_upp_diags", + target_type=Integral, + min_val=0, + max_val=num_rows - 1, + include_boundaries="both", + ) + + if num_rows != num_cols: + raise ValueError(f"\nThe matrix must be square, but it has shape {a_shape}.") + # else nothing + + +def conv_to_lu_banded_storage( + a: Union[np.ndarray, spmatrix], + l_and_u: tuple[int, int], +) -> np.ndarray: + """Converts a (sparse) square banded matrix A to its banded storage required for + LU-decomposition in LAPACK-routines like the function ``lu_banded`` or SciPy's + ``solve_banded``. This format is identical for pentapy where it is referred to as + "column-wise flattened". + Cholesky-decompositions require a different format. + + Parameters + ---------- + a : np.ndarray or sparse matrix of shape (n, n) + A square banded NumPy-2D-Array or SciPy sparse matrix. "Square" means that the + row count equals the column count while "banded" implies that only the main + diagonal and a few sub- and/or superdiagonals are non-zero (see `l_and_u`). + l_and_u : tuple[int, int] + The number of "non-zero" sub- (first) and superdiagonals (second element) aside + the main diagonal which does not need to be considered here. "Non-zero" can be + a bit misleading in this context. These numbers should count up to the diagonal + after which all following diagonals are zero. Zero-diagonals that come before + still need to be included. 
+ Wrong specification of this can lead to non-zero-diagonals being ignored or + zero-diagonals being included which corrupts the results or reduces the + performance. + + Returns + ------- + ab : np.ndarray of shape (l_and_u[0] + 1 + l_and_u[1], n) + A NumPy-2D-Array resembling `a` in banded storage format (see Notes). + + Raises + ------ + ValueError + If `a` is not square. + ValueError + If the number of rows of `a` does not match the number of rows given by + the diagonal number. + + Notes + ----- + For LAPACK LU-decomposition, the matrix `a` is stored in `ab` using the matrix + diagonal ordered form: + + ```python + ab[u + i - j, j] == a[i,j] # see below for u + ``` + + An example of `ab` (shape of a is ``(7,7)``, `u`=3 superdiagonals, `l`=2 + subdiagonals) looks like: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + a10 a21 a32 a43 a54 a65 * # v subdiagonals + a20 a31 a42 a53 a64 * * + ``` + + where all entries marked with ``*`` are ``0`` when returned by this function. + Internally LAPACK relies on an expanded version of this format to perform inplace + operations, but the respective functions handle the conversion themselves. 
+ + """ + + # the matrix is checked for being square and for having the correct number of rows + num_low_diags, num_upp_diags = l_and_u + a = check_array(array=a, accept_sparse=True, ensure_2d=True) + _check_full_arr_n_diag_counts_for_lu_banded( + a_shape=a.shape, l_and_u=l_and_u # type: ignore + ) + + # first, the number of lower and upper diagonals is extracted and turned into two + # offset vectors + main_diag_idx = num_upp_diags + num_cols = a.shape[-1] + + # now, the diagonal extraction method is specified based and the banded storage is + # filled by it + diag_method = a.diagonal # type: ignore + ab = np.zeros(shape=(num_low_diags + 1 + num_upp_diags, num_cols)) + + # the superdiagonals and the main diagonal + for offset in range(num_upp_diags, -1, -1): + ab[main_diag_idx - offset, offset::] = diag_method(offset) + + # the subdiagonals + for offset in range(-1, -num_low_diags - 1, -1): + ab[main_diag_idx - offset, 0:offset] = diag_method(offset) + + return ab + + +def lu_banded( + l_and_u: tuple[int, int], + ab: ArrayLike, + *, + overwrite_ab: bool = False, + check_finite: bool = True, +) -> tuple[np.ndarray, np.ndarray]: + """Computes the LU-decomposition of a banded matrix A using LAPACK-routines. + + This function is a wrapper of the LAPACK-routine ``gbtrf`` which computes the LU- + decomposition of a banded matrix `A` in-place. It wraps the routine in an analogous + way to SciPy's ``cholesky_banded``. + + Parameters + ---------- + l_and_u : tuple[int, int] + The number of "non-zero" sub- (first) and superdiagonals (second element) aside + the main diagonal which does not need to be considered here. "Non-zero" can be + a bit misleading in this context. These numbers should count up to the diagonal + after which all following diagonals are zero. Zero-diagonals that come before + still need to be included. + Neither of both may exceed `num_rows`. 
+ Wrong specification of this can lead to non-zero-diagonals being ignored or + zero-diagonals being included which corrupts the results or reduces the + performance. + ab : np.ndarray of shape (l_and_u[0] + 1 + l_and_u[1], n) + A NumPy-2D-Array resembling the matrix `A` in banded storage format + (see Notes). + + overwrite_ab : bool, default=False + If ``True``, the contents of `ab` can be overwritten by the routine. Otherwise, + a copy of `ab` is created and overwritten. + + check_finite : bool, default=True + Whether to check that the input matrix contains only finite numbers. Disabling + may give a performance gain, but may result in problems (crashes, + non-termination) if the inputs do contain infinities or NaNs. + + Returns + ------- + lu : np.ndarray of shape (l_and_u[0] + 1 + 2 * l_and_u[1], n) + A NumPy-2D-Array resembling the LU-decomposition of `A` in banded storage + format (see Notes). + ipiv : np.ndarray of shape (n,) + A NumPy-1D-Array containing the pivoting indices. It's `i`-th entry resembles + gives the row that was used for pivoting the `i`-th row of `A`. + + Notes + ----- + For LAPACK LU-decomposition, the matrix `a` is stored in `ab` using the matrix + diagonal ordered form: + + ```python + ab[u + i - j, j] == a[i,j] # see below for u + ``` + + An example of `ab` (shape of a is ``(7,7)``, `u`=3 superdiagonals, `l`=2 + subdiagonals) looks like: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + a10 a21 a32 a43 a54 a65 * # v subdiagonals + a20 a31 a42 a53 a64 * * + ``` + + where all entries marked with ``*`` are ``0`` when returned by this function. + Internally LAPACK relies on an expanded version of this format to perform inplace + operations that adds another `l` superdiagonals to the matrix in order to overwrite + them for the purpose of pivoting. 
The output is thus an expanded version + of the LU-decomposition of `A` in the same format where the main diagonal of + `L` is implicitly taken to be a vector of ones. The output can directly be used + for the LAPACK-routine ``gbtrs`` to solve linear systems of equations based on this + decomposition. + + """ + # the (optional) finite check and Array-conversion are performed + if check_finite: + inter_ab = np.asarray_chkfinite(ab) + else: + inter_ab = np.asarray(ab) + + # then, the number of lower and upper subdiagonals needs to be checked for being + # consistent with the shape of ``ab`` + num_low_diags, num_upp_diags = l_and_u + if num_low_diags + num_upp_diags + 1 != inter_ab.shape[0]: + raise ValueError( + f"\nInvalid values for the number of lower and upper " + f"diagonals: l+u+1 ({num_low_diags + num_upp_diags + 1}) does not equal " + f"ab.shape[0] ({inter_ab.shape[0]})." + ) + # else nothing + + # now, the LAPACK-routines can be called + # to make ``ab`` compatible with the shape the LAPACK expects in this case, it + # needs to be re-written into a larger Array that has zeros elsewhere + # FIXME: for tridiagonal matrices, the SciPy wrapper for ``gttrf`` should be used + lapack_routine = "gbtrf" + (gbtrf,) = lapack.get_lapack_funcs((lapack_routine,), (inter_ab,)) + lpkc_ab = np.zeros( + shape=(2 * num_low_diags + num_upp_diags + 1, inter_ab.shape[1]), + dtype=gbtrf.dtype, + ) + lpkc_ab[num_low_diags::, ::] = inter_ab + lu, ipiv, info = gbtrf( + ab=lpkc_ab, kl=num_low_diags, ku=num_upp_diags, overwrite_ab=overwrite_ab + ) + + # then, the results needs to be validated and returned + # Case 1: the factorisation could be completed, which does not imply that the + # solution can be used for solving a linear system + if info >= 0: + if info > 0: + warn( + f"\nThe ({info-1}, {info-1})-th entry of the factor U is exactly 0, " + f"which makes it singular.\n" + f"Linear systems cannot be solved with this factor." 
+ ) + # else nothing + + return lu, ipiv + + # Case 2: the factorisation was not completed due to invalid input + else: + raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrf.") + + +def slogdet_lu_banded( + decomposition: tuple[tuple[int, int], np.ndarray, np.ndarray], +) -> tuple[float, float]: + """Computes the logarithm of the absolute value of the determinant of a banded + matrix A using its LU-decomposition. This is way more efficient than computing the + determinant directly because the LU-decompositions main diagonals already encode + the determinant as the product of the diagonal entries of the factors. + + Parameters + ---------- + (l_and_u, lub, ipiv) : tuple, (tuple[int, int], np.ndarray, np.ndarray) + `l_and_u` is a tuple of two integers specifying the number of sub- and + superdiagonals of the matrix `A` that are non-zero. + `lub` is a NumPy-2D-Array resembling the LU-decomposition of `A` in banded + storage format as returned by ``lu_banded``. + `ipiv` is a NumPy-1D-Array containing the pivoting indices as returned by + ``lu_banded``. + + Returns + ------- + sign : float + A number representing the sign of the determinant. + logabsdet : float + The natural log of the absolute value of the determinant. + + If the determinant is zero, then `sign` will be 0 and `logabsdet` will be + -Inf. In all cases, the determinant is equal to ``sign * np.exp(logabsdet)``. 
+ + """ + # first, the number of lower and upper diagonals is extracted + l_and_u, lub, ipiv = decomposition + num_low_diags, num_upp_diags = l_and_u + num_rows = lub.shape[-1] + + # then, the number of actual row exchanges needs to be counted + unchanged_row_idxs = np.arange(start=0, stop=num_rows, step=1, dtype=ipiv.dtype) + num_row_exchanges = np.count_nonzero(ipiv - unchanged_row_idxs) + + # the sign-prefactor of the determinant is either +1 or -1 depending on whether the + # number of row exchanges is even or odd + if num_row_exchanges % 2 == 1: + sign = -1.0 + else: + sign = 1.0 + + # since the determinant (without sign prefactor) is just the product of the diagonal + # product of L and the diagonal product of U, the calculation simplifies. As the + # main diagonal of L is a vector of ones, only the diagonal product of U is required + main_diag_idx = num_low_diags + num_upp_diags + u_diaprod_sign = np.prod(np.sign(lub[main_diag_idx, ::])) + with np.errstate(divide="ignore", over="ignore"): + logabsdet = np.sum(np.log(np.abs(lub[main_diag_idx, ::]))) + + # logarithms of zero are already properly handled, so there is not reason to worry + # about, since they are -inf which will result in a zero determinant in exp() + # overflow however needs to lead to a raise and in this case the log(det) is either + # +inf in case of overflow only or NaN in case of the simultaneous occurrence of + # zero and overflow + if np.isnan(logabsdet) or np.isposinf(logabsdet): + raise ValueError( + "\nFloating point overflow in natural logarithm. At least 1 main diagonal " + "entry results in overflow, thereby corrupting the determinant." 
+ ) + # else nothing + + # finally, the absolute value of the natural logarithm of the determinant is + # returned together with its sign + if np.isneginf(logabsdet): + sign = 0.0 + elif float(u_diaprod_sign) > 0.0: + pass + else: + sign *= -1.0 + + return sign, logabsdet + + +def lu_solve_banded( + decomposition: tuple[np.ndarray, np.ndarray, tuple[int, int]], + b: ArrayLike, + *, + overwrite_b: bool = False, + check_finite: bool = True, +) -> np.ndarray: + """Solves a linear system of equations ``Ax=b`` with a banded matrix `A` using its + precomputed LU-decomposition. + This function wraps the LAPACK-routine ``gbtrs`` in an analogous way to SciPy's + ``cho_solve_banded``. + + Parameters + ---------- + (l_and_u, lub, ipiv) : tuple, (np.ndarray, np.ndarray, tuple[int, int]) + `lub` is a NumPy-2D-Array resembling the LU-decomposition of `A` in banded + storage format as returned by ``lu_banded``. + `ipiv` is a NumPy-1D-Array containing the pivoting indices as returned by + ``lu_banded``. + `l_and_u` is a tuple of two integers specifying the number of sub- and + superdiagonals of the matrix `A` that are non-zero. + b : np.ndarray of shape (n,) + A 1D-Array containing the right-hand side of the linear system of equations. + overwrite_b : bool, default=False + If ``True``, the contents of `b` can be overwritten by the routine. Otherwise, + a copy of `b` is created and overwritten. + check_finite : bool, default=True + Whether to check that the input matrix contains only finite numbers. Disabling + may give a performance gain, but may result in problems (crashes, + non-termination) if the inputs do contain infinities or NaNs. 
+ + Returns + ------- + x : np.ndarray of shape (n,) + The solution to the system A x = b + + """ + # the (optional) finite check and Array-conversion are performed + lub, ipiv, l_and_u = decomposition + if check_finite: + inter_lub = np.asarray_chkfinite(lub) + inter_ipiv = np.asarray_chkfinite(ipiv) + inter_b = np.asarray_chkfinite(b) + else: + inter_lub = np.asarray(lub) + inter_ipiv = np.asarray(ipiv) + inter_b = np.asarray(b) + + # then, the shapes of the LU-decomposition and ``b`` need to be validated against + # each other + if inter_lub.shape[-1] != inter_b.shape[0]: + raise ValueError( + f"\nShapes of lub ({inter_lub.shape[-1]}) and b ({inter_b.shape[0]}) are " + f"not compatible." + ) + # else nothing + + # now, the LAPACK-routine is called + num_low_diags, num_upp_diags = l_and_u + (gbtrs,) = lapack.get_lapack_funcs(("gbtrs",), (inter_lub, inter_b)) + x, info = gbtrs( + ab=inter_lub, + kl=num_low_diags, + ku=num_upp_diags, + b=inter_b, + ipiv=inter_ipiv, + overwrite_b=overwrite_b, + ) + + # then, the results needs to be validated and returned + # Case 1: the solution could be computed successfully + if info == 0: + return x + + # Case 2: the solution could not be computed due to invalid input + elif info < 0: + raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrs.") + + # Case 3: unexpected error + else: + raise AssertionError( + f"\nThe internal gbtrs returned info > 0 ({info}) which should not happen." + ) + + +def slodget_cho_banded(decomposition: tuple[np.ndarray, bool]) -> tuple[float, float]: + """Computes the logarithm of the absolute value of the determinant of a banded + hermitian matrix `A` using its Cholesky-decomposition. This is way more efficient + than computing the determinant directly because the Cholesky factors' main + diagonals already encode the determinant as the product of the diagonal entries. 
+ + Parameters + ---------- + (cb, lower) : tuple, (np.ndarray, bool) + `cb` is a NumPy-2D-Array resembling the Cholesky-decomposition of `A` in banded + storage format as returned by ``cholesky_banded``. + `lower` is a boolean indicating whether the Cholesky-decomposition the lower + triangular form (``True``) or the upper triangular form was of `A` was used + (``False``). + + Returns + ------- + sign : float + A number representing the sign of the determinant. It is always +1 since + the matrix under consideration is positive definite. + logabsdet : float + The natural log of the absolute value of the determinant. It cannot be zero + since the matrix under consideration is positive definite. + + """ + + lower = decomposition[1] + main_diag_idx = 0 if lower else -1 + + return 1.0, 2.0 * np.sum(np.log(decomposition[0][main_diag_idx, ::])) + + +def _find_largest_symm_sparse_banded_spd_eigval( + ab: np.ndarray, check_finite: bool = True +) -> float: + """Finds the largest eigenvalue of a symmetric sparse banded matrix `A` using + SciPy's ``sparse.linalg.eigsh``. + + Notes + ----- + This function is intended for matrices that are known to be at least positive + semi-definite from a mathematical point of view (all eigenvalues >= 0). However, due + to numerical inaccuracies, the smallest eigenvalue may be negative. Such a + restriction is not critical in this context since the largest eigenvalue is + typically positive. + From a performance perspective, this function relies on LAPACK's banded eigensolver + and it thus highly efficient already. + + """ + + return eigvals_banded( + a_band=ab, + lower=False, + select="i", + select_range=(ab.shape[1] - 1, ab.shape[1] - 1), + check_finite=check_finite, + )[0] + + +def _find_smallest_symm_sparse_banded_spd_eigval( + ab: np.ndarray, check_finite: bool = True +) -> float: + """Finds the smallest eigenvalue of a symmetric sparse banded matrix `A` using + SciPy's ``sparse.linalg.eigsh``. 
+ + Notes + ----- + This function is intended for matrices that are known to be at least positive + semi-definite from a mathematical point of view (all eigenvalues >= 0). However, due + to numerical inaccuracies, the smallest eigenvalue may be negative. + From a performance perspective, this function relies on LAPACK's banded eigensolver + and is thus highly efficient already. + + """ + return eigvals_banded( + a_band=ab, + lower=False, + select="i", + select_range=(0, 0), + check_finite=check_finite, + )[0] + + +def conv_symm_sparse_banded_sposdef_to_posdef( + a: spmatrix, + *, + l_and_u: tuple[int, int], + rcond: Optional[float] = None, + check_finite: bool = True, +) -> spmatrix: + """Converts a symmetric sparse banded matrix `A` to a positive definite matrix + `B` by adding a small multiple of the identity matrix to `A` (see Notes). + + Parameters + ---------- + a : spmatrix of shape (n, n) + A square symmetric sparse banded matrix. + + l_and_u : tuple[int, int] + The number of "non-zero" sub- (first) and superdiagonals (second element) aside + the main diagonal which does not need to be considered here. "Non-zero" can be + a bit misleading in this context. These numbers should count up to the diagonal + after which all following diagonals are zero. Zero-diagonals that come before + still need to be included. + Wrong specification of this can lead to non-zero-diagonals being ignored or + zero-diagonals being included which corrupts the results or reduces the + performance. + Both its entries must coincide. + + rcond : float, default=None + The relative condition number of the positive definite matrix `B`. + If ``None``, the default value of ``scipy.linalg.pinvh`` is used which is + ``eps * n`` where ``eps`` is the machine precision of the datatype of `a`. + + Returns + ------- + b : spmatrix of shape (n, n) + A positive definite matrix which is identical to ``a`` except for main diagonal. + + Raises + ------ + ValueError + If `a` is not square or symmetric.
+ + Notes + ----- + This function performs no checks on `a`. + It is intended for matrices that are known to be at least positive semi-definite + from a mathematical point of view (all eigenvalues >= 0). However, due to numerical + inaccuracies, the smallest eigenvalue may be negative. + For making `A` positive definite, a small multiple of the identity matrix is added + to it as ``B = A + delta * I`` where `delta` is chosen to be the smallest positive + number such that the smallest eigenvalue of ``A + delta * I`` is numerically + positive when compared to the largest eigenvalue of ``A + delta * I``. In other + words, given the smallest and largest eigenvalue of `A` `lam_min` and `lam_max`, + respectively, `delta` is chosen such that + ``(lam_min + delta) / (lam_max + delta) >= rcond`` because ``lam_min + delta`` and + ``lam_max + delta`` are the smallest and largest eigenvalue of the resulting `B`. + Internally ``1.1 * rcond`` is used as the smallest ratio to account for numerical + inaccuracies in the conducted and potential future computations of eigenvalues.
+ + """ + + # first, the rcond-parameter is determined + if rcond is None: + inter_rcond = np.finfo(a.dtype).eps * a.shape[0] # type: ignore + else: + inter_rcond = rcond + + # then, the smallest and largest eigenvalue are computed + ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u)[0 : l_and_u[1] + 1, ::] + max_eigenvalue = _find_largest_symm_sparse_banded_spd_eigval( + ab=ab, check_finite=check_finite + ) + min_eigenvalue = _find_smallest_symm_sparse_banded_spd_eigval( + ab=ab, check_finite=check_finite + ) + + # if the ratio is fine already, the matrix is returned + if (min_eigenvalue / max_eigenvalue) >= inter_rcond: + return a.copy() # type: ignore + # else nothing + + # otherwise, the smallest multiple of the identity matrix is computed that makes + # the ratio fine and the resulting matrix is returned + inter_rcond *= 1.1 + delta = (min_eigenvalue - inter_rcond * max_eigenvalue) / (inter_rcond - 1.0) + + return a + delta * speye( + m=a.shape[0], dtype=a.dtype, format=a.format # type: ignore + ) From 0041382cd8c494453534a56adc42b5e63c04db6b Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 17:44:29 +0100 Subject: [PATCH 009/118] feat: [44] added centralized Whittaker smoother class --- chemotools/utils/models.py | 20 + chemotools/utils/whittaker_base.py | 626 +++++++++++++++++++++++++++++ 2 files changed, 646 insertions(+) create mode 100644 chemotools/utils/models.py create mode 100644 chemotools/utils/whittaker_base.py diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py new file mode 100644 index 00000000..b7124d01 --- /dev/null +++ b/chemotools/utils/models.py @@ -0,0 +1,20 @@ +from enum import Enum + +# if possible, pentapy is imported since it provides a more efficient implementation +# of solving pentadiagonal systems of equations, but the package is not in the +# dependencies, so ``chemotools`` needs to be made aware of whether it is available +try: + import pentapy as pp # noqa: F401 + + _PENTAPY_AVAILABLE = True 
+except ImportError: + _PENTAPY_AVAILABLE = False + +# an Enum class for the decomposition types used for solving linear systems that involve +# banded matrices + + +class BandedSolveDecompositions(str, Enum): + CHOLESKY = "cholesky" + LU = "lu" + PENTAPY = "pentapy" diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py new file mode 100644 index 00000000..4df2b091 --- /dev/null +++ b/chemotools/utils/whittaker_base.py @@ -0,0 +1,626 @@ +import numpy as np +from scipy.linalg import cho_solve_banded, cholesky_banded +from scipy.optimize import minimize_scalar + +from chemotools.utils.banded_linalg import ( + conv_symm_sparse_banded_sposdef_to_posdef, + conv_to_lu_banded_storage, + lu_banded, + lu_solve_banded, + slodget_cho_banded, +) +from chemotools.utils.finite_differences import forward_finite_diff_conv_matrix +from chemotools.utils.models import _PENTAPY_AVAILABLE, BandedSolveDecompositions + +if _PENTAPY_AVAILABLE: + import pentapy as pp +# else nothing + +_LUDecomposition = tuple[np.ndarray, np.ndarray, tuple[int, int]] +_CholeskyDecomposition = tuple[np.ndarray, bool] +_PentapyDecomposition = None +_Decomposition = _PentapyDecomposition | _CholeskyDecomposition | _LUDecomposition + +LN_OF_TWO_PI = np.log(2.0 * np.pi) + + +class WhittakerLikeSolver: + """This class can be used to solve linear systems of equations that involve banded + matrices as they occur in applications like the Whittaker-Henderson-smoothing or + derived methods like Asymmetric Least Squares (ALS) baseline correction. + It support weights and tries to use the most efficient method available. + Besides, it also offers the possibility to fit the roughness penalty itself. + + Parameters + ---------- + lam : int or float or None, default=1e2 + The lambda parameter to use for the Whittaker smooth. 
+ If ``None``, the transformer will fit the smoothness parameter itself by + maximising the marginal likelihood, which can be computationally expensive, but + more accurate than using (Generalized) Cross-Validation (see Notes). + + differences : int, default=1 + The number of differences to use for the Whittaker smooth. If the aim is to + obtain a smooth estimate of the `m`-th order derivative, this should be set to + at least ``m + 2``. + + polynomial_order : int, default=1 + Same as ``differences``, but for ``AirPls``. + + rcond : float, default=1e-15 + The relative condition number which is used to keep all matrices involved + positive definite. This is only used if ``lam`` is ``None``. + It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. + + Notes + ----- + If ``lam`` is ``None``, the pentapy solver cannot be used even if available. + Besides, the computational load increases since - especially for large series and + high differences - the pseudo-determinant of the penalty matrix ``P = D^T @ D`` + needs to be computed, which is computationally expensive and also subject to + numerical inaccuracies. The latter cause some eigenvalues to be numerically + negative, even though their true value is positive and of order ``<< 1e-16``, which + makes their accurate computation numerically impossible with double precision. + Therefore, the eigenvalues are lifted by adding a small value to the diagonal of + the penalty matrix before computing the pseudo-determinant to make the smallest + eigenvalue numerically positive when compared to the largest eigenvalue. From a + smoothing point of view, this turns the Whittaker-Smoothing with derivative penalty + into a blend of Whittaker Smoothing and Tikhonov Regularisation. + So, in contrast to ``P = D^T @ D``, the penalty matrix ``P = D^T @ D + c * I`` is + used where ``c`` is a very small numerical value, so in first approximation, the + combined smoother is still mostly a Whittaker smoother. 
+ + """ + + __log_lam_bounds = ( + -34.5, # 1e-15 + 115.13, # 1e50 + ) + + def __init__( + self, + ) -> None: + self._lam: int | float | None = float("nan") + self._differences: int = -1 + self._rcond: float = float("nan") + + def _setup_for_fit( + self, + series_size: int, + lam: int | float | None, + differences: int, + rcond: float = 1e-15, + ) -> None: + """Pre-computes everything that can be computed for the smoothing in general as + well as for fitting the lambda parameter itself. + """ + + # the input arguments are stored + self._lam = lam + self._differences = differences + self._rcond = rcond + + # the banded storage for a LAPACK LU decomposition is computed for the squared + # forward finite difference matrix D^T @ D which is the penalty matrix P + self.auto_lam_: bool = self._lam is None + self.l_and_u_: tuple[int, int] = (self._differences, self._differences) + self.series_size_: int = series_size + self.squ_fw_fin_diff_mat_ = forward_finite_diff_conv_matrix( + differences=self._differences, + accuracy=1, + series_size=series_size, + ) + self.squ_fw_fin_diff_mat_ = ( + self.squ_fw_fin_diff_mat_.T @ self.squ_fw_fin_diff_mat_ + ) + + # if the lambda parameter is to be fitted automatically, the penalty matrix is + # converted to a positive definite matrix and its log-determinant is computed + if self.auto_lam_: + self.squ_fw_fin_diff_mat_ = conv_symm_sparse_banded_sposdef_to_posdef( + a=self.squ_fw_fin_diff_mat_, l_and_u=self.l_and_u_, rcond=self._rcond + ) + # else nothing + + # finally, the matrix is converted to a banded storage + self.fw_fin_diff_mat_lu_banded_: np.ndarray = conv_to_lu_banded_storage( + a=self.squ_fw_fin_diff_mat_, + l_and_u=self.l_and_u_, + ) + + # if the lambda parameter is to be fitted automatically, the log-determinant of + # the penalty matrix is computed, which reduces to summing up the logarithms of + # of the squared main diagonal elements of its banded Cholesky decomposition + if self._lam is None: + lower = False + 
penalty_chol = cholesky_banded( + ab=self.fw_fin_diff_mat_lu_banded_[0 : self._differences + 1, ::], + lower=lower, + check_finite=False, + ) + + self.penalty_log_det_: float + det_sign, self.penalty_log_det_ = slodget_cho_banded( + decomposition=(penalty_chol, lower) + ) + assert det_sign > 0.0, "The penalty matrix is still not positive definite." + + else: + self.penalty_log_det_: float = float("nan") + + # finally, Pentapy is enabled if available, the number of differences is 2, + # and the lambda parameter is not fitted automatically + self._pentapy_enabled: bool = ( + _PENTAPY_AVAILABLE and self._differences == 2 and not self.auto_lam_ + ) + + def _pentapy_solve(self, ab: np.ndarray, bw: np.ndarray) -> np.ndarray: + """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` + with the Pentapy package. This is written as the system ``A @ x = b`` where + ``A = W + lam * D^T @ D`` and ``b = W @ b``. + """ + + return pp.solve( + mat=ab, + rhs=bw, + is_flat=True, + index_row_wise=False, + solver=1, + ) + + def _cholesky_solve( + self, ab: np.ndarray, bw: np.ndarray + ) -> tuple[np.ndarray, tuple[np.ndarray, bool]]: + """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` + with the Cholesky decomposition. This is written as the system ``A @ x = b`` + where ``A = W + lam * D^T @ D`` and ``b = W @ b``. + + Even though it is mathematically guaranteed that ``A`` is positive definite, + numerical errors can lead to a non-positive definite matrix. In this case, the + Cholesky decomposition fails and a ``LinAlgError`` is raised. 
+ + """ + + lower = False + cb = cholesky_banded(ab, lower=lower, check_finite=False) + decomposition = (cb, lower) + return ( + cho_solve_banded(cb_and_lower=decomposition, b=bw, check_finite=False), + decomposition, + ) + + def _lu_solve( + self, ab: np.ndarray, bw: np.ndarray + ) -> tuple[np.ndarray, tuple[np.ndarray, np.ndarray, tuple[int, int]]]: + """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` + with the LU decomposition. This is written as the system ``A @ x = b`` where + ``A = W + lam * D^T @ D`` and ``b = W @ b``. + + """ + + # the LU decomposition is computed + lub, ipiv = lu_banded( + l_and_u=self.l_and_u_, + ab=ab, + check_finite=False, + ) + decomposition = (lub, ipiv, self.l_and_u_) + + # the linear system is solved + return ( + lu_solve_banded( + decomposition=decomposition, + b=bw, + check_finite=False, + ), + decomposition, + ) + + def _solve( + self, + bw: np.ndarray, + log_lam: float, + w: np.ndarray | None, + ) -> tuple[np.ndarray, _Decomposition, BandedSolveDecompositions]: + """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` + where ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and + ``D`` is the finite difference matrix of order ``differences``. + + Parameters + ---------- + bw : np.ndarray of shape (n,) or (n, m) + The weighted right-hand side vector or matrix of the linear system of + equations. + log_lam : float + The logarithm of the lambda parameter to use for the Whittaker-like smooth. + w : np.ndarray of shape (n,) + The weights to use for the linear system of equations. It must be a vector + even if ``wb`` is a matrix because having ``wb`` as a matrix is only + possible if lambda is fixed and the same weights are applied to all series. + + Returns + ------- + x : np.ndarray of shape (n,) + The solution vector of the linear system of equations. + + decomposition : tuple + The decomposition used to solve the linear system of equations. 
+ For the LU decomposition, this is a tuple ``(lub, ipiv, l_and_u)`` where + ``lub`` is the banded storage of the LU decomposition, ``ipiv`` is the pivot + vector, and ``l_and_u`` is the tuple ``(l, u)`` with the lower and upper + bandwidth of ``lub``. + For the Cholesky decomposition, this is a tuple ``(cb, lower)`` where ``cb`` + is the banded storage of the Cholesky decomposition and ``lower`` is a + boolean flag indicating whether the lower or upper triangular matrix is + stored. + For the Pentapy solver, this is ``None``. + + decomposition_type : BandedSolveDecompositions + The type of decomposition used to solve the linear system of equations. + + """ + + # the banded storage for a LAPACK LU decomposition is computed by updating the + # diagonal of the squared forward finite difference matrix D^T @ D with the + # weights + # NOTE: using the inverse of lambda is more efficient than using lambda directly + # since then it needs to be applied to the weights only rather than a + # possible large matrix + ab = np.exp(log_lam) * self.fw_fin_diff_mat_lu_banded_ + if w is not None: + ab[self._differences, ::] += w + else: + ab[self._differences, ::] += 1.0 + + # the linear system of equations is solved with the most efficient method + # Case 1: Pentapy can be used + if self._pentapy_enabled: + return ( + self._pentapy_solve(ab=ab, bw=bw), + None, + BandedSolveDecompositions.PENTAPY, + ) + + # Case 2: Pentapy cannot be used, but the matrix is NUMERICALLY positive + # definite + try: + x, decomposition = self._cholesky_solve( + ab=ab[0 : self._differences + 1], bw=bw + ) + return x, decomposition, BandedSolveDecompositions.CHOLESKY + + # Case 3: Pentapy cannot be used and the matrix is NOT NUMERICALLY positive + # definite + except np.linalg.LinAlgError: + x, decomposition = self._lu_solve(ab=ab, bw=bw) + return x, decomposition, BandedSolveDecompositions.LU + + # FIXME: this method is not yet used and needs to be tested + def _calc_neg_marginal_likelihood( + self, + 
x_orig: np.ndarray, + x_smooth: np.ndarray, + decomposition: _Decomposition, + solver: BandedSolveDecompositions, + log_lam: float, + w: np.ndarray | None, + w_logdet: float, + lml_sample_size_corr: float, + ) -> float: + """Computes the negative marginal likelihood of the Whittaker-like smooth.""" + + # running this method is only possible if the lambda parameter is fitted and + # the decomposition originates from a Cholesky decomposition + # TODO: maybe also allow LU decompositions + assert self.auto_lam_ and solver == BandedSolveDecompositions.CHOLESKY, ( + "The solution of the linear system could not be computed with a Cholesky " + "decomposition." + ) + + # now, the weighted sum of squared residuals (wRSS) is computed + if w is not None: + wrss = np.sum(w * np.square(x_orig - x_smooth)) + else: + wrss = np.sum(np.square(x_orig - x_smooth)) + + # then, the sum of squared penalties (SSP) is computed using the positive + # definite penalty matrix + ssp = np.exp(log_lam) * (x_smooth @ self.squ_fw_fin_diff_mat_ @ x_smooth) + + # besides, the log-determinant of the matrix 1/lambda * W + P which is simple + # because its Cholesky decomposition is already available + ww_plus_p_det_sign, ww_plus_p_log_det = slodget_cho_banded( + decomposition=decomposition # type: ignore + ) + assert ( + ww_plus_p_det_sign > 0.0 + ), "The matrix to invert was not positive definite." + + # finally, the reduced marginal likelihood is computed + return 0.5 * ( + wrss + + ssp + - w_logdet + - (self.series_size_ * log_lam + self.penalty_log_det_) + + ww_plus_p_log_det + + lml_sample_size_corr + ) + + # FIXME: this method is not yet used and needs to be tested + def opt_target_auto_lam( + self, + log_lam: float, + x: np.ndarray, + x_weighted: np.ndarray, + w: np.ndarray | None, + w_logdet: float, + lml_sample_size_corr: float, + ) -> float: + """Computes the target function to be minimised when fitting the lambda + parameter itself. 
+ """ + + # first, the solution of the linear system of equations is computed + x_smooth, decomposition, solver = self._solve( + bw=x_weighted, log_lam=log_lam, w=w + ) + + # then, the reduced marginal likelihood is determined and returned + return self._calc_neg_marginal_likelihood( + x_orig=x, + x_smooth=x_smooth, + decomposition=decomposition, + solver=solver, + log_lam=log_lam, + w=w, + w_logdet=w_logdet, + lml_sample_size_corr=lml_sample_size_corr, + ) + + # FIXME: this method is not yet used and needs to be tested + def _solve_single_x_auto_lam( + self, + x: np.ndarray, + x_weighted: np.ndarray, + w: np.ndarray | None, + w_logdet: float, + num_nonzero_w: int, + ) -> tuple[np.ndarray, float]: + """Fits the lambda parameter itself by maximising the reduced marginal + likelihood. "Reduced" refers to the fact that only the terms that depend on the + smoothing parameter `lam` are considered. + + For the parameters, please refer to the documentation of ``solve``. Instead of + a 2D-Array, a 1D-Array is expected for ``x`` and ``w``. Besides, it expects + the product ``x * w`` to be passed as ``x_weighted`` since this is more + efficient than computing it inside the solver. 
+ + Notes + ----- + The logarithm of the marginal likelihood is defined as + ``-0.5 * (wRSS + SSP - log(pdet(W)) - log(pdet(P)) + log(det(W + P)) + + (n_obs - diff) * log(2 * pi))`` where + + - `W` as the diagonal matrix of weights + - `P` is the penalty matrix + - `wRSS` is the weighted Sum of Squares Residuals between the original and the + smoothed series `x` and `x_smoothed` + (``(x - x_smoothed).T @ W @ (x - x_smoothed)``) + - `SSP` is the sum of squared penalties (``x_smoothed.T @ P @ x_smoothed``) + - `pdet` is the pseudo-determinant of a matrix (product of its non-zero + eigenvalues) + - `det` is the determinant of a matrix (product of its eigenvalues) + - `n_obs` is the number of observations with non-zero weights + -""" + + # the sample size correction summand for the marginal likelihood is computed + lml_sample_size_corr = (num_nonzero_w - self._differences) * LN_OF_TWO_PI + # the target function is minimised using the bounded Brent method + opt_res = minimize_scalar( + fun=self.opt_target_auto_lam, + bounds=self.__log_lam_bounds, + method="bounded", + args=( + x, + x_weighted, + w, + w_logdet, + lml_sample_size_corr, + ), + ) + assert opt_res.success, "The optimisation did not converge." + + # the solution of the linear system of equations is computed + x_smooth, _, _ = self._solve(bw=x_weighted, log_lam=opt_res.x, w=w) + + # finally, the solution and the lambda parameter are returned + return x_smooth, np.exp(opt_res.x) + + def _solve_single_x_fixed_lam( + self, + x: np.ndarray, + x_weighted: np.ndarray, + w: np.ndarray | None, + ) -> tuple[np.ndarray, float]: + """Fits the Whittaker-like smooth with a fixed lambda parameter. + + For the parameters, please refer to the documentation of ``solve``. Instead of + a 2D-Array, a 1D-Array is expected for ``x`` and ``w``. Besides, it expects + the product ``x * w`` to be passed as ``x_weighted`` since this is more + efficient than computing it inside the solver. 
+ + """ + + # the solution of the linear system of equations is computed + x_smooth, _, _ = self._solve( + bw=x_weighted, + log_lam=np.log(self._lam), # type: ignore + w=w, + ) + + # finally, the solution is returned together with the lambda parameter + return x_smooth, self._lam # type: ignore + + def _solve_single_x( + self, + x: np.ndarray, + w: np.ndarray | None, + w_logdet: float, + num_nonzero_w: int, + ) -> tuple[np.ndarray, float]: + """Fits the Whittaker-like smooth to a single series for a fixed or fitted + lambda parameter. + + For the parameters, please refer to the documentation of ``solve``. Instead of + a 2D-Array, a 1D-Array is expected for ``x`` and ``w``. + + """ + + # first, the weighted series is computed + if w is not None: + x_weighted = w * x + else: + x_weighted = x + + # then, the solution of the linear system of equations is computed + if self.auto_lam_: + return self._solve_single_x_auto_lam( + x=x, + x_weighted=x_weighted, + w=w, + w_logdet=w_logdet, + num_nonzero_w=num_nonzero_w, + ) + else: + return self._solve_single_x_fixed_lam(x=x, x_weighted=x_weighted, w=w) + + def _solve_multiple_x( + self, + X: np.ndarray, + w: np.ndarray | None, + ) -> tuple[np.ndarray, np.ndarray]: + """Fits the Whittaker-like smooth to multiple series when the lambda parameter + is fixed and the same weights are applied to all series. + It leverages the ability of Pentapy and LAPACK to solve multiple linear systems + of equations at once from the same inversion. + + For the parameters, please refer to the documentation of ``solve``. 
+ + """ + + # in this special case, the solution of the linear system of equations can be + # computed with a single matrix inversion + if w is not None: + x_weighted = np.transpose(w * X) + w_inter = w.ravel() + else: + x_weighted = np.transpose(X) + w_inter = w + + # then, the solution of the linear system of equations is computed + X_smooth, _, _ = self._solve( + bw=x_weighted, + log_lam=np.log(self._lam), # type: ignore + w=w_inter, + ) + + return ( + np.transpose(X_smooth), + np.full(shape=(X.shape[0],), fill_value=self._lam), # type: ignore + ) + + def _whittaker_solve( + self, + X: np.ndarray, + *, + w: np.ndarray | None = None, + use_same_w_for_all: bool = False, + ) -> tuple[np.ndarray, np.ndarray]: + """Solves the linear equations for Whittaker-Henderson smoothing. Internally it + chooses the most appropriate method and solver depending on the data + dimensionality, the weights, and the system's available packages (pentapy). + + Parameters + ---------- + X : np.ndarray of shape(n, m) + The series to be smoothed stored as individual rows. + w : np.ndarray of shape(1, m), shape(n, m), or None + The weights to be applied for smoothing. If only a single row is provided + and ``use_same_w_for_all``, the same weights can be applied for all series + in `X`, which enhances the smoothing a lot for fixed smoothing parameters + `lam`. + If ``None``, no weights are applied and each datapoint is assumed to have + equal importance, This allows for ``use_same_w_for_all`` to be ``True`` + as well. + use_same_w_for_all + Whether to use the same weights for all series in `X`. This is only possible + if `w` is a single row or ``None``. + + Returns + ------- + X_smooth : np.ndarray of shape(n, m) + The smoothed series stored as individual rows. + + lam : np.ndarray of shape(n,) + The lambda parameter used for the smoothing of each series. If `lam` was + fixed, this is a vector of length `n` with the same value for each series. 
+ + """ + + # a nested function is defined for updating the weights + def update_to_next_weights(iter_i: int) -> None: + nonlocal w_curr, w_logdet_curr, num_nonzero_w_curr + if iter_i > 0: + if w is None: + w_curr = None + w_logdet_curr = 0.0 # for identity matrix + num_nonzero_w_curr = self.series_size_ + return + elif not use_same_w_for_all: + w_curr = w[iter_i, ::] + non_zero_idxs = ( + w_curr + >= np.finfo(w_curr.dtype).eps * w_curr.max() * w_curr.size + ) + w_logdet_curr = np.sum(np.log(w_curr[non_zero_idxs])) + num_nonzero_w_curr = np.sum(non_zero_idxs) + else: + return + + else: + if w is None: + w_curr = None + w_logdet_curr = 1.0 + num_nonzero_w_curr = self.series_size_ + else: + w_curr = w[iter_i, ::] + non_zero_idxs = ( + w_curr + >= np.finfo(w_curr.dtype).eps * w_curr.max() * w_curr.size + ) + w_logdet_curr = np.sum(np.log(w_curr[non_zero_idxs])) + + # if multiple x with the same weights are to be solved for fixed lambda, this + # can be done more efficiently by leveraging Pentapy's and LAPACK'S ability to + # perform multiple solves from the same inversion at once + if not self.auto_lam_ and use_same_w_for_all: + return self._solve_multiple_x(X=X, w=w) + # else nothing + + # otherwise, the solution of the linear system of equations is computed for + # each series + X_smooth = np.empty_like(X) + lam = np.empty(shape=(X.shape[0],)) + w_curr = None + w_logdet_curr = float("nan") + num_nonzero_w_curr = -1 + for iter_i, x in enumerate(X): + update_to_next_weights(iter_i=iter_i) + X_smooth[iter_i], lam[iter_i] = self._solve_single_x( + x=x, + w=w_curr, + w_logdet=w_logdet_curr, + num_nonzero_w=num_nonzero_w_curr, + ) + + return X_smooth, lam From c3dff4863c08cdd806a5cea17f80edccc217c6e2 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 18:05:26 +0100 Subject: [PATCH 010/118] feat: [44] unified whittaker-smoother-like methods with performance enhancement; extended tests; fixed type hints; black formatted; started lint fixes --- 
chemotools/baseline/_air_pls.py | 104 +++--- chemotools/baseline/_ar_pls.py | 114 ++++--- chemotools/smooth/_whittaker_smooth.py | 162 +++++++--- chemotools/utils/check_inputs.py | 45 +++ tests/fixtures.py | 51 ++- .../reference_finite_differences.csv | 25 ++ ..._whitakker.csv => reference_whittaker.csv} | 0 tests/test_for_utils.py | 305 ++++++++++++++++++ tests/test_functionality.py | 172 +++++++--- tests/test_sklearn_compliance.py | 20 +- 10 files changed, 817 insertions(+), 181 deletions(-) create mode 100644 tests/resources/reference_finite_differences.csv rename tests/resources/{reference_whitakker.csv => reference_whittaker.csv} (100%) create mode 100644 tests/test_for_utils.py diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 7e82cfac..3e55814e 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -1,16 +1,18 @@ import logging + import numpy as np -from scipy.sparse import csc_matrix, eye, diags -from scipy.sparse.linalg import spsolve -from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted from chemotools.utils.check_inputs import check_input +from chemotools.utils.whittaker_base import WhittakerLikeSolver logger = logging.getLogger(__name__) -class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): +class AirPls( + OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver +): """ This class implements the AirPLS (Adaptive Iteratively Reweighted Penalized Least Squares) algorithm for baseline correction of spectra data. 
AirPLS is a common approach for removing the baseline from spectra, which can be useful @@ -18,7 +20,7 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): Parameters ---------- - lam : float, optional default=1e2 + lam : float or int, optional default=1e2 The lambda parameter controls the smoothness of the baseline. Increasing the value of lambda results in a smoother baseline. @@ -30,6 +32,11 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): The number of iterations used to calculate the baseline. Increasing the number of iterations can improve the accuracy of the baseline correction, but also increases the computation time. + rcond : float, default=1e-15 + The relative condition number which is used to keep all matrices involved + positive definite. This is not actively used at the moment. + It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. + Methods ------- fit(X, y=None) @@ -40,7 +47,7 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): _calculate_whittaker_smooth(x, w) Calculate the Whittaker smooth of a given input vector x, with weights w. - + _calculate_air_pls(x) Calculate the AirPLS baseline of a given input vector x. @@ -50,15 +57,18 @@ class AirPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): squares. Analyst 135 (5), 1138-1146 (2010). """ + # TODO: polynomial order is actually differences def __init__( self, - lam: int = 100, + lam: int | float = 100, polynomial_order: int = 1, nr_iterations: int = 15, + rcond: float = 1e-15, ): - self.lam = lam - self.polynomial_order = polynomial_order - self.nr_iterations = nr_iterations + self.lam: int | float = lam + self.polynomial_order: int = polynomial_order + self.nr_iterations: int = nr_iterations + self.rcond: float = rcond def fit(self, X: np.ndarray, y=None) -> "AirPls": """Fit the AirPls baseline correction estimator to the input data. 
@@ -77,7 +87,15 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": Returns the instance itself. """ # Check that X is a 2D array and has only finite values - X = self._validate_data(X) + X = BaseEstimator._validate_data(self, X, reset=True) # type: ignore + + # the internal solver is set up + self._setup_for_fit( + series_size=X.shape[1], + lam=self.lam, + differences=self.polynomial_order, + rcond=self.rcond, + ) return self @@ -106,50 +124,54 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: X_ = X.copy() # Check that the number of features is the same as the fitted data - if X_.shape[1] != self.n_features_in_: + # NOTE: ``n_features_in_`` is set in ``BaseEstimator._validate_data`` when + # ``reset`` is True + if X_.shape[1] != self.n_features_in_: # type: ignore raise ValueError( - f"Expected {self.n_features_in_} features but got {X_.shape[1]}" + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" # type: ignore # noqa: E501 ) # Calculate the air pls smooth for i, x in enumerate(X_): X_[i] = x - self._calculate_air_pls(x) - return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - - def _calculate_whittaker_smooth(self, x, w): - X = np.array(x) - m = X.size - E = eye(m, format="csc") - for i in range(self.polynomial_order): - E = E[1:] - E[:-1] - W = diags(w, 0, shape=(m, m)) - A = csc_matrix(W + (self.lam * E.T @ E)) - B = csc_matrix(W @ X.T).toarray().ravel() - background = spsolve(A, B) - return np.array(background) + # FIXME: can this even happen because X is ensured to be 2D? + if X_.ndim == 1: + # FIXME: shouldn't this be a row and not a column vector because + # Scikit-Learn works with shape (n_samples, n_features), i.e., + # (1, n_features) for a single sample? 
+ return X_.reshape((-1, 1)) + else: + return X_ def _calculate_air_pls(self, x): - m = x.shape[0] - w = np.ones(m) - - for i in range(1, self.nr_iterations): - z = self._calculate_whittaker_smooth(x, w) + # FIXME: this initial weighting strategy might not yield the best results + w = np.ones_like(x) + z = np.zeros_like(x) + dssn_thresh = 1e-3 * np.abs(x).sum() + + # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` + for i in range(0, self.nr_iterations - 1): + # the baseline is fitted using the Whittaker smoother framework + z = self._whittaker_solve(X=x, w=w, use_same_w_for_all=True)[0] d = x - z dssn = np.abs(d[d < 0].sum()) - if dssn < 0.001 * np.abs(x).sum(): - break - - if i == self.nr_iterations - 1: + # the algorithm is stopped if the threshold is reached + if dssn < dssn_thresh: break - w[d >= 0] = 0 - w[d < 0] = np.exp(i * np.abs(d[d < 0]) / dssn) - - negative_d = d[d < 0] - if negative_d.size > 0: - w[0] = np.exp(i * negative_d.max() / dssn) + # the weights are updated + below_base_indics = d < 0 + w[~below_base_indics] = 0.0 + exp_mult = i + 1 + w[below_base_indics] = np.exp(exp_mult * np.abs(d[d < 0]) / dssn) + + d_negative = d[below_base_indics] + if d_negative.size > 0: + # FIXME: this might easily yield a weight of 1 if the maximum of the + # negative_d is very close to zero + w[0] = np.exp(exp_mult * d_negative.max() / dssn) w[-1] = w[0] diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 064621dc..c128c732 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -1,18 +1,17 @@ import logging -import numpy as np -import scipy.sparse as sp -from scipy.sparse import spdiags, csc_matrix -from scipy.sparse.linalg import splu +from numbers import Integral -from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin -from sklearn.utils.validation import check_is_fitted +import numpy as np +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, 
TransformerMixin +from sklearn.utils.validation import check_is_fitted, check_scalar from chemotools.utils.check_inputs import check_input +from chemotools.utils.whittaker_base import WhittakerLikeSolver logger = logging.getLogger(__name__) -class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): +class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver): """ This class implements the Assymmetrically Reweighted Penalized Least Squares (ArPls) is a baseline correction method for spectroscopy data. It uses an iterative process @@ -20,7 +19,7 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): Parameters ---------- - lam : float, optional (default=1e4) + lam : float or int, optional (default=1e4) The penalty parameter for the difference matrix in the objective function. ratio : float, optional (default=0.01) @@ -29,6 +28,11 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): nr_iterations : int, optional (default=100) The maximum number of iterations for the weight updating scheme. + rcond : float, default=1e-15 + The relative condition number which is used to keep all matrices involved + positive definite. This is not actively used at the moment. + It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. 
+ Methods ------- @@ -46,20 +50,24 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): References ---------- - - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo - Baseline correction using asymmetrically reweighted penalized + - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo + Baseline correction using asymmetrically reweighted penalized least squares smoothing """ def __init__( self, - lam: float = 1e4, + lam: float | int = 1e4, + differences: int = 2, ratio: float = 0.01, nr_iterations: int = 100, + rcond: float = 1e-15, ): - self.lam = lam - self.ratio = ratio - self.nr_iterations = nr_iterations + self.lam: float | int = lam + self.differences: int = differences + self.ratio: float = ratio + self.nr_iterations: int = nr_iterations + self.rcond: float = rcond def fit(self, X: np.ndarray, y=None) -> "ArPls": """Fit the estimator to the data. @@ -78,8 +86,28 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": Returns the instance itself. """ + # the constructor parameters are checked + check_scalar( + x=self.ratio, + name="ratio", + target_type=float, + min_val=1e-15, + max_val=1.0 - 1e-15, + ) + check_scalar( + x=self.nr_iterations, name="nr_iterations", target_type=Integral, min_val=1 + ) + # Check that X is a 2D array and has only finite values - X = self._validate_data(X) + X = BaseEstimator._validate_data(self, X, reset=True) # type: ignore + + # the internal solver is setup + self._setup_for_fit( + series_size=X.shape[1], + lam=self.lam, + differences=self.differences, + rcond=self.rcond, + ) return self @@ -108,42 +136,50 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: X_ = X.copy() # Check that the number of features is the same as the fitted data - if X_.shape[1] != self.n_features_in_: + # NOTE: ``n_features_in_`` is set in ``BaseEstimator._validate_data`` when + # ``reset`` is True + if X_.shape[1] != self.n_features_in_: # type: ignore raise ValueError( - f"Expected {self.n_features_in_} features but got 
{X_.shape[1]}" + f"Expected {self.n_features_in_} features but got {X_.shape[1]}" # type: ignore # noqa: E501 ) # Calculate the ar pls baseline for i, x in enumerate(X_): X_[i] = x - self._calculate_ar_pls(x) - return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - - def _calculate_diff(self, N): - I = sp.eye(N, format="csc") - D2 = sp.diags([1, -2, 1], [0, 1, 2], shape=(N - 2, N), format="csc") - return D2.dot(I).T + # FIXME: can this even happen because X is ensured to be 2D? + if X_.ndim == 1: + # FIXME: shouldn't this be a row and not a column vector because + # Scikit-Learn works with shape (n_samples, n_features), i.e., + # (1, n_features) for a single sample? + return X_.reshape((-1, 1)) + else: + return X_ def _calculate_ar_pls(self, x): - N = len(x) - D = self._calculate_diff(N) - H = self.lam * D.dot(D.T) - w = np.ones(N) - iteration = 0 - while iteration < self.nr_iterations: - W = spdiags(w, 0, N, N) - C = csc_matrix(W + H) - z = splu(C).solve(w * x) + # FIXME: this initial weighting strategy might not yield the best results + if self.ratio < 0.5: + w = np.ones_like(x) + else: + w = np.zeros_like(x) + + z = np.zeros_like(x) + # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` + for _ in range(self.nr_iterations): + # the baseline is fitted using the Whittaker smoother framework + z = self._whittaker_solve(X=x, w=w, use_same_w_for_all=True)[0] d = x - z - dn = d[d < 0] - if len(dn) == 0: + + # if there is no data point below the baseline, the baseline is considered + # to be fitted + d_negative = d[d < 0] + if len(d_negative) == 0: break - m = np.mean(dn) - s = np.std(dn) - exponent = np.clip(2 * (d - (2 * s - m)) / s, -709, 709) + m = np.mean(d_negative) + s = np.std(d_negative) + exponent = np.clip(2.0 * (d - (2.0 * s - m)) / s, -709, 709) # type: ignore wt = 1.0 / (1.0 + np.exp(exponent)) - if np.linalg.norm(w - wt) / np.linalg.norm(w) < self.ratio: + if np.linalg.norm(w - wt) / np.linalg.norm(w) < self.ratio: # type: ignore break w 
= wt - iteration += 1 return z diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 00ba7e3d..ae806544 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -1,46 +1,81 @@ -import numpy as np -from scipy.sparse import csc_matrix, eye, diags -from scipy.sparse.linalg import spsolve -from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin -from sklearn.utils.validation import check_is_fitted +""" +This module contains the ``WhittakerSmooth`` transformer, which performs smoothing on +data according to the Whittaker-Henderson formulation of Penalized Least Squares. + +References +---------- +It's based on the algorithms described in [1]_ and [2]_ where an implementational +adaption of [2]_ was required to make it numerically stable ([3]_). + +.. [1] Z.-M. Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 (2010). +.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023). +.. [3] https://math.stackexchange.com/q/4819039/1261538 -from chemotools.utils.check_inputs import check_input +""" + +from numpy import ndarray +from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin +from sklearn.utils.validation import check_is_fitted -# This code is adapted from the following source: -# Z.-M. Zhang, S. Chen, and Y.-Z. Liang, -# Baseline correction using adaptive iteratively reweighted penalized least squares. -# Analyst 135 (5), 1138-1146 (2010). 
+from chemotools.utils.check_inputs import check_input, check_weights +from chemotools.utils.whittaker_base import WhittakerLikeSolver -class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): +class WhittakerSmooth( + OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver +): """ - A transformer that calculates the Whittaker smooth of the input data. + A transformer that performs smoothing on data according to the Whittaker-Henderson + formulation of Penalized Least Squares. Parameters ---------- - lam : float, optional - The lambda parameter to use for the Whittaker smooth. Default is 1e2. + lam : float or int, default=1e2 + The lambda parameter to use for the Whittaker smooth. - differences : int, optional - The number of differences to use for the Whittaker smooth. Default is 1. + differences : int, default=1 + The number of differences to use for the Whittaker smooth. If the aim is to + obtain a smooth estimate of the `m`-th order derivative, this should be set to + at least ``m + 2``. + + rcond : float, default=1e-15 + The relative condition number which is used to keep all matrices involved + positive definite. This is not actively used at the moment. + It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. + + Attributes + ---------- + n_features_in_ : int + The number of features in the input data. + + _is_fitted : bool + Whether the transformer has been fitted to data. Methods ------- fit(X, y=None) Fit the transformer to the input data. - transform(X, y=0, copy=True) - Transform the input data by calculating the Whittaker smooth. + transform(X, y=None, sample_weight=None) + Transform the input data by calculating the (weighted) Whittaker smooth. + + fit_transform(X, y=None, sample_weight=None) + Fit the transformer to the input data and transform it. 
+ """ + def __init__( self, - lam: float = 1e2, + lam: int | float = 1e2, differences: int = 1, + rcond: float = 1e-15, ): self.lam = lam self.differences = differences + self.rcond = rcond - def fit(self, X: np.ndarray, y=None) -> "WhittakerSmooth": + def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": """ Fit the transformer to the input data. @@ -56,13 +91,32 @@ def fit(self, X: np.ndarray, y=None) -> "WhittakerSmooth": ------- self : WhittakerSmooth The fitted transformer. + """ # Check that X is a 2D array and has only finite values - X = self._validate_data(X) + X = check_input(X) + + # Set the number of features ... + self.n_features_in_ = X.shape[1] + # ... and all the required attributes for fitting + self._setup_for_fit( + series_size=self.n_features_in_, + lam=self.lam, + differences=self.differences, + rcond=self.rcond, + ) + + # Set the fitted attribute to True + self._is_fitted = True return self - def transform(self, X: np.ndarray, y=None) -> np.ndarray: + def transform( + self, + X: ndarray, + y: None = None, + sample_weight: ndarray | None = None, + ) -> ndarray: """ Transform the input data by calculating the Whittaker smooth. @@ -74,13 +128,20 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: y : None Ignored. + sample_weight : np.ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None + Individual weights for each of the input data. If only 1 weight vector is + provided, it is assumed to be the same for all samples. + If ``None``, all features are assumed to have the same weight. + Returns ------- - X_ : np.ndarray of shape (n_samples, n_features) + X_smoothed : np.ndarray of shape (n_samples, n_features) The transformed data.
- """ + + """ # noqa: E501 + # Check that the estimator is fitted - check_is_fitted(self, "n_features_in_") + check_is_fitted(self, "_is_fitted") # Check that X is a 2D array and has only finite values X = check_input(X) @@ -92,21 +153,38 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: f"Expected {self.n_features_in_} features but got {X_.shape[1]}" ) + # Check the weights + sample_weight_checked, use_same_w_for_all = check_weights( + weights=sample_weight, n_samples=X_.shape[0], n_features=X_.shape[1] + ) + # Calculate the whittaker smooth - for i, x in enumerate(X_): - X_[i] = self._calculate_whittaker_smooth(x) - - return X_.reshape(-1, 1) if X_.ndim == 1 else X_ - - def _calculate_whittaker_smooth(self, x): - X = np.array(x) - m = X.size - E = eye(m, format="csc") - w = np.ones(m) - for i in range(self.differences): - E = E[1:] - E[:-1] - W = diags(w, 0, shape=(m, m)) - A = csc_matrix(W + (self.lam * E.T @ E)) - B = csc_matrix(W @ X.T).toarray().ravel() - background = spsolve(A, B) - return np.array(background) + return self._whittaker_solve( + X=X_, w=sample_weight_checked, use_same_w_for_all=use_same_w_for_all + )[0] + + def fit_transform( + self, X: ndarray, y: None = None, sample_weight: ndarray | None = None + ) -> ndarray: + """Fit the transformer to the input data and transform it. + + Parameters + ---------- + X : np.ndarray of shape (n_samples, n_features) + The input data to fit and transform. + + y : None + Ignored. + + sample_weight : np.ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None + Individual weights for each of the input data. If only 1 weight vector is + provided, it is assumed to be the same for all samples. + If ``None``, all features are assumed to have the same weight. + + Returns + ------- + X_smoothed : np.ndarray of shape (n_samples, n_features) + The transformed data.
+ """ # noqa: E501 + + return self.fit(X=X).transform(X=X, sample_weight=sample_weight) diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index 88b28293..dbc49ad4 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -1,3 +1,4 @@ +import numpy as np from sklearn.utils.validation import check_array @@ -12,3 +13,47 @@ def check_input(X, y=None): if len(y) != X.shape[0]: raise ValueError("y must have the same number of samples as X") return X + + +def check_weights( + weights: np.ndarray | None, n_samples: int, n_features: int +) -> tuple[np.ndarray | None, bool]: + # if the weights are None, return None and a flag that the same weights should be + # applied for all samples + if weights is None: + return None, True + # else nothing + + # if the weights are an effectively 1D-array, make them a 2D-array + if weights.ndim == 1 or (weights.ndim == 2 and weights.shape[0] == 1): + weights_checked = weights.reshape((1, -1)) + else: + weights_checked = weights + # else nothing + + # now, they need to be checked for being 2D and containing only finite values + weights_checked = check_array( + weights_checked, ensure_2d=True, force_all_finite=True + ) + + # afterwards, they are checked for having the right shape + if weights_checked.shape[0] not in {1, n_samples}: + raise ValueError( + f"\nWeights must have either 1 or {n_samples} rows, but they have " + f"{weights_checked.shape[0]} rows." + ) + elif weights_checked.shape[1] != n_features: + raise ValueError( + f"\nWeights must have {n_features} columns, but they have " + f"{weights_checked.shape[1]} columns."
+ ) + # else nothing + + # finally, it is checked whether the weights are all non-negative + if not np.all(weights_checked >= 0.0): + raise ValueError("\nWeights must be non-negative.") + # else nothing + + # the weights are returned together with a flag whether to apply the same weights + # for all samples or not + return weights_checked, weights_checked.shape[0] == 1 diff --git a/tests/fixtures.py b/tests/fixtures.py index 54f59a25..f1ebcba0 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,7 +1,7 @@ -import numpy as np import os -import pytest +import numpy as np +import pytest test_directory = os.path.dirname(os.path.abspath(__file__)) @@ -9,7 +9,7 @@ @pytest.fixture -def spectrum() -> np.ndarray: +def spectrum() -> list[np.ndarray]: return [ np.loadtxt( os.path.join(path_to_resources, "spectrum.csv"), delimiter="," @@ -18,7 +18,7 @@ def spectrum() -> np.ndarray: @pytest.fixture -def spectrum_arpls() -> np.ndarray: +def spectrum_arpls() -> list[np.ndarray]: return [ np.loadtxt( os.path.join(path_to_resources, "spectrum_arpls.csv"), delimiter="," @@ -27,7 +27,7 @@ def spectrum_arpls() -> np.ndarray: @pytest.fixture -def reference_airpls() -> np.ndarray: +def reference_airpls() -> list[np.ndarray]: return [ np.loadtxt( os.path.join(path_to_resources, "reference_airpls.csv"), delimiter="," @@ -36,7 +36,7 @@ def reference_airpls() -> np.ndarray: @pytest.fixture -def reference_arpls() -> np.ndarray: +def reference_arpls() -> list[np.ndarray]: return [ np.loadtxt( os.path.join(path_to_resources, "reference_arpls.csv"), delimiter="," @@ -45,7 +45,7 @@ def reference_arpls() -> np.ndarray: @pytest.fixture -def reference_msc_mean() -> np.ndarray: +def reference_msc_mean() -> list[np.ndarray]: return [ np.loadtxt( os.path.join(path_to_resources, "reference_msc_mean.csv"), delimiter="," @@ -54,7 +54,7 @@ def reference_msc_mean() -> np.ndarray: @pytest.fixture -def reference_msc_median() -> np.ndarray: +def reference_msc_median() -> list[np.ndarray]: return [ 
np.loadtxt( os.path.join(path_to_resources, "reference_msc_median.csv"), delimiter="," @@ -63,7 +63,7 @@ def reference_msc_median() -> np.ndarray: @pytest.fixture -def reference_sg_15_2() -> np.ndarray: +def reference_sg_15_2() -> list[np.ndarray]: return [ np.loadtxt( os.path.join(path_to_resources, "reference_sg_15_2.csv"), delimiter="," @@ -72,7 +72,7 @@ def reference_sg_15_2() -> np.ndarray: @pytest.fixture -def reference_snv() -> np.ndarray: +def reference_snv() -> list[np.ndarray]: return [ np.loadtxt( os.path.join(path_to_resources, "reference_snv.csv"), delimiter="," @@ -81,9 +81,36 @@ def reference_snv() -> np.ndarray: @pytest.fixture -def reference_whitakker() -> np.ndarray: +def reference_whittaker() -> list[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_whitakker.csv"), delimiter="," + os.path.join(path_to_resources, "reference_whittaker.csv"), delimiter="," ).tolist() ] + + +@pytest.fixture +def reference_finite_differences() -> list[tuple[int, int, np.ndarray]]: + fin_diff_table = np.genfromtxt( + os.path.join(path_to_resources, "reference_finite_differences.csv"), + skip_header=2, + delimiter=",", + missing_values="#N/A", + filling_values=np.nan, + dtype=np.float64, + ) + fin_diff_ordered_coeffs = [] + for row_idx in range(0, fin_diff_table.shape[0]): + # the first column is the difference order, the second column is the accuracy, + # and the remaining columns are the coefficients where the trailing NaNs are + # removed + row = fin_diff_table[row_idx, ::] + fin_diff_ordered_coeffs.append( + ( + int(row[0]), + int(row[1]), + row[2:][~np.isnan(row[2:])], + ) + ) + + return fin_diff_ordered_coeffs diff --git a/tests/resources/reference_finite_differences.csv b/tests/resources/reference_finite_differences.csv new file mode 100644 index 00000000..497a3c31 --- /dev/null +++ b/tests/resources/reference_finite_differences.csv @@ -0,0 +1,25 @@ +From 
https://en.wikipedia.org/wiki/Finite_difference_coefficient#Forward_finite_difference,,,,,,,,,, +Difference Order,Accuracy,0,1,2,3,4,5,6,7,8 +1,1,-1,1,,,,,,, +1,2,-1.5,2,-0.5,,,,,, +1,3,-1.833333333,3,-1.5,0.333333333,,,,, +1,4,-2.083333333,4,-3,1.333333333,-0.25,,,, +1,5,-2.283333333,5,-5,3.333333333,-1.25,0.2,,, +1,6,-2.45,6,-7.5,6.666666667,-3.75,1.2,-0.166666667,, +2,1,1,-2,1,,,,,, +2,2,2,-5,4,-1,,,,, +2,3,2.916666667,-8.666666667,9.5,-4.666666667,0.916666667,,,, +2,4,3.75,-12.83333333,17.83333333,-13,5.083333333,-0.833333333,,, +2,5,4.511111111,-17.4,29.25,-28.22222222,16.5,-5.4,0.761111111,, +2,6,5.211111111,-22.3,43.95,-52.72222222,41,-20.1,5.661111111,-0.7, +3,1,-1,3,-3,1,,,,, +3,2,-2.5,9,-12,7,-1.5,,,, +3,3,-4.25,17.75,-29.5,24.5,-10.25,1.75,,, +3,4,-6.125,29,-57.625,62,-38.375,13,-1.875,, +3,5,-8.058333333,42.53333333,-98.225,129.6666667,-106.0416667,53.6,-15.40833333,1.933333333, +3,6,-10.0125,58.16666667,-152.9416667,239.1,-242.8333333,163.0333333,-70.125,17.56666667,-1.954166667 +4,1,1,-4,6,-4,1,,,, +4,2,3,-14,26,-24,11,-2,,, +4,3,5.833333333,-31,68.5,-80.66666667,53.5,-19,2.833333333,, +4,4,9.333333333,-55.5,142,-203.1666667,176,-92.5,27.33333333,-3.5, +4,5,13.3625,-87.73333333,254.8166667,-428.8,458.0416667,-318.1333333,140.15,-35.73333333,4.029166667 diff --git a/tests/resources/reference_whitakker.csv b/tests/resources/reference_whittaker.csv similarity index 100% rename from tests/resources/reference_whitakker.csv rename to tests/resources/reference_whittaker.csv diff --git a/tests/test_for_utils.py b/tests/test_for_utils.py new file mode 100644 index 00000000..6065bc47 --- /dev/null +++ b/tests/test_for_utils.py @@ -0,0 +1,305 @@ +import numpy as np +import pytest +from scipy.linalg import cholesky_banded +from scipy.sparse import eye as speye + +from chemotools.utils.banded_linalg import ( + _find_largest_symm_sparse_banded_spd_eigval, + _find_smallest_symm_sparse_banded_spd_eigval, + conv_to_lu_banded_storage, + lu_banded, + lu_solve_banded, + 
slodget_cho_banded, + slogdet_lu_banded, +) +from chemotools.utils.finite_differences import ( + calc_forward_diff_kernel, + forward_finite_diff_conv_matrix, +) + +# from chemotools.utils.whittaker_base import WhittakerLikeSolver +from tests.fixtures import reference_finite_differences # noqa: F401 + + +def test_forward_diff_kernel( + reference_finite_differences: list[tuple[int, int, np.ndarray]] # noqa: F811 +) -> None: + # Arrange + for differences, accuracy, reference in reference_finite_differences: + # Act + kernel = calc_forward_diff_kernel(differences=differences, accuracy=accuracy) + + # Assert + assert kernel.size == reference.size, ( + f"Difference order {differences} with accuracy {accuracy} " + f"expected kernel size {reference.size} but got {kernel.size}" + ) + assert np.allclose(kernel, reference, atol=1e-8), ( + f"Difference order {differences} with accuracy {accuracy} " + f"expected kernel\n{reference.tolist()}\n" + f"but got\n{kernel.tolist()}" + ) + + +@pytest.mark.parametrize("accuracy", list(range(1, 21))) +@pytest.mark.parametrize("difference", list(range(0, 21))) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +def test_forward_finite_diff_conv_matrix( + size: int, difference: int, accuracy: int +) -> None: + """Tests the generated convolution matrix for forward finite differences by + comparing it to NumPy's ``convolve``. + """ + + # the test is skipped if the kernel is too large + if difference + accuracy > size: + pytest.skip( + f"Test skipped because the kernel size {difference + 1} is larger than the " + f"series size {size}." + ) + # else nothing + + # the random signal is generated + np.random.seed(seed=42) + series = np.random.rand(size) + + # the kernel is computed ... + kernel = calc_forward_diff_kernel(differences=difference, accuracy=accuracy) + # ... and the random series is convolved with the kernel ... 
+ # NOTE: the kernel is flipped because of the way NumPy's convolve works + numpy_convolved_series = np.convolve(series, np.flip(kernel), mode="valid") + + # the convolution matrix is computed ... + conv_matrix = forward_finite_diff_conv_matrix( + differences=difference, accuracy=accuracy, series_size=series.size + ) + # ... and the series is convolved with the convolution matrix + matrix_convolved_series = conv_matrix @ series + + # the actual test is performed + assert np.allclose(matrix_convolved_series, numpy_convolved_series), ( + f"Differences by matrix product for Difference order {difference} with " + f"accuracy {accuracy} for series of size {size} failed." + ) + + +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("difference", list(range(0, 11))) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +def test_stepwise_lu_banded_solve( + size: int, difference: int, with_finite_check: bool +) -> None: + """Tests the LU decomposition of a banded matrix by comparing the solution of the + linear systems involved in Whittaker smoothing with the solution obtained by NumPy's + ``solve``. + It gets ill-conditioned for ``differences`` >> 10, but this is not the intended use + case. + """ + + # the test is skipped if the kernel is too large + if difference + 1 > size: + pytest.skip( + f"Test skipped because the kernel size {difference + 1} is larger than the " + f"series size {size}." + ) + # else nothing + + # a random right hand side vector is generated + np.random.seed(seed=42) + b = np.random.rand( + size, + ) + + # a finite difference matrix is generated with an updated diagonal to + # ensure positive definiteness + l_and_u = (difference, difference) + d = forward_finite_diff_conv_matrix( + differences=difference, accuracy=1, series_size=size + ) + a = d.T @ d + speye(size) + + # it is converted to LU banded storage ... + ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u) + # ...
its LU decomposition is computed ... + lub, ipiv = lu_banded( + l_and_u=l_and_u, + ab=ab, + overwrite_ab=False, + check_finite=with_finite_check, + ) + # ... and the linear system is solved + x = lu_solve_banded( + decomposition=(lub, ipiv, l_and_u), + b=b, + check_finite=with_finite_check, + ) + + # the solution is compared to the solution obtained by NumPy's + # solve + np_x = np.linalg.solve(a=a.toarray(), b=b) + + assert np.allclose(x, np_x), ( + f"Banded LU decomposition for matrix of size {size} with {difference} sub- and " + f"superdiagonals failed." + ) + + +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("difference", list(range(0, 11))) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +def test_lu_banded_slogdet(size: int, difference: int, with_finite_check: bool) -> None: + """Tests the computation of the sign and log determinant of a banded matrix from + its LU decomposition by comparing it to NumPy's ``slogdet``. + """ + + # the test is skipped if the kernel is too large + if difference + 1 > size: + pytest.skip( + f"Test skipped because the kernel size {difference + 1} is larger than the " + f"series size {size}." + ) + # else nothing + + # a finite difference matrix is generated with an updated diagonal to ensure + # positive definiteness + l_and_u = (difference, difference) + d = forward_finite_diff_conv_matrix( + differences=difference, accuracy=1, series_size=size + ) + a = d.T @ d + speye(size) + + # it is converted to LU banded storage ... + ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u) + # ... its LU decomposition is computed ... + lub, ipiv = lu_banded( + l_and_u=l_and_u, + ab=ab, + overwrite_ab=False, + check_finite=with_finite_check, + ) + # ... 
and the sign and log determinant are determined + sign, logabsdet = slogdet_lu_banded( + decomposition=(l_and_u, lub, ipiv), + ) + + # the sign and log determinant are compared to the values obtained by NumPy's + # slogdet + np_sign, np_logabsdet = np.linalg.slogdet(a=a.toarray()) # type: ignore + + assert np_sign > 0, ( + f"Sign of log determinant for matrix of size {size} with {difference} sub- and " + f"superdiagonals failed." + ) + + assert np.isclose(sign, np_sign), ( + f"Sign of log determinant for matrix of size {size} with {difference} sub- and " + f"superdiagonals failed." + ) + assert np.isclose(logabsdet, np_logabsdet), ( + f"Log determinant for matrix of size {size} with {difference} sub- and " + f"superdiagonals failed." + ) + + +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("difference", list(range(0, 11))) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +def test_cho_banded_slogdet( + size: int, difference: int, with_finite_check: bool +) -> None: + """Tests the computation of the sign and log determinant of a banded matrix from + its Cholesky decomposition by comparing it to NumPy's ``slogdet``. + """ + + # the test is skipped if the kernel is too large + if difference + 1 > size: + pytest.skip( + f"Test skipped because the kernel size {difference + 1} is larger than the " + f"series size {size}." + ) + + # a finite difference matrix is generated with an updated diagonal to + # ensure positive definiteness + l_and_u = (difference, difference) + d = forward_finite_diff_conv_matrix( + differences=difference, accuracy=1, series_size=size + ) + a = d.T @ d + speye(size) + + # it is converted to LU banded storage ... + ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u) + # ... its Cholesky decomposition is computed ... + lower = False + chob = cholesky_banded(ab=ab[0 : difference + 1, ::], lower=lower) + # ... 
and the sign and log determinant are determined + sign, logabsdet = slodget_cho_banded(decomposition=(chob, lower)) + + # the sign and log determinant are compared to the values obtained by + # NumPy's slogdet + np_sign, np_logabsdet = np.linalg.slogdet(a=a.toarray()) # type: ignore + + assert np.isclose(sign, np_sign), ( + f"Sign of log determinant for matrix of size {size} with {difference} sub- and " + f"superdiagonals failed." + ) + assert np.isclose(logabsdet, np_logabsdet), ( + f"Log determinant for matrix of size {size} with {difference} sub- and " + f"superdiagonals failed." + ) + + +# FIXME: this test takes forever and is currently not even required, so the differences +# screened was limited a lot +@pytest.mark.parametrize("with_finite_check", [True, False]) +# @pytest.mark.parametrize("difference", list(range(0, 11))) +@pytest.mark.parametrize("difference", [0, 1, 2]) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +def test_largest_smallest_eigval_of_spbanded( + size: int, difference: int, with_finite_check: bool +) -> None: + """Tests the computation of the largest and smallest eigenvalues of a symmetric + mathematically positive semi-definite banded matrix by comparing it to NumPy's + ``eigvalsh``. Squared finite difference matrices are used for this test since they + are symmetric and mathematically positive semi-definite. + """ + + # the test is skipped if the kernel is too large + if difference + 1 > size: + pytest.skip( + f"Test skipped because the kernel size {difference + 1} is larger than the " + f"series size {size}." + ) + + # a finite difference matrix is generated and squared + l_and_u = (difference, difference) + d = forward_finite_diff_conv_matrix( + differences=difference, accuracy=1, series_size=size + ) + a = d.T @ d + ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u)[0 : difference + 1, ::] + + # now, its largest and smallest eigenvalues are computed ... 
+ max_eigval = _find_largest_symm_sparse_banded_spd_eigval( + ab=ab, check_finite=with_finite_check + ) + min_eigval = _find_smallest_symm_sparse_banded_spd_eigval( + ab=ab, check_finite=with_finite_check + ) + + # ... and compared to the values obtained by NumPy's ``eigvalsh`` + np_eigvals = np.linalg.eigvalsh(a=a.toarray()) + np_max_eigval = np_eigvals.max() + np_min_eigval = np_eigvals.min() + + assert np.isclose(max_eigval, np_max_eigval), ( + f"Largest eigenvalue for matrix of size {size} with {difference} " + f"sub- and superdiagonals failed. " + f"Chemotools solution: {max_eigval} vs." + f"NumPy's solution: {np_max_eigval}" + ) + assert np.isclose(min_eigval, np_min_eigval), ( + f"Smallest eigenvalue for matrix of size {size} with {difference} " + f"sub- and superdiagonals failed. " + f"Chemotools solution {min_eigval} vs." + f"NumPy's solution {np_min_eigval}" + ) diff --git a/tests/test_functionality.py b/tests/test_functionality.py index 653f5e28..cccab74a 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -4,13 +4,12 @@ from chemotools.augmentation import ( BaselineShift, - ExponentialNoise, + ExponentialNoise, IndexShift, - NormalNoise, + NormalNoise, SpectrumScale, UniformNoise, ) - from chemotools.baseline import ( AirPls, ArPls, @@ -20,6 +19,7 @@ SubtractReference, ) from chemotools.derivative import NorrisWilliams, SavitzkyGolay +from chemotools.feature_selection import IndexSelector, RangeCut from chemotools.scale import MinMaxScaler, NormScaler, PointScaler from chemotools.scatter import ( ExtendedMultiplicativeScatterCorrection, @@ -28,23 +28,20 @@ StandardNormalVariate, ) from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth -from chemotools.feature_selection import IndexSelector, RangeCut -from tests.fixtures import ( - spectrum, - spectrum_arpls, - reference_airpls, - reference_arpls, - reference_msc_mean, - reference_msc_median, - reference_sg_15_2, - reference_snv, - reference_whitakker, -) +from 
tests.fixtures import reference_airpls # noqa: F401 +from tests.fixtures import reference_arpls # noqa: F401 +from tests.fixtures import reference_msc_mean # noqa: F401 +from tests.fixtures import reference_msc_median # noqa: F401 +from tests.fixtures import reference_sg_15_2 # noqa: F401 +from tests.fixtures import reference_snv # noqa: F401 +from tests.fixtures import reference_whittaker # noqa: F401 +from tests.fixtures import spectrum_arpls # noqa: F401 +from tests.fixtures import spectrum -def test_air_pls(spectrum, reference_airpls): +def test_air_pls_single_signal(spectrum, reference_airpls): # noqa: F811 # Arrange - air_pls = AirPls() + air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) # Act spectrum_corrected = air_pls.fit_transform(spectrum) @@ -53,9 +50,23 @@ def test_air_pls(spectrum, reference_airpls): assert np.allclose(spectrum_corrected[0], reference_airpls[0], atol=1e-7) -def test_ar_pls(spectrum_arpls, reference_arpls): +def test_air_pls_multi_signals(spectrum, reference_airpls): # noqa: F811 + # Arrange + reps = (5, 1) + air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) + + # Act + spectrum_corrected = air_pls.fit_transform(X=np.tile(spectrum, reps=reps)) + + # Assert + assert np.allclose( + spectrum_corrected, np.tile(reference_airpls[0], reps=reps), atol=1e-7 + ) + + +def test_ar_pls(spectrum_arpls, reference_arpls): # noqa: F811 # Arrange - arpls = ArPls(1e2, 0.0001) + arpls = ArPls(lam=1e2, differences=2, ratio=0.0001) reference = np.array(spectrum_arpls) - np.array(reference_arpls) # Act @@ -65,6 +76,19 @@ def test_ar_pls(spectrum_arpls, reference_arpls): assert np.allclose(spectrum_corrected[0], reference[0], atol=1e-4) +def test_ar_pls_multi_signals(spectrum_arpls, reference_arpls): # noqa: F811 + # Arrange + reps = (5, 1) + arpls = ArPls(lam=1e2, differences=2, ratio=0.0001) + reference = np.array(spectrum_arpls) - np.array(reference_arpls) + + # Act + spectrum_corrected = 
arpls.fit_transform(X=np.tile(spectrum_arpls, reps=reps)) + + # Assert + assert np.allclose(spectrum_corrected, np.tile(reference[0], reps=reps), atol=1e-4) + + def test_baseline_shift(): # Arrange spectrum = np.ones(100).reshape(1, -1) @@ -75,9 +99,11 @@ def test_baseline_shift(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.mean(spectrum_corrected[0]) > np.mean(spectrum[0]) + assert np.mean(spectrum_corrected[0]) > np.mean(spectrum[0]) assert np.isclose(np.std(spectrum_corrected[0]), 0.0, atol=1e-8) - assert np.isclose(np.mean(spectrum_corrected[0]) - np.mean(spectrum[0]), 0.77395605, atol=1e-8) + assert np.isclose( + np.mean(spectrum_corrected[0]) - np.mean(spectrum[0]), 0.77395605, atol=1e-8 + ) def test_constant_baseline_correction(): @@ -119,8 +145,7 @@ def test_exponential_noise(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.allclose(np.mean(spectrum_corrected[0])-1, 0.1, atol=1e-2) - + assert np.allclose(np.mean(spectrum_corrected[0]) - 1, 0.1, atol=1e-2) def test_extended_baseline_correction(): @@ -166,7 +191,6 @@ def test_extended_baseline_correction_with_no_reference(): emsc.fit_transform(spectrum) - def test_extended_baseline_correction_with_wrong_reference(): # Arrange spectrum = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]).reshape( @@ -221,7 +245,7 @@ def test_extended_baseline_correction_through_msc(spectrum): def test_extended_baseline_correction_through_msc_median(spectrum): - # EMSC of 0 order should be equivalient to MSC + # EMSC of 0 order should be equivalent to MSC # Arrange msc = MultiplicativeScatterCorrection(use_median=True) emsc = ExtendedMultiplicativeScatterCorrection(order=0, use_median=True) @@ -232,7 +256,6 @@ def test_extended_baseline_correction_through_msc_median(spectrum): # Assert assert np.allclose(spectrum_emsc[0], spectrum_msc, atol=1e-8) - def test_index_selector(): @@ -279,13 +302,16 @@ def test_index_selector_with_wavenumbers(): def 
test_index_selector_with_wavenumbers_and_dataframe(): # Arrange wavenumbers = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]) - spectrum = pd.DataFrame(np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]])) + spectrum = pd.DataFrame( + np.array([[1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 21.0, 34.0, 55.0, 89.0]]) + ) + # FIXME: this is not used expected = np.array([[1.0, 2.0, 3.0, 34.0, 55.0, 89.0]]) # Act select_features = IndexSelector( features=np.array([1, 2, 3, 8, 9, 10]), wavenumbers=wavenumbers - ).set_output(transform='pandas') + ).set_output(transform="pandas") spectrum_corrected = select_features.fit_transform(spectrum) @@ -523,7 +549,7 @@ def test_normal_noise(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.allclose(np.mean(spectrum_corrected[0])-1, 0, atol=1e-2) + assert np.allclose(np.mean(spectrum_corrected[0]) - 1, 0, atol=1e-2) assert np.allclose(np.std(spectrum_corrected[0]), 0.5, atol=1e-2) @@ -628,7 +654,9 @@ def test_range_cut_by_wavenumber_with_dataframe(): # Arrange wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] spectrum = pd.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]])) - range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output(transform='pandas') + range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output( + transform="pandas" + ) # Act spectrum_corrected = range_cut.fit_transform(spectrum) @@ -747,16 +775,88 @@ def test_uniform_noise(): # Assert assert spectrum.shape == spectrum_corrected.shape - assert np.allclose(np.mean(spectrum_corrected[0])-1, 0, atol=1e-2) - assert np.allclose(np.std(spectrum_corrected[0]), np.sqrt(1/3), atol=1e-2) + assert np.allclose(np.mean(spectrum_corrected[0]) - 1, 0, atol=1e-2) + assert np.allclose(np.std(spectrum_corrected[0]), np.sqrt(1 / 3), atol=1e-2) -def test_whitakker_smooth(spectrum, reference_whitakker): +def test_whittaker_smooth_single_signal_no_weights( + spectrum, reference_whittaker # noqa: F811 +): 
# Arrange - whitakker_smooth = WhittakerSmooth() + whittaker_smooth = WhittakerSmooth() # Act - spectrum_corrected = whitakker_smooth.fit_transform(spectrum) + spectrum_corrected = whittaker_smooth.fit_transform(X=spectrum) # Assert - assert np.allclose(spectrum_corrected[0], reference_whitakker[0], atol=1e-8) + assert np.allclose(spectrum_corrected[0], reference_whittaker[0], atol=1e-8) + + +def test_whittaker_smooth_multi_signals_no_weights( + spectrum, reference_whittaker # noqa: F811 # +): + # Arrange + reps = (5, 1) + whittaker_smooth = WhittakerSmooth() + + # Act + spectrum_corrected = whittaker_smooth.fit_transform(X=np.tile(spectrum, reps=reps)) + + # Assert + assert np.allclose( + spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 + ) + + +def test_whittaker_smooth_single_signal_with_weights( + spectrum, reference_whittaker # noqa: F811 +): + # Arrange + weights = np.ones(shape=(len(spectrum[0]),)) + whittaker_smooth = WhittakerSmooth() + + # Act + spectrum_corrected = whittaker_smooth.fit_transform( + X=spectrum, sample_weight=weights + ) + + # Assert + assert np.allclose(spectrum_corrected[0], reference_whittaker[0], atol=1e-8) + + +def test_whittaker_smooth_multi_signals_single_weights( + spectrum, reference_whittaker # noqa: F811 +): + # Arrange + weights = np.ones(shape=(len(spectrum[0]),)) + reps = (5, 1) + whittaker_smooth = WhittakerSmooth() + + # Act + spectrum_corrected = whittaker_smooth.fit_transform( + X=np.tile(spectrum, reps=reps), sample_weight=weights + ) + + # Assert + assert np.allclose( + spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 + ) + + +def test_whittaker_smooth_multi_signals_multi_weights( + spectrum, reference_whittaker # noqa: F811 +): + # Arrange + weights = np.ones(shape=(5, len(spectrum[0]))) + reps = (weights.shape[0], 1) + whittaker_smooth = WhittakerSmooth() + + # Act + spectrum_corrected = whittaker_smooth.fit_transform( + X=np.tile(spectrum, reps=reps), sample_weight=weights 
+ ) + + # Assert + assert np.allclose( + spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 + ) diff --git a/tests/test_sklearn_compliance.py b/tests/test_sklearn_compliance.py index a4a192b4..b3accf67 100644 --- a/tests/test_sklearn_compliance.py +++ b/tests/test_sklearn_compliance.py @@ -2,13 +2,12 @@ from chemotools.augmentation import ( BaselineShift, - ExponentialNoise, - NormalNoise, + ExponentialNoise, IndexShift, - SpectrumScale, + NormalNoise, + SpectrumScale, UniformNoise, ) - from chemotools.baseline import ( AirPls, ArPls, @@ -20,6 +19,7 @@ SubtractReference, ) from chemotools.derivative import NorrisWilliams, SavitzkyGolay +from chemotools.feature_selection import IndexSelector, RangeCut from chemotools.scale import MinMaxScaler, NormScaler, PointScaler from chemotools.scatter import ( ExtendedMultiplicativeScatterCorrection, @@ -33,9 +33,6 @@ SavitzkyGolayFilter, WhittakerSmooth, ) -from chemotools.feature_selection import RangeCut, IndexSelector - -from tests.fixtures import spectrum # AirPls @@ -49,7 +46,7 @@ def test_compliance_air_pls(): # ArPls def test_compliance_ar_pls(): # Arrange - transformer = ArPls() + transformer = ArPls(differences=1) # Act & Assert check_estimator(transformer) @@ -60,7 +57,7 @@ def test_compliance_baseline_shift(): transformer = BaselineShift() # Act & Assert check_estimator(transformer) - + # ConstantBaselineCorrection def test_compliance_constant_baseline_correction(): @@ -91,7 +88,7 @@ def test_compliance_extended_multiplicative_scatter_correction(): # Arrange transformer = ExtendedMultiplicativeScatterCorrection() # Act & Assert - check_estimator(transformer) + check_estimator(transformer) # IndexSelector @@ -109,6 +106,7 @@ def test_compliance_spectrum_shift(): # Act & Assert check_estimator(transformer) + # LinearCorrection def test_compliance_linear_correction(): # Arrange @@ -196,7 +194,7 @@ def test_compliance_point_scaler(): # Act & Assert check_estimator(transformer) - + # 
PolynomialCorrection def test_compliance_polynomial_correction(): # Arrange From 995a9642611bd9b1b2f516c276694197b5d8ec25 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Sat, 23 Dec 2023 18:36:15 +0100 Subject: [PATCH 011/118] style: [44] made docstrings of whittaker smoothers black-compatible; added references --- chemotools/baseline/_air_pls.py | 52 +++++++++++++++++----- chemotools/baseline/_ar_pls.py | 61 +++++++++++++++++--------- chemotools/smooth/_whittaker_smooth.py | 17 ++++++- 3 files changed, 98 insertions(+), 32 deletions(-) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 3e55814e..128c1a5a 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -1,3 +1,22 @@ +""" +This module contains the ``AirPLS`` transformer, which performs baseline correction on +data according to the Whittaker-Henderson formulation of Penalized Least Squares which +was modified by the introduction of weights that are updated iteratively to improve the +baseline identification. + +References +---------- +It's based on the algorithms described in [1]_ and [2]_ where an implementational +adaption of [2]_ was required to make it numerically stable ([3]_). + +.. [1] Z.-M. Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 (2010) +.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023) +.. [3] https://math.stackexchange.com/q/4819039/1261538 + +""" + + import logging import numpy as np @@ -10,27 +29,30 @@ logger = logging.getLogger(__name__) +# TODO: is polynomial_order actually differences and if so, is the description correct? class AirPls( OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver ): """ - This class implements the AirPLS (Adaptive Iteratively Reweighted Penalized Least Squares) algorithm for baseline - correction of spectra data. 
AirPLS is a common approach for removing the baseline from spectra, which can be useful - in various applications such as spectroscopy and chromatography. + This class implements the Adaptive Iteratively Reweighted Penalized Least Squares + a.k.a AirPLS algorithm for baseline correction of spectra data. AirPLS is a common + approach for removing the baseline from spectra, which can be useful in various + applications such as spectroscopy and chromatography. Parameters ---------- lam : float or int, optional default=1e2 - The lambda parameter controls the smoothness of the baseline. Increasing the value of lambda results in - a smoother baseline. + The lambda parameter that controls the smoothness of the baseline. Higher values + will result in a smoother baseline. polynomial_order : int, optional default=1 - The polynomial order determines the degree of the polynomial used to fit the baseline. A value of 1 corresponds + The degree of the polynomial used to fit the baseline. A value of 1 corresponds to a linear fit, while higher values correspond to higher-order polynomials. nr_iterations : int, optional default=15 - The number of iterations used to calculate the baseline. Increasing the number of iterations can improve the - accuracy of the baseline correction, but also increases the computation time. + The number of iterations used to calculate the baseline. Increasing the number + of iterations can improve the accuracy of the baseline correction at the cost of + computation time. rcond : float, default=1e-15 The relative condition number which is used to keep all matrices involved @@ -53,8 +75,16 @@ class AirPls( References ---------- - - Z.-M. Zhang, S. Chen, and Y.-Z. Liang, Baseline correction using adaptive iteratively reweighted penalized least - squares. Analyst 135 (5), 1138-1146 (2010). + It's based on the algorithms described in [1]_ and [2]_ where an implementational + adaption of [2]_ was required to make it numerically stable ([3]_). + + .. [1] Z.-M. 
Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 + (2010) + .. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 + (2023) + .. [3] https://math.stackexchange.com/q/4819039/1261538 + """ # TODO: polynomial order is actually differences @@ -85,6 +115,7 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": ------- self : AirPls Returns the instance itself. + """ # Check that X is a 2D array and has only finite values X = BaseEstimator._validate_data(self, X, reset=True) # type: ignore @@ -114,6 +145,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: ------- X_ : array-like of shape (n_samples, n_features) The transformed data with the baseline removed. + """ # Check that the estimator is fitted diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index c128c732..2616ef47 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -1,3 +1,23 @@ +""" +This module contains the ``ArPLS`` transformer, which performs baseline correction on +data according to the Whittaker-Henderson formulation of Penalized Least Squares which +was modified by the introduction of weights that are updated iteratively to improve the +baseline identification. It simultaneously estimates the baseline as well as the +baseline noise. + +References +---------- +It's based on the algorithms described in [1]_ and [2]_ where an implementational +adaption of [2]_ was required to make it numerically stable ([3]_). + +.. [1] S.-J. Baek, A. Park, Y.-J. Ahn, J. Choo, "Baseline correction using + asymmetrically reweighted penalized least squares smoothing", Analyst, 140, 250–257 + (2015) +.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023) +.. 
[3] https://math.stackexchange.com/q/4819039/1261538 + +""" + import logging from numbers import Integral @@ -13,17 +33,21 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver): """ - This class implements the Assymmetrically Reweighted Penalized Least Squares (ArPls) is a baseline - correction method for spectroscopy data. It uses an iterative process - to estimate and remove the baseline from the spectra. + This class implements the Asymmetrically Reweighted Penalized Least Squares a.k.a + ArPLS which is a baseline correction method for spectroscopy data. It uses an + iterative process that simultaneously estimates the baseline as well as the baseline + noise. Parameters ---------- - lam : float or int, optional (default=1e4) - The penalty parameter for the difference matrix in the objective function. + lam : float or int, default=1e4 + The lambda parameter that controls the smoothness of the baseline. Higher values + will result in a smoother baseline. - ratio : float, optional (default=0.01) - The convergence threshold for the weight updating scheme. + ratio : float, default=0.01 + The convergence threshold for the weight updating scheme. Lower values will + result in a more accurate baseline at the cost of computation time and even + convergence. nr_iterations : int, optional (default=100) The maximum number of iterations for the weight updating scheme. @@ -33,7 +57,6 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLike positive definite. This is not actively used at the moment. It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. - Methods ------- fit(X, y=None) @@ -42,17 +65,18 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLike transform(X, y=None) Transform the data by removing the baseline. - _calculate_diff(N) - Calculate the difference matrix for a given size. - _calculate_ar_pls(x) Calculate the baseline for a given spectrum. 
References ---------- - - Sung-June Baek, Aaron Park, Young-Jin Ahn, Jaebum Choo - Baseline correction using asymmetrically reweighted penalized - least squares smoothing + .. [1] S.-J. Baek, A. Park, Y.-J. Ahn, J. Choo, "Baseline correction using + asymmetrically reweighted penalized least squares smoothing", Analyst, 140, + 250–257 (2015) + .. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 + (2023) + .. [3] https://math.stackexchange.com/q/4819039/1261538 + """ def __init__( @@ -84,6 +108,7 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": ------- self : ArPls Returns the instance itself. + """ # the constructor parameters are checked @@ -92,7 +117,6 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": name="ratio", target_type=float, min_val=1e-15, - max_val=1.0 - 1e-15, ) check_scalar( x=self.nr_iterations, name="nr_iterations", target_type=Integral, min_val=1 @@ -126,6 +150,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: ------- X_ : array-like of shape (n_samples, n_features) The transformed data with the baseline removed. + """ # Check that the estimator is fitted @@ -158,11 +183,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: def _calculate_ar_pls(self, x): # FIXME: this initial weighting strategy might not yield the best results - if self.ratio < 0.5: - w = np.ones_like(x) - else: - w = np.zeros_like(x) - + w = np.ones_like(x) z = np.zeros_like(x) # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` for _ in range(self.nr_iterations): diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index ae806544..65de1c14 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -8,8 +8,8 @@ adaption of [2]_ was required to make it numerically stable ([3]_). .. [1] Z.-M. Zhang, S. Chen, and Y.-Z. 
Liang, "Baseline correction using adaptive - iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 (2010). -.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023). + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 (2010) +.. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 (2023) .. [3] https://math.stackexchange.com/q/4819039/1261538 """ @@ -63,6 +63,18 @@ class WhittakerSmooth( fit_transform(X, y=None, sample_weight=None) Fit the transformer to the input data and transform it. + References + ---------- + It's based on the algorithms described in [1]_ and [2]_ where an implementational + adaption of [2]_ was required to make it numerically stable ([3]_). + + .. [1] Z.-M. Zhang, S. Chen, and Y.-Z. Liang, "Baseline correction using adaptive + iteratively reweighted penalized least squares", Analyst 135 (5), 1138-1146 + (2010) + .. [2] G. Biessy, "Revisiting Whittaker-Henderson smoothing", arXiv:2306.06932 + (2023) + .. [3] https://math.stackexchange.com/q/4819039/1261538 + """ def __init__( @@ -185,6 +197,7 @@ def fit_transform( ------- X_smoothed : np.ndarray of shape (n_samples, n_features) The transformed data. 
+ """ # noqa: E501 return self.fit(X=X).transform(X=X, sample_weight=sample_weight) From 0437cc7d82d0ca391db70fdefab062aa2a0828b0 Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Mon, 25 Dec 2023 16:31:50 +0100 Subject: [PATCH 012/118] wip: [44] figured out pentapy problems and saved working status for figuring out 100% fix --- chemotools/utils/banded_linalg.py | 3 +- chemotools/utils/models.py | 1 + chemotools/utils/whittaker_base.py | 171 +++++++++++++++++++++-------- tests/test_for_utils.py | 40 +++++-- tests/test_functionality.py | 57 ++++++++++ 5 files changed, 217 insertions(+), 55 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 8f8a2915..89d18d64 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -268,7 +268,8 @@ def lu_banded( warn( f"\nThe ({info-1}, {info-1})-th entry of the factor U is exactly 0, " f"which makes it singular.\n" - f"Linear systems cannot be solved with this factor." + f"Linear systems cannot be solved with this factor.", + RuntimeWarning, ) # else nothing diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index b7124d01..6bae3e8b 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -17,4 +17,5 @@ class BandedSolveDecompositions(str, Enum): CHOLESKY = "cholesky" LU = "lu" + POLYFIT = "polyfit" PENTAPY = "pentapy" diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 4df2b091..95e761e8 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -31,27 +31,28 @@ class WhittakerLikeSolver: It support weights and tries to use the most efficient method available. Besides, it also offers the possibility to fit the roughness penalty itself. - Parameters + Attributes ---------- - lam : int or float or None, default=1e2 + _lam : int or float or None, default=1e2 The lambda parameter to use for the Whittaker smooth. 
If ``None``, the transformer will fit the smoothness parameter itself by maximising the marginal likelihood, which can be computationally expensive, but more accurate than using (Generalized) Cross-Validation (see Notes). - differences : int, default=1 + _differences : int, default=1 The number of differences to use for the Whittaker smooth. If the aim is to obtain a smooth estimate of the `m`-th order derivative, this should be set to at least ``m + 2``. - polynomial_order : int, default=1 - Same as ``differences``, but for ``AirPls``. - - rcond : float, default=1e-15 + _rcond : float, default=1e-15 The relative condition number which is used to keep all matrices involved positive definite. This is only used if ``lam`` is ``None``. It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. + _allow_pentapy : bool, default=True + Whether to enable the Pentapy solver if available. This is only used for + debugging and testing purposes. + Notes ----- If ``lam`` is ``None``, the pentapy solver cannot be used even if available. 
@@ -72,17 +73,16 @@ class WhittakerLikeSolver: """ - __log_lam_bounds = ( + __log_lam_bounds: tuple[float, float] = ( -34.5, # 1e-15 115.13, # 1e50 ) + __allow_pentapy: bool = True def __init__( self, ) -> None: - self._lam: int | float | None = float("nan") - self._differences: int = -1 - self._rcond: float = float("nan") + pass def _setup_for_fit( self, @@ -96,9 +96,9 @@ def _setup_for_fit( """ # the input arguments are stored - self._lam = lam - self._differences = differences - self._rcond = rcond + self._lam: int | float | None = lam + self._differences: int = differences + self._rcond: float = rcond # the banded storage for a LAPACK LU decomposition is computed for the squared # forward finite difference matrix D^T @ D which is the penalty matrix P @@ -151,22 +151,48 @@ def _setup_for_fit( # finally, Pentapy is enabled if available, the number of differences is 2, # and the lambda parameter is not fitted automatically self._pentapy_enabled: bool = ( - _PENTAPY_AVAILABLE and self._differences == 2 and not self.auto_lam_ + _PENTAPY_AVAILABLE + and self._differences == 2 + and not self.auto_lam_ + and self.__allow_pentapy ) def _pentapy_solve(self, ab: np.ndarray, bw: np.ndarray) -> np.ndarray: """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` with the Pentapy package. This is written as the system ``A @ x = b`` where ``A = W + lam * D^T @ D`` and ``b = W @ b``. + + Notes + ----- + Pentapy does not (maybe yet) allow for 2D right-hand side matrices, so the + solution is computed for each column of ``bw`` separately. 
+ """ - return pp.solve( - mat=ab, - rhs=bw, - is_flat=True, - index_row_wise=False, - solver=1, - ) + # for 1-dimensional right-hand side vectors, the solution is computed directly + if bw.ndim == 1: + return pp.solve( + mat=ab, + rhs=bw, + is_flat=True, + index_row_wise=False, + solver=1, + ) + + # for 2-dimensional right-hand side matrices, the solution is computed for each + # column separately + else: + solution = np.empty(shape=(bw.shape[1], bw.shape[0])) + for iter_j in range(0, bw.shape[1]): + solution[iter_j, ::] = pp.solve( + mat=ab, + rhs=bw[::, iter_j], + is_flat=True, + index_row_wise=False, + solver=1, + ) + + return solution.transpose() def _cholesky_solve( self, ab: np.ndarray, bw: np.ndarray @@ -198,13 +224,19 @@ def _lu_solve( """ - # the LU decomposition is computed - lub, ipiv = lu_banded( - l_and_u=self.l_and_u_, - ab=ab, - check_finite=False, - ) - decomposition = (lub, ipiv, self.l_and_u_) + # the LU decomposition is computed, but if the matrix cannot properly be + # decomposed and at least one diagonal element of U is zero, a LinAlgError is + # raised + try: + lub, ipiv = lu_banded( + l_and_u=self.l_and_u_, + ab=ab, + check_finite=False, + ) + decomposition = (lub, ipiv, self.l_and_u_) + + except RuntimeWarning: + raise np.linalg.LinAlgError() # the linear system is solved return ( @@ -258,6 +290,24 @@ def _solve( decomposition_type : BandedSolveDecompositions The type of decomposition used to solve the linear system of equations. + Notes + ----- + This methods has the following fallback strategy in case of failures (->): + + - with pentapy: Pentapy -> LU -> weighted polynomial fit (``np.polyfit``) + - without pentapy: Cholesky -> LU -> weightedd polynomial fit + (``np.polyfit``) + + Why ``np.polyfit``? If the LU-decomposition fails, the lambda parameter is so + large that the penalty matrix is numerically singular. 
But on the other hand + this also means that the ``differences``-th order derivative of the series + should be as small as possible and the data fidelity term has no influence on + the solution. Fortunately, the penalty can be reduced to zero by fitting the + data with a weighted polynomial of order ``differences - 1`` because its + ``differences``-th order derivative is zero. It is however still closer to the + data than smoother solutions, i.e., even lower order polynomials whose + derivatives would also be zero. + """ # the banded storage for a LAPACK LU decomposition is computed by updating the @@ -272,28 +322,54 @@ def _solve( else: ab[self._differences, ::] += 1.0 - # the linear system of equations is solved with the most efficient method - # Case 1: Pentapy can be used - if self._pentapy_enabled: - return ( - self._pentapy_solve(ab=ab, bw=bw), - None, - BandedSolveDecompositions.PENTAPY, - ) - - # Case 2: Pentapy cannot be used, but the matrix is NUMERICALLY positive - # definite + # the linear system of equations is solved with the most efficient method with + # LU decomposition as the fallback try: - x, decomposition = self._cholesky_solve( - ab=ab[0 : self._differences + 1], bw=bw - ) - return x, decomposition, BandedSolveDecompositions.CHOLESKY + # Case 1: Pentapy can be used + if self._pentapy_enabled: + x = self._pentapy_solve(ab=ab, bw=bw) + if np.all(np.isfinite(x)): + return ( + x, + None, + BandedSolveDecompositions.PENTAPY, + ) + + else: + raise np.linalg.LinAlgError() + + # Case 2: Pentapy cannot be used, but the matrix is NUMERICALLY positive + # definite + else: + x, decomposition = self._cholesky_solve( + ab=ab[0 : self._differences + 1], bw=bw + ) + return x, decomposition, BandedSolveDecompositions.CHOLESKY # Case 3: Pentapy cannot be used and the matrix is NOT NUMERICALLY positive - # definite + # definite, so the fallback is to use the LU decomposition except np.linalg.LinAlgError: - x, decomposition = self._lu_solve(ab=ab, bw=bw) - return 
x, decomposition, BandedSolveDecompositions.LU + try: + x, decomposition = self._lu_solve(ab=ab, bw=bw) + if np.all(np.isfinite(x)): + return x, decomposition, BandedSolveDecompositions.LU + + else: + raise np.linalg.LinAlgError() + + # Case 4: the LU decomposition also fails, so the fallback is to fit a + # polynomial + except np.linalg.LinAlgError: + idx_vect = np.arange( + start=0, + stop=self.series_size_, + step=1, + dtype=np.int64, + ) + poly = np.poly1d( + np.polyfit(x=idx_vect, y=bw, deg=self._differences - 1, w=w) + ) + return poly(idx_vect), None, BandedSolveDecompositions.POLYFIT # FIXME: this method is not yet used and needs to be tested def _calc_neg_marginal_likelihood( @@ -568,6 +644,7 @@ def _whittaker_solve( """ # a nested function is defined for updating the weights + # TODO: add zero-weight protection (eigenvalues are weights themselves) def update_to_next_weights(iter_i: int) -> None: nonlocal w_curr, w_logdet_curr, num_nonzero_w_curr if iter_i > 0: diff --git a/tests/test_for_utils.py b/tests/test_for_utils.py index 6065bc47..633ee263 100644 --- a/tests/test_for_utils.py +++ b/tests/test_for_utils.py @@ -16,8 +16,7 @@ calc_forward_diff_kernel, forward_finite_diff_conv_matrix, ) - -# from chemotools.utils.whittaker_base import WhittakerLikeSolver +from chemotools.utils.whittaker_base import WhittakerLikeSolver from tests.fixtures import reference_finite_differences # noqa: F401 @@ -43,7 +42,7 @@ def test_forward_diff_kernel( @pytest.mark.parametrize("accuracy", list(range(1, 21))) @pytest.mark.parametrize("difference", list(range(0, 21))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) def test_forward_finite_diff_conv_matrix( size: int, difference: int, accuracy: int ) -> None: @@ -85,7 +84,7 @@ def test_forward_finite_diff_conv_matrix( @pytest.mark.parametrize("with_finite_check", [True, False]) @pytest.mark.parametrize("difference", list(range(0, 
11))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) def test_stepwise_lu_banded_solve( size: int, difference: int, with_finite_check: bool ) -> None: @@ -146,7 +145,7 @@ def test_stepwise_lu_banded_solve( @pytest.mark.parametrize("with_finite_check", [True, False]) @pytest.mark.parametrize("difference", list(range(0, 11))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) def test_lu_banded_slogdet(size: int, difference: int, with_finite_check: bool) -> None: """Tests the computation of the sign and log determinant of a banded matrix from its LU decomposition by comparing it to NumPy's ``slogdet``. @@ -203,7 +202,7 @@ def test_lu_banded_slogdet(size: int, difference: int, with_finite_check: bool) @pytest.mark.parametrize("with_finite_check", [True, False]) @pytest.mark.parametrize("difference", list(range(0, 11))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) def test_cho_banded_slogdet( size: int, difference: int, with_finite_check: bool ) -> None: @@ -253,7 +252,7 @@ def test_cho_banded_slogdet( @pytest.mark.parametrize("with_finite_check", [True, False]) # @pytest.mark.parametrize("difference", list(range(0, 11))) @pytest.mark.parametrize("difference", [0, 1, 2]) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1000, 5000]) +@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) def test_largest_smallest_eigval_of_spbanded( size: int, difference: int, with_finite_check: bool ) -> None: @@ -303,3 +302,30 @@ def test_largest_smallest_eigval_of_spbanded( f"Chemotools solution {min_eigval} vs." 
f"NumPy's solution {np_min_eigval}" ) + + +@pytest.mark.parametrize("with_pentapy", [True, False]) +@pytest.mark.parametrize("log10_lam", np.arange(-30.0, 110.0, step=10.0).tolist()) +@pytest.mark.parametrize("size", [3, 10, 50, 100, 500, 1_000, 5_000, 10_000]) +def test_whittaker_solve(size: int, log10_lam: float, with_pentapy: bool) -> None: + """Tests if the Whittaker smoothing still works for very large values of the + smoothing parameter. + """ + + # a Whittaker solver is instantiated ... + whittaker_solver = WhittakerLikeSolver() + # ... pentapy is enabled if requested ... + whittaker_solver._WhittakerLikeSolver__allow_pentapy = with_pentapy # type: ignore + whittaker_solver._setup_for_fit( + series_size=size, + lam=10.0**log10_lam, + differences=2, + ) + # ... and the linear system is solved + np.random.seed(seed=42) + z = whittaker_solver._whittaker_solve(X=np.random.rand(1, size))[0] + + assert np.all(np.isfinite(z)), ( + f"Whittaker solver for series of size {size} with smoothing parameter " + f"{10.0 ** log10_lam} failed." 
+ ) diff --git a/tests/test_functionality.py b/tests/test_functionality.py index 52fef572..398551e2 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -28,6 +28,7 @@ StandardNormalVariate, ) from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth +from chemotools.utils.models import BandedSolveDecompositions from tests.fixtures import reference_airpls # noqa: F401 from tests.fixtures import reference_arpls # noqa: F401 from tests.fixtures import reference_msc_mean # noqa: F401 @@ -860,3 +861,59 @@ def test_whittaker_smooth_multi_signals_multi_weights( assert np.allclose( spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 ) + + +def test_whittaker_with_pentapy_single_signal(): + # Arrange + np.random.seed(42) + spectrum = np.random.rand(1, 1000) + whittaker_smooth = WhittakerSmooth(differences=2) + + # Act with pentapy + spectrum_corr_pentapy = whittaker_smooth.fit_transform(spectrum) + + # Assert with pentapy + assert ( + whittaker_smooth._solve( + bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None + )[2] + == BandedSolveDecompositions.PENTAPY + ) + + # Act without pentapy + whittaker_smooth._WhittakerLikeSolver__allow_pentapy = False # type: ignore + spectrum_corr_scipy = whittaker_smooth.fit_transform(spectrum) + + # Assert without pentapy + assert whittaker_smooth._solve( + bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None + )[2] in {BandedSolveDecompositions.CHOLESKY, BandedSolveDecompositions.LU} + assert np.allclose(spectrum_corr_pentapy[0], spectrum_corr_scipy[0]) + + +def test_whittaker_with_pentapy_multi_signals(): + # Arrange + np.random.seed(42) + spectrum = np.random.rand(5, 1000) + whittaker_smooth = WhittakerSmooth(differences=2) + + # Act with pentapy + spectrum_corr_pentapy = whittaker_smooth.fit_transform(spectrum) + + # Assert with pentapy + assert ( + whittaker_smooth._solve( + bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None 
+ )[2] + == BandedSolveDecompositions.PENTAPY + ) + + # Act without pentapy + whittaker_smooth._WhittakerLikeSolver__allow_pentapy = False # type: ignore + spectrum_corr_scipy = whittaker_smooth.fit_transform(spectrum) + + # Assert without pentapy + assert whittaker_smooth._solve( + bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None + )[2] in {BandedSolveDecompositions.CHOLESKY, BandedSolveDecompositions.LU} + assert np.allclose(spectrum_corr_pentapy, spectrum_corr_scipy) From f37ffe77979e0b737f6a9f7a8ac7bdf0b1a8958e Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Wed, 27 Dec 2023 13:34:02 +0100 Subject: [PATCH 013/118] refactor: [44] changed paradigm in Whittaker smoothing to work with mixed Tikhonov regularisation; shotgun refactor of all respective modules; extended and parametrized tests; added more FIXMEs and TODOs; adapted docstrings --- chemotools/baseline/_air_pls.py | 35 +- chemotools/baseline/_ar_pls.py | 34 +- chemotools/smooth/_whittaker_smooth.py | 30 +- chemotools/utils/banded_linalg.py | 445 +------------ chemotools/utils/check_inputs.py | 24 +- chemotools/utils/finite_differences.py | 223 ++++--- chemotools/utils/models.py | 2 - chemotools/utils/whittaker_base.py | 591 ++++++------------ .../reference_finite_differences.csv | 19 - tests/test_for_utils.py | 331 +++++----- tests/test_functionality.py | 225 ++++--- 11 files changed, 660 insertions(+), 1299 deletions(-) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 128c1a5a..70d8c077 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -54,11 +54,6 @@ class AirPls( of iterations can improve the accuracy of the baseline correction at the cost of computation time. - rcond : float, default=1e-15 - The relative condition number which is used to keep all matrices involved - positive definite. This is not actively used at the moment. - It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. 
- Methods ------- fit(X, y=None) @@ -93,12 +88,10 @@ def __init__( lam: int | float = 100, polynomial_order: int = 1, nr_iterations: int = 15, - rcond: float = 1e-15, ): self.lam: int | float = lam self.polynomial_order: int = polynomial_order self.nr_iterations: int = nr_iterations - self.rcond: float = rcond def fit(self, X: np.ndarray, y=None) -> "AirPls": """Fit the AirPls baseline correction estimator to the input data. @@ -106,7 +99,8 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": Parameters ---------- X : array-like of shape (n_samples, n_features) - The input data. + The input data. It is internally promoted to ``np.float64`` to avoid loss of + precision. y : array-like of shape (n_samples,), optional (default=None) The target values. @@ -118,14 +112,20 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": """ # Check that X is a 2D array and has only finite values - X = BaseEstimator._validate_data(self, X, reset=True) # type: ignore + X = BaseEstimator._validate_data( # type: ignore + self, + X, + reset=True, + ensure_2d=True, + force_all_finite=True, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) # the internal solver is set up self._setup_for_fit( series_size=X.shape[1], lam=self.lam, differences=self.polynomial_order, - rcond=self.rcond, ) return self @@ -152,7 +152,10 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values - X = check_input(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) X_ = X.copy() # Check that the number of features is the same as the fitted data @@ -179,18 +182,22 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: def _calculate_air_pls(self, x): # FIXME: this initial weighting strategy might not yield the best results w = np.ones_like(x) + # FIXME: this initialisation will will fail for many signals and produce a + # zero-baseline z 
= np.zeros_like(x) - dssn_thresh = 1e-3 * np.abs(x).sum() + dssn_thresh = max(1e-3 * np.abs(x).sum(), 1e-308) # to avoid 0 equalities # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` for i in range(0, self.nr_iterations - 1): # the baseline is fitted using the Whittaker smoother framework - z = self._whittaker_solve(X=x, w=w, use_same_w_for_all=True)[0] + z, _ = self._solve_single_x( + x=x, w=w, mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_ + ) d = x - z dssn = np.abs(d[d < 0].sum()) # the algorithm is stopped if the threshold is reached - if dssn < dssn_thresh: + if dssn <= dssn_thresh: break # the weights are updated diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 2616ef47..ba3d5932 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -52,11 +52,6 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLike nr_iterations : int, optional (default=100) The maximum number of iterations for the weight updating scheme. - rcond : float, default=1e-15 - The relative condition number which is used to keep all matrices involved - positive definite. This is not actively used at the moment. - It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. - Methods ------- fit(X, y=None) @@ -85,13 +80,11 @@ def __init__( differences: int = 2, ratio: float = 0.01, nr_iterations: int = 100, - rcond: float = 1e-15, ): self.lam: float | int = lam self.differences: int = differences self.ratio: float = ratio self.nr_iterations: int = nr_iterations - self.rcond: float = rcond def fit(self, X: np.ndarray, y=None) -> "ArPls": """Fit the estimator to the data. @@ -99,7 +92,8 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": Parameters ---------- X : array-like of shape (n_samples, n_features) - The input data. + The input data. It is internally promoted to ``np.float64`` to avoid loss of + precision. 
y : array-like of shape (n_samples,), optional (default=None) The target values. @@ -123,14 +117,20 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": ) # Check that X is a 2D array and has only finite values - X = BaseEstimator._validate_data(self, X, reset=True) # type: ignore + X = BaseEstimator._validate_data( # type: ignore + self, + X, + reset=True, + ensure_2d=True, + force_all_finite=True, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) # the internal solver is setup self._setup_for_fit( series_size=X.shape[1], lam=self.lam, differences=self.differences, - rcond=self.rcond, ) return self @@ -141,7 +141,8 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: Parameters ---------- X : array-like of shape (n_samples, n_features) - The input data. + The input data. It is internally promoted to ``np.float64`` to avoid loss of + precision. y : array-like of shape (n_samples,), optional (default=None) The target values. @@ -157,7 +158,10 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values - X = check_input(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) X_ = X.copy() # Check that the number of features is the same as the fitted data @@ -184,11 +188,15 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: def _calculate_ar_pls(self, x): # FIXME: this initial weighting strategy might not yield the best results w = np.ones_like(x) + # FIXME: this initialisation will will fail for many signals and produce a + # zero-baseline z = np.zeros_like(x) # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` for _ in range(self.nr_iterations): # the baseline is fitted using the Whittaker smoother framework - z = self._whittaker_solve(X=x, w=w, use_same_w_for_all=True)[0] + z, _ = self._solve_single_x( + x=x, w=w, 
mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_ + ) d = x - z # if there is no data point below the baseline, the baseline is considered diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 65de1c14..202d7ccb 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -36,13 +36,9 @@ class WhittakerSmooth( differences : int, default=1 The number of differences to use for the Whittaker smooth. If the aim is to - obtain a smooth estimate of the `m`-th order derivative, this should be set to + obtain a smooth estimate of the ``m``-th order derivative, this should be set to at least ``m + 2``. - rcond : float, default=1e-15 - The relative condition number which is used to keep all matrices involved - positive definite. This is not actively used at the moment. - It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. Attributes ---------- @@ -81,11 +77,9 @@ def __init__( self, lam: int | float = 1e2, differences: int = 1, - rcond: float = 1e-15, ): self.lam = lam self.differences = differences - self.rcond = rcond def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": """ @@ -94,7 +88,8 @@ def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": Parameters ---------- X : np.ndarray of shape (n_samples, n_features) - The input data to fit the transformer to. + The input data to fit the transformer to. It is internally promoted to + ``np.float64`` to avoid loss of precision. y : None Ignored. @@ -106,7 +101,10 @@ def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": """ # Check that X is a 2D array and has only finite values - X = check_input(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) # Set the number of features ... 
self.n_features_in_ = X.shape[1] @@ -115,7 +113,6 @@ def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": series_size=self.n_features_in_, lam=self.lam, differences=self.differences, - rcond=self.rcond, ) # Set the fitted attribute to True @@ -135,7 +132,8 @@ def transform( Parameters ---------- X : np.ndarray of shape (n_samples, n_features) - The input data to transform. + The input data to transform. It is internally promoted to ``np.float64`` to + avoid loss of precision. y : None Ignored. @@ -156,7 +154,10 @@ def transform( check_is_fitted(self, "_is_fitted") # Check that X is a 2D array and has only finite values - X = check_input(X) + X = check_input( + X, + dtype=WhittakerLikeSolver._WhittakerLikeSolver__dtype, # type: ignore + ) X_ = X.copy() # Check that the number of features is the same as the fitted data @@ -183,7 +184,8 @@ def fit_transform( Parameters ---------- X : np.ndarray of shape (n_samples, n_features) - The input data to fit and transform. + The input data to fit and transform. It is internally promoted to + ``np.float64`` to avoid loss of precision. y : None Ignored. @@ -191,6 +193,8 @@ def fit_transform( sample_weight : np.ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None Individual weights for each of the input data. If only 1 weight vector is provided, it is assumed to be the same for the features all samples. + No weights may be negative (< 0.0) and at least one weight needs to be + positive (> 0.0). If ``None``, all features are assumed to have the same weight. 
Returns diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 89d18d64..81ae3a75 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -1,11 +1,6 @@ from numbers import Integral -from typing import Optional, Union -from warnings import warn import numpy as np -from numpy.typing import ArrayLike -from scipy.linalg import eigvals_banded, lapack -from scipy.sparse import eye as speye from scipy.sparse import spmatrix from sklearn.utils import check_array, check_scalar @@ -57,7 +52,7 @@ def _check_full_arr_n_diag_counts_for_lu_banded( def conv_to_lu_banded_storage( - a: Union[np.ndarray, spmatrix], + a: np.ndarray | spmatrix, l_and_u: tuple[int, int], ) -> np.ndarray: """Converts a (sparse) square banded matrix A to its banded storage required for @@ -137,7 +132,10 @@ def conv_to_lu_banded_storage( # now, the diagonal extraction method is specified based and the banded storage is # filled by it diag_method = a.diagonal # type: ignore - ab = np.zeros(shape=(num_low_diags + 1 + num_upp_diags, num_cols)) + ab = np.zeros( + shape=(num_low_diags + 1 + num_upp_diags, num_cols), + dtype=a.dtype, # type: ignore + ) # the superdiagonals and the main diagonal for offset in range(num_upp_diags, -1, -1): @@ -150,298 +148,6 @@ def conv_to_lu_banded_storage( return ab -def lu_banded( - l_and_u: tuple[int, int], - ab: ArrayLike, - *, - overwrite_ab: bool = False, - check_finite: bool = True, -) -> tuple[np.ndarray, np.ndarray]: - """Computes the LU-decomposition of a banded matrix A using LAPACK-routines. - - This function is a wrapper of the LAPACK-routine ``gbtrf`` which computes the LU- - decomposition of a banded matrix `A` in-place. It wraps the routine in an analogous - way to SciPy's ``cholesky_banded``. - - Parameters - ---------- - l_and_u : tuple[int, int] - The number of "non-zero" sub- (first) and superdiagonals (second element) aside - the main diagonal which does not need to be considered here. 
"Non-zero" can be - a bit misleading in this context. These numbers should count up to the diagonal - after which all following diagonals are zero. Zero-diagonals that come before - still need to be included. - Neither of both may exceed `num_rows`. - Wrong specification of this can lead to non-zero-diagonals being ignored or - zero-diagonals being included which corrupts the results or reduces the - performance. - ab : np.ndarray of shape (l_and_u[0] + 1 + l_and_u[1], n) - A NumPy-2D-Array resembling the matrix `A` in banded storage format - (see Notes). - - overwrite_ab : bool, default=False - If ``True``, the contents of `ab` can be overwritten by the routine. Otherwise, - a copy of `ab` is created and overwritten. - - check_finite : bool, default=True - Whether to check that the input matrix contains only finite numbers. Disabling - may give a performance gain, but may result in problems (crashes, - non-termination) if the inputs do contain infinities or NaNs. - - Returns - ------- - lu : np.ndarray of shape (l_and_u[0] + 1 + 2 * l_and_u[1], n) - A NumPy-2D-Array resembling the LU-decomposition of `A` in banded storage - format (see Notes). - ipiv : np.ndarray of shape (n,) - A NumPy-1D-Array containing the pivoting indices. It's `i`-th entry resembles - gives the row that was used for pivoting the `i`-th row of `A`. - - Notes - ----- - For LAPACK LU-decomposition, the matrix `a` is stored in `ab` using the matrix - diagonal ordered form: - - ```python - ab[u + i - j, j] == a[i,j] # see below for u - ``` - - An example of `ab` (shape of a is ``(7,7)``, `u`=3 superdiagonals, `l`=2 - subdiagonals) looks like: - - ```python - * * * a03 a14 a25 a36 - * * a02 a13 a24 a35 a46 - * a01 a12 a23 a34 a45 a56 # ^ superdiagonals - a00 a11 a22 a33 a44 a55 a66 # main diagonal - a10 a21 a32 a43 a54 a65 * # v subdiagonals - a20 a31 a42 a53 a64 * * - ``` - - where all entries marked with ``*`` are ``0`` when returned by this function. 
- Internally LAPACK relies on an expanded version of this format to perform inplace - operations that adds another `l` superdiagonals to the matrix in order to overwrite - them for the purpose of pivoting. The output is thus an expanded version - of the LU-decomposition of `A` in the same format where the main diagonal of - `L` is implicitly taken to be a vector of ones. The output can directly be used - for the LAPACK-routine ``gbtrs`` to solve linear systems of equations based on this - decomposition. - - """ - # the (optional) finite check and Array-conversion are performed - if check_finite: - inter_ab = np.asarray_chkfinite(ab) - else: - inter_ab = np.asarray(ab) - - # then, the number of lower and upper subdiagonals needs to be checked for being - # consistent with the shape of ``ab`` - num_low_diags, num_upp_diags = l_and_u - if num_low_diags + num_upp_diags + 1 != inter_ab.shape[0]: - raise ValueError( - f"\nInvalid values for the number of lower and upper " - f"diagonals: l+u+1 ({num_low_diags + num_upp_diags + 1}) does not equal " - f"ab.shape[0] ({inter_ab.shape[0]})." 
- ) - # else nothing - - # now, the LAPACK-routines can be called - # to make ``ab`` compatible with the shape the LAPACK expects in this case, it - # needs to be re-written into a larger Array that has zeros elsewhere - # FIXME: for tridiagonal matrices, the SciPy wrapper for ``gttrf`` should be used - lapack_routine = "gbtrf" - (gbtrf,) = lapack.get_lapack_funcs((lapack_routine,), (inter_ab,)) - lpkc_ab = np.zeros( - shape=(2 * num_low_diags + num_upp_diags + 1, inter_ab.shape[1]), - dtype=gbtrf.dtype, - ) - lpkc_ab[num_low_diags::, ::] = inter_ab - lu, ipiv, info = gbtrf( - ab=lpkc_ab, kl=num_low_diags, ku=num_upp_diags, overwrite_ab=overwrite_ab - ) - - # then, the results needs to be validated and returned - # Case 1: the factorisation could be completed, which does not imply that the - # solution can be used for solving a linear system - if info >= 0: - if info > 0: - warn( - f"\nThe ({info-1}, {info-1})-th entry of the factor U is exactly 0, " - f"which makes it singular.\n" - f"Linear systems cannot be solved with this factor.", - RuntimeWarning, - ) - # else nothing - - return lu, ipiv - - # Case 2: the factorisation was not completed due to invalid input - else: - raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrf.") - - -def slogdet_lu_banded( - decomposition: tuple[tuple[int, int], np.ndarray, np.ndarray], -) -> tuple[float, float]: - """Computes the logarithm of the absolute value of the determinant of a banded - matrix A using its LU-decomposition. This is way more efficient than computing the - determinant directly because the LU-decompositions main diagonals already encode - the determinant as the product of the diagonal entries of the factors. - - Parameters - ---------- - (l_and_u, lub, ipiv) : tuple, (tuple[int, int], np.ndarray, np.ndarray) - `l_and_u` is a tuple of two integers specifying the number of sub- and - superdiagonals of the matrix `A` that are non-zero. 
- `lub` is a NumPy-2D-Array resembling the LU-decomposition of `A` in banded - storage format as returned by ``lu_banded``. - `ipiv` is a NumPy-1D-Array containing the pivoting indices as returned by - ``lu_banded``. - - Returns - ------- - sign : float - A number representing the sign of the determinant. - logabsdet : float - The natural log of the absolute value of the determinant. - - If the determinant is zero, then `sign` will be 0 and `logabsdet` will be - -Inf. In all cases, the determinant is equal to ``sign * np.exp(logabsdet)``. - - """ - # first, the number of lower and upper diagonals is extracted - l_and_u, lub, ipiv = decomposition - num_low_diags, num_upp_diags = l_and_u - num_rows = lub.shape[-1] - - # then, the number of actual row exchanges needs to be counted - unchanged_row_idxs = np.arange(start=0, stop=num_rows, step=1, dtype=ipiv.dtype) - num_row_exchanges = np.count_nonzero(ipiv - unchanged_row_idxs) - - # the sign-prefactor of the determinant is either +1 or -1 depending on whether the - # number of row exchanges is even or odd - if num_row_exchanges % 2 == 1: - sign = -1.0 - else: - sign = 1.0 - - # since the determinant (without sign prefactor) is just the product of the diagonal - # product of L and the diagonal product of U, the calculation simplifies. 
As the - # main diagonal of L is a vector of ones, only the diagonal product of U is required - main_diag_idx = num_low_diags + num_upp_diags - u_diaprod_sign = np.prod(np.sign(lub[main_diag_idx, ::])) - with np.errstate(divide="ignore", over="ignore"): - logabsdet = np.sum(np.log(np.abs(lub[main_diag_idx, ::]))) - - # logarithms of zero are already properly handled, so there is not reason to worry - # about, since they are -inf which will result in a zero determinant in exp() - # overflow however needs to lead to a raise and in this case the log(det) is either - # +inf in case of overflow only or NaN in case of the simultaneous occurrence of - # zero and overflow - if np.isnan(logabsdet) or np.isposinf(logabsdet): - raise ValueError( - "\nFloating point overflow in natural logarithm. At least 1 main diagonal " - "entry results in overflow, thereby corrupting the determinant." - ) - # else nothing - - # finally, the absolute value of the natural logarithm of the determinant is - # returned together with its sign - if np.isneginf(logabsdet): - sign = 0.0 - elif float(u_diaprod_sign) > 0.0: - pass - else: - sign *= -1.0 - - return sign, logabsdet - - -def lu_solve_banded( - decomposition: tuple[np.ndarray, np.ndarray, tuple[int, int]], - b: ArrayLike, - *, - overwrite_b: bool = False, - check_finite: bool = True, -) -> np.ndarray: - """Solves a linear system of equations ``Ax=b`` with a banded matrix `A` using its - precomputed LU-decomposition. - This function wraps the LAPACK-routine ``gbtrs`` in an analogous way to SciPy's - ``cho_solve_banded``. - - Parameters - ---------- - (l_and_u, lub, ipiv) : tuple, (np.ndarray, np.ndarray, tuple[int, int]) - `lub` is a NumPy-2D-Array resembling the LU-decomposition of `A` in banded - storage format as returned by ``lu_banded``. - `ipiv` is a NumPy-1D-Array containing the pivoting indices as returned by - ``lu_banded``. 
- `l_and_u` is a tuple of two integers specifying the number of sub- and - superdiagonals of the matrix `A` that are non-zero. - b : np.ndarray of shape (n,) - A 1D-Array containing the right-hand side of the linear system of equations. - overwrite_b : bool, default=False - If ``True``, the contents of `b` can be overwritten by the routine. Otherwise, - a copy of `b` is created and overwritten. - check_finite : bool, default=True - Whether to check that the input matrix contains only finite numbers. Disabling - may give a performance gain, but may result in problems (crashes, - non-termination) if the inputs do contain infinities or NaNs. - - Returns - ------- - x : np.ndarray of shape (n,) - The solution to the system A x = b - - """ - # the (optional) finite check and Array-conversion are performed - lub, ipiv, l_and_u = decomposition - if check_finite: - inter_lub = np.asarray_chkfinite(lub) - inter_ipiv = np.asarray_chkfinite(ipiv) - inter_b = np.asarray_chkfinite(b) - else: - inter_lub = np.asarray(lub) - inter_ipiv = np.asarray(ipiv) - inter_b = np.asarray(b) - - # then, the shapes of the LU-decomposition and ``b`` need to be validated against - # each other - if inter_lub.shape[-1] != inter_b.shape[0]: - raise ValueError( - f"\nShapes of lub ({inter_lub.shape[-1]}) and b ({inter_b.shape[0]}) are " - f"not compatible." 
- ) - # else nothing - - # now, the LAPACK-routine is called - num_low_diags, num_upp_diags = l_and_u - (gbtrs,) = lapack.get_lapack_funcs(("gbtrs",), (inter_lub, inter_b)) - x, info = gbtrs( - ab=inter_lub, - kl=num_low_diags, - ku=num_upp_diags, - b=inter_b, - ipiv=inter_ipiv, - overwrite_b=overwrite_b, - ) - - # then, the results needs to be validated and returned - # Case 1: the solution could be computed successfully - if info == 0: - return x - - # Case 2: the solution could not be computed due to invalid input - elif info < 0: - raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrs.") - - # Case 3: unexpected error - else: - raise AssertionError( - f"\nThe internal gbtrs returned info > 0 ({info}) which should not happen." - ) - - def slodget_cho_banded(decomposition: tuple[np.ndarray, bool]) -> tuple[float, float]: """Computes the logarithm of the absolute value of the determinant of a banded hermitian matrix `A` using its Cholesky-decomposition. This is way more efficient @@ -472,144 +178,3 @@ def slodget_cho_banded(decomposition: tuple[np.ndarray, bool]) -> tuple[float, f main_diag_idx = 0 if lower else -1 return 1.0, 2.0 * np.sum(np.log(decomposition[0][main_diag_idx, ::])) - - -def _find_largest_symm_sparse_banded_spd_eigval( - ab: np.ndarray, check_finite: bool = True -) -> float: - """Finds the largest eigenvalue of a symmetric sparse banded matrix `A` using - SciPy's ``sparse.linalg.eigsh``. - - Notes - ----- - This function is intended for matrices that are known to be at least positive - semi-definite from a mathematical point of view (all eigenvalues >= 0). However, due - to numerical inaccuracies, the smallest eigenvalue may be negative. Such a - restriction is not critical in this context since the largest eigenvalue is - typically positive. - From a performance perspective, this function relies on LAPACK's banded eigensolver - and it thus highly efficient already. 
- - """ - - return eigvals_banded( - a_band=ab, - lower=False, - select="i", - select_range=(ab.shape[1] - 1, ab.shape[1] - 1), - check_finite=check_finite, - )[0] - - -def _find_smallest_symm_sparse_banded_spd_eigval( - ab: np.ndarray, check_finite: bool = True -) -> float: - """Finds the smallest eigenvalue of a symmetric sparse banded matrix `A` using - SciPy's ``sparse.linalg.eigsh``. - - Notes - ----- - This function is intended for matrices that are known to be at least positive - semi-definite from a mathematical point of view (all eigenvalues >= 0). However, due - to numerical inaccuracies, the smallest eigenvalue may be negative. - From a performance perspective, this function relies on LAPACK's banded eigensolver - and it thus highly efficient already. - - """ - return eigvals_banded( - a_band=ab, - lower=False, - select="i", - select_range=(0, 0), - check_finite=check_finite, - )[0] - - -def conv_symm_sparse_banded_sposdef_to_posdef( - a: spmatrix, - *, - l_and_u: tuple[int, int], - rcond: Optional[float] = None, - check_finite: bool = True, -) -> spmatrix: - """Converts a symmetric sparse banded matrix `A` to a positive definite matrix - `B` by adding a small multiple of the identity matrix to `A` (see Notes). - - Parameters - ---------- - a : spmatrix of shape (n, n) - A square symmetric sparse banded matrix. - - l_and_u : tuple[int, int] - The number of "non-zero" sub- (first) and superdiagonals (second element) aside - the main diagonal which does not need to be considered here. "Non-zero" can be - a bit misleading in this context. These numbers should count up to the diagonal - after which all following diagonals are zero. Zero-diagonals that come before - still need to be included. - Wrong specification of this can lead to non-zero-diagonals being ignored or - zero-diagonals being included which corrupts the results or reduces the - performance. - Both its entries must coincide. 
- - rcond : float, default=None - The relative condition number of the positive definite matrix `B`. - If ``None``, the default value of ``scipy.linalg.pinvh`` is used which is - ``eps * n`` where ``eps`` is the machine precision of the datatype of `a`. - - Returns - ------- - b : spmatrix of shape (n, n) - A positive definite matrix which is identical to ``a`` except for main diagonal. - - Raises - ------ - ValueError - If `a` is not square or symmetric. - - Notes - ----- - This function performs no checks on `a`. - It is intended for matrices that are known to be at least positive semi-definite - from a mathematical point of view (all eigenvalues >= 0). However, due to numerical - inaccuracies, the smallest eigenvalue may be negative. - For making `A` positive definite, a small multiple of the identity matrix is added - to it as ``B = A + delta * I`` where `delta` is chosen to be the smallest positive - number such that the smallest eigenvalue of ``A + delta * I`` is numerically - positive when compared to the largest eigenvalue of ``A + delta * I``. In other - words, given the smallest and largest eigenvalue of `A` `lam_min` and `lam_max`, - respectively, `delta` is is chosen such that - ``(lam_min + delta) / (lam_max + delta) >= rcond`` because ``lam_min + delta`` and - ``lam_max + delta`` are the smallest and largest eigenvalue of the resulting `B`. - Internally ``1.1 * rcond`` is used as the smallest ratio to account for numerical - inaccuracies in the conducted and potential future computations of eigenvalues. 
- - """ - - # first, the rcond-parameter is determined - if rcond is None: - inter_rcond = np.finfo(a.dtype).eps * a.shape[0] # type: ignore - else: - inter_rcond = rcond - - # then, the smallest and largest eigenvalue are computed - ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u)[0 : l_and_u[1] + 1, ::] - max_eigenvalue = _find_largest_symm_sparse_banded_spd_eigval( - ab=ab, check_finite=check_finite - ) - min_eigenvalue = _find_smallest_symm_sparse_banded_spd_eigval( - ab=ab, check_finite=check_finite - ) - - # if the ratio is fine already, the matrix is returned - if (min_eigenvalue / max_eigenvalue) >= inter_rcond: - return a.copy() # type: ignore - # else nothing - - # otherwise, the smallest multiple of the identity matrix is computed that makes - # the ratio fine and the resulting matrix is returned - inter_rcond *= 1.1 - delta = (min_eigenvalue - inter_rcond * max_eigenvalue) / (inter_rcond - 1.0) - - return a + delta * speye( - m=a.shape[0], dtype=a.dtype, format=a.format # type: ignore - ) diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index dbc49ad4..ac146ef9 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -1,10 +1,12 @@ +from typing import Literal + import numpy as np from sklearn.utils.validation import check_array -def check_input(X, y=None): +def check_input(X, y=None, dtype: type | Literal["numeric"] | None = "numeric"): # Check that X is a 2D array and has only finite values - X = check_array(X, ensure_2d=True, force_all_finite=True) + X = check_array(X, ensure_2d=True, force_all_finite=True, dtype=dtype) # Check that y is None or a 1D array of the same length as X if y is not None: @@ -39,19 +41,27 @@ def check_weights( # afterwards, they are checked for having the right shape if weights_checked.shape[0] not in {1, n_samples}: raise ValueError( - f"\nWeights must have either 1 or {n_samples} rows, but they have " + f"Weights must have either 1 or {n_samples} rows, but 
they have " f"{weights_checked.shape[0]} rows." ) elif weights_checked.shape[1] != n_features: raise ValueError( - f"\nWeights must have {n_features} columns, but they have " + f"Weights must have {n_features} columns, but they have " f"{weights_checked.shape[1]} columns." ) # else nothing - # finally, it is checked whether the weights are all non-negative - if not np.all(weights_checked >= 0.0): - raise ValueError("\nWeights must be non-negative.") + # finally, it needs to be checked that the weights are all non-negative ... + if np.any(weights < 0.0): + raise ValueError( + f"Weights may not be negative, but {np.sum(weights < 0.0)} negative " + f"weights were found." + ) + # ... and also at least one of them is positive + elif np.sum(weights) <= 0.0: + raise ValueError( + "At least one weights needs to be > 0, but all weights were 0.0." + ) # else nothing # the weights are returned together with a flag whether to apply the same weights diff --git a/chemotools/utils/finite_differences.py b/chemotools/utils/finite_differences.py index bce324ba..6cd6240b 100644 --- a/chemotools/utils/finite_differences.py +++ b/chemotools/utils/finite_differences.py @@ -1,49 +1,22 @@ -from math import factorial +from math import comb from numbers import Integral import numpy as np -from scipy.sparse import dia_matrix +from scipy.sparse import csr_matrix, dia_matrix from scipy.sparse import diags as spdiags +from scipy.sparse import eye as speye from sklearn.utils import check_scalar -def _calc_arbitrary_fin_diff_kernel( - *, - grid_points: np.ndarray, - differences: int, -) -> np.ndarray: - """Computes the kernel for finite differences with arbitrary grid points.""" - # the number of grid points is counted - num_grid_points = grid_points.size - - # if the grid points cannot support the respective difference, an error is raised - if differences >= num_grid_points: - raise ValueError( - f"\n{num_grid_points} grid points cannot support a {differences}-th order " - f"difference." 
- ) - # else nothing - - # then, the system of linear equations to solve is set up as A@x = b where x is - # the kernel vector - lhs_mat_a = np.vander(x=grid_points, N=num_grid_points, increasing=True).T - rhs_vect_b = np.zeros(shape=(num_grid_points,), dtype=np.float64) - rhs_vect_b[differences] = factorial(differences) - - # the kernel is computed and returned - return np.linalg.solve(a=lhs_mat_a, b=rhs_vect_b) - - def calc_forward_diff_kernel( *, differences: int, - accuracy: int = 1, ) -> np.ndarray: """Computes the kernel for forward finite differences which can be applied to a series by means of a convolution, e.g., ```python - kernel = calc_forward_fin_diff_kernel(differences=2, accuracy=1) + kernel = calc_forward_fin_diff_kernel(differences=2) differences = np.convolve(series, np.flip(kernel), mode="valid") # NOTE: NumPy flips the kernel internally due to the definition of convolution ``` @@ -55,20 +28,17 @@ def calc_forward_diff_kernel( first order, 2 for the second order, ..., and ``m`` for the ``m``-th order differences. Values below 0 are not allowed. - accuracy : int, default=1 - The accuracy of the approximation which must be a positive integer starting - from 1. Returns ------- - fin_diff_kernel : np.ndarray of shape (differences + accuracy,) - A NumPy-1D-vector resembling the kernel from the code example above. + fin_diff_kernel : np.ndarray of shape (differences + 1,) + A NumPy-1D-vector resembling the kernel from the code example above. To avoid + loss of precision, the data type is ``np.int64``. Raises ------ ValueError - If the difference order is below 0, the accuracy is below 1, or the number of - grid points is not sufficient to support the respective difference order. + If the difference order is below 0. 
""" # the input is validated @@ -79,51 +49,34 @@ def calc_forward_diff_kernel( min_val=0, include_boundaries="left", ) - check_scalar( - accuracy, - name="accuracy", - target_type=Integral, - min_val=1, - include_boundaries="left", - ) - # afterwards, the number of grid points is evaluated, which is simply the sum of the - # difference order and the accuracy - num_grid_points = differences + accuracy - - # then, the system of linear equations is solved for the x in A@x = b since x is - # the kernel vector - grid_points = np.arange( - start=0, - stop=num_grid_points, - step=1, - dtype=np.float64, + # afterwards, the kernel is computed using the binomial coefficients + return np.array( + [ + ((-1) ** iter_i) * comb(differences, iter_i) + for iter_i in range(differences, -1, -1) + ], + dtype=np.int64, ) - fin_diff_kernel = _calc_arbitrary_fin_diff_kernel( - grid_points=grid_points, differences=differences - ) - - return fin_diff_kernel def forward_finite_diff_conv_matrix( *, differences: int, - accuracy: int = 1, series_size: int, ) -> dia_matrix: """Computes the convolution matrix for forward finite differences which can be applied to a series by means of a matrix multiplication, e.g., ```python - conv_mat = finite_diff_conv_matrix(differences=2, accuracy=1, series_size=10) + conv_mat = finite_diff_conv_matrix(differences=2, series_size=10) differences = conv_mat @ series # boundaries require special care ``` this is equivalent to ```python - kernel = calc_forward_fin_diff_kernel(differences=2, accuracy=1) + kernel = calc_forward_fin_diff_kernel(differences=2) differences = np.convolve(series, np.flip(kernel), mode="valid") # NOTE: NumPy flips the kernel internally due to the definition of convolution ``` @@ -135,31 +88,26 @@ def forward_finite_diff_conv_matrix( first order, 2 for the second order, ..., and ``m`` for the ``m``-th order differences. Values below 0 are not allowed. 
- accuracy : int, default=1 - The accuracy of the approximation which must be a positive integer starting - from 1. series_size : int The number of data points in the series to which the convolution matrix is applied. Returns ------- - conv_mat : dia_matrix of shape (series_size - differences, series_size) + diff_mat : dia_matrix of shape (series_size - differences, series_size) A sparse matrix resembling the convolution matrix from the code example above. + To avoid loss of precision, the data type is ``np.int64``. Raises ------ ValueError - If the difference order is below 0, the accuracy is below 1, or the number of - grid points is not sufficient to support the respective difference order. - ValueError - If ``series_size`` is not enough to support the respective ``differences`` and - ``accuracy``. + If the difference order is below 0, or ``series_size`` is not sufficient to + support the respective difference order. """ - # the input is validated (``differences`` and ``accuracy`` are validated in the - # function ``calc_forward_diff_kernel``) - kernel_size = differences + accuracy + + # the input is validated + kernel_size = differences + 1 try: check_scalar( series_size, @@ -172,11 +120,132 @@ def forward_finite_diff_conv_matrix( raise ValueError(f"Got n_features = {series_size}, must be >= {kernel_size}.") # afterwards, the kernel is computed ... - kernel = calc_forward_diff_kernel(differences=differences, accuracy=accuracy) + kernel = calc_forward_diff_kernel(differences=differences) # ... and the convolution matrix is created return spdiags( diagonals=kernel, offsets=np.arange(start=0, stop=kernel_size, step=1), # type: ignore shape=(series_size - kernel_size + 1, series_size), format="dia", + dtype=np.int64, ) + + +def calc_limit_max_eigval_fin_diff_mat(differences: int) -> int: + """Computes the maximum eigenvalue of the forward finite difference matrix as + computed by ``forward_finite_diff_conv_matrix`` for the given difference order. 
It + only uses the limit value as the series size tends to infinity, but from some + tests, this seems to be an upper limit for the maximum singular value for any + series size which makes it ideal for thresholding. + + Parameters + ---------- + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 0 are not allowed. + + Returns + ------- + max_eigval : int + The maximum eigenvalue of the forward finite difference matrix. + + """ + + # NOTE: this was found rather empirically, but it works + return 4**differences + + +def posdef_mod_squared_fw_fin_diff_conv_matrix( + *, + fw_fin_diff_mat: dia_matrix, + differences: int, + dia_mod_matrix: dia_matrix | None, + max_eigval_mult: float, + dtype: type, +) -> csr_matrix: + """Computes the modified squared forward finite difference matrix ``P`` for the + given difference order and series size. It is computed as + + ```python + # the pre-computation is obtained which might still be positive semi-definite + P = D.T @ M @ D + # the maximum eigenvalue of P is estimated to make it positive definite + max_lam_p = max_lam_dtd * max_lam_m + # by lifting the main diagonal, P is made numerical positive definite + P += max_lam_p * max_eigval_mult * I + ``` + + where ``D`` is the convolution matrix for forward finite differences, ``M`` is the + diagonal matrix of the modified weights, ``max_lam_dtd`` is the maximum eigenvalue + ``D.T @ D``, ``max_lam_m`` is the maximum eigenvalue of ``M``, i.e., the maximum + weight (since diagonal matrix), and ``max_lam_p`` is the maximum eigenvalue of + ``P``. For details on this approximation, please see Notes. + + Parameters + ---------- + fw_fin_diff_mat : dia_matrix + The convolution matrix for forward finite differences resembling ``D`` from the + description above. It can be computed by ``forward_finite_diff_conv_matrix``. 
+ differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 0 are not allowed. + dia_mod_matrix : dia_matrix or None + The sparse diagonal matrix of the modification weights resembling ``M`` from the + description above. If ``None``, this multiplication is skipped. + max_eigval_mult : float + The multiple of the maximum eigenvalue of the modified squared forward finite + differences matrix that is added to the main diagonal of the output matrix to + make it positive definite according to the description above. + dtype : type + The data type of the output matrix. + + Returns + ------- + posdef_squ_diff_mat : csr_matrix of shape (series_size - differences, series_size) + A positive definite sparse matrix resembling the squared forward finite + difference matrix ``P`` from the description above. It will be of data type + ``dtype``. + + Raises + ------ + ValueError + If the difference order is below 0, or the number of grid points is not + sufficient to support the respective difference order. + + Notes + ----- + The approximation of ``max_lam_p`` is based on the spectral norm of the + matrix product. Since the spectral norm ``||P||2`` is submultiplicative, the + estimate ``||D.T||2 * ||M||2 * ||D||2`` is an upper bound for ``||P||2``. + As ``||D||2 = ||D.T||2 = sqrt(max_lam_dtd)`` and + ``||M||2 = sqrt(max_lam_m**2) = max_lam_m = M.max()``, the maximum eigenvalue of + ``P`` is estimated as ``max_lam_p = max_lam_dtd * max_lam_m``. For just ensuring + numerical stability, this is perfectly fine and it also won't overestimate the + maximum eigenvalue of ``P`` too much and therefore, the perturbation of the main + diagonal is kept small. 
+ + """ + + # first, the maximum eigenvalue of the finite difference matrix is computed + squ_diff_mat_eigval_max = calc_limit_max_eigval_fin_diff_mat( + differences=differences + ) + + # afterwards, the squared convolution matrix is computed + if dia_mod_matrix is None: + squ_diff_mat = fw_fin_diff_mat.T @ fw_fin_diff_mat + m_eigval_max = 1.0 + else: + squ_diff_mat = fw_fin_diff_mat.T @ dia_mod_matrix @ fw_fin_diff_mat + m_eigval_max = dia_mod_matrix.data.max() + + # the main diagonal is lifted by a multiple of the machine epsilon + lift_mat = speye(m=fw_fin_diff_mat.shape[1], dtype=dtype, format="csr") + lift_mat *= max_eigval_mult * squ_diff_mat_eigval_max * m_eigval_max + + # the positive definite matrix is returned + return squ_diff_mat + lift_mat diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 6bae3e8b..7a8dce2c 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -16,6 +16,4 @@ class BandedSolveDecompositions(str, Enum): CHOLESKY = "cholesky" - LU = "lu" - POLYFIT = "polyfit" PENTAPY = "pentapy" diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 95e761e8..f276358c 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -1,27 +1,21 @@ import numpy as np from scipy.linalg import cho_solve_banded, cholesky_banded -from scipy.optimize import minimize_scalar - -from chemotools.utils.banded_linalg import ( - conv_symm_sparse_banded_sposdef_to_posdef, - conv_to_lu_banded_storage, - lu_banded, - lu_solve_banded, - slodget_cho_banded, +from scipy.sparse import csr_matrix, dia_matrix + +from chemotools.utils.banded_linalg import conv_to_lu_banded_storage +from chemotools.utils.finite_differences import ( + forward_finite_diff_conv_matrix, + posdef_mod_squared_fw_fin_diff_conv_matrix, ) -from chemotools.utils.finite_differences import forward_finite_diff_conv_matrix from chemotools.utils.models import _PENTAPY_AVAILABLE, 
BandedSolveDecompositions if _PENTAPY_AVAILABLE: import pentapy as pp # else nothing -_LUDecomposition = tuple[np.ndarray, np.ndarray, tuple[int, int]] _CholeskyDecomposition = tuple[np.ndarray, bool] _PentapyDecomposition = None -_Decomposition = _PentapyDecomposition | _CholeskyDecomposition | _LUDecomposition - -LN_OF_TWO_PI = np.log(2.0 * np.pi) +_Decomposition = _PentapyDecomposition | _CholeskyDecomposition class WhittakerLikeSolver: @@ -29,54 +23,55 @@ class WhittakerLikeSolver: matrices as they occur in applications like the Whittaker-Henderson-smoothing or derived methods like Asymmetric Least Squares (ALS) baseline correction. It support weights and tries to use the most efficient method available. - Besides, it also offers the possibility to fit the roughness penalty itself. Attributes ---------- - _lam : int or float or None, default=1e2 + series_size_ : int + The size of the series to smooth. It is equivalent to `n_features_in_`, but it + was renamed to be allow for definition after the initialisation. + lam_ : int or float The lambda parameter to use for the Whittaker smooth. - If ``None``, the transformer will fit the smoothness parameter itself by - maximising the marginal likelihood, which can be computationally expensive, but - more accurate than using (Generalized) Cross-Validation (see Notes). - - _differences : int, default=1 + differences_ : int The number of differences to use for the Whittaker smooth. If the aim is to - obtain a smooth estimate of the `m`-th order derivative, this should be set to + obtain a smooth estimate of the ``m``-th order derivative, this should be set to at least ``m + 2``. - - _rcond : float, default=1e-15 - The relative condition number which is used to keep all matrices involved - positive definite. This is only used if ``lam`` is ``None``. - It works in the same way as the ``rcond`` parameter of SciPy's ``linalg.pinvh``. 
- - _allow_pentapy : bool, default=True + l_and_u_ : tuple[int, int] + The number of sub- (first) and superdiagonals (second element). Both will equal + ``differences_``. + fw_fin_diff_mat_ : dia_matrix + The finite difference matrix, which serves as a precursor for the penalty matrix + in its sparse representation as DIA-matrix. + base_squ_fw_fin_diff_mat_ : csr_matrix + The squared finite difference matrix, which serves as the penalty matrix in its + sparse representation as CSR-matrix. It is already made positive definite by + adding a multiple of the identity matrix to the main diagonal, but otherwise it + is its original form. It can be used directly for baseline correction algorithms + that do not require sophisticated modifications of the penalty matrix. + base_squ_fw_fin_diff_mat_lub_ : np.ndarray + The banded storage version of ``base_squ_fw_fin_diff_mat_`` for LAPACK LU + decomposition. It is stored this way because it is efficient for Pentapy while + being slightly inefficient for the Cholesky decomposition. Since the conversion + for the latter only required row access in a C-order array, this should not be + a major time sink though. + __dtype : type, default=np.float64 + The data type to which the series to be smoothed will be converted to. To avoid + numerical issues, all series are converted to double precision. + __machine_min_tol_mult : int, default=10 + The multiple of the machine epsilon that is used to make the penalty matrix + positive definite. It is only relevant if it exceeds ``series_size``. + Positive definiteness is assured by lifting the main diagonal by a small + multiple of the identity matrix whose scale depends on the machine precision + as well as the maximum eigenvalue of the squared forward finite difference + matrix. Please refer to the documentation of + ``posdef_mod_squared_fw_fin_diff_conv_matrix`` for more details. + __allow_pentapy : bool, default=True Whether to enable the Pentapy solver if available. 
This is only used for debugging and testing purposes. - Notes - ----- - If ``lam`` is ``None``, the pentapy solver cannot be used even if available. - Besides, the computational load increases since - especially for large series and - high differences - the pseudo-determinant of the penalty matrix ``P = D^T @ D`` - needs to be computed, which is computationally expensive and also subject to - numerical inaccuracies. The latter cause some eigenvalues to be numerically - negative, even though their true value is positive and of order ``<< 1e-16``, which - makes their accurate computation numerically impossible with double precision. - Therefore, the eigenvalues are lifted by adding a small value to the diagonal of - the penalty matrix before computing the pseudo-determinant to make the smallest - eigenvalue numerically positive when compared to the largest eigenvalue. From a - smoothing point of view, this turns the Whittaker-Smoothing with derivative penalty - into a blend of Whittaker Smoothing and Tikhonov Regularisation. - So, in contrast to ``P = D^T @ D``, the penalty matrix ``P = D^T @ D + c * I`` is - used where ``c`` is a very small numerical value, so in first approximation, the - combined smoother is still mostly a Whittaker smoother. - - """ - - __log_lam_bounds: tuple[float, float] = ( - -34.5, # 1e-15 - 115.13, # 1e50 - ) + """ # noqa: E501 + + __dtype: type = np.float64 + __machine_min_tol_mult: int = 10 __allow_pentapy: bool = True def __init__( @@ -87,74 +82,45 @@ def __init__( def _setup_for_fit( self, series_size: int, - lam: int | float | None, + lam: int | float, differences: int, - rcond: float = 1e-15, ) -> None: """Pre-computes everything that can be computed for the smoothing in general as well as for fitting the lambda parameter itself. 
""" # the input arguments are stored - self._lam: int | float | None = lam - self._differences: int = differences - self._rcond: float = rcond - - # the banded storage for a LAPACK LU decomposition is computed for the squared - # forward finite difference matrix D^T @ D which is the penalty matrix P - self.auto_lam_: bool = self._lam is None - self.l_and_u_: tuple[int, int] = (self._differences, self._differences) self.series_size_: int = series_size - self.squ_fw_fin_diff_mat_ = forward_finite_diff_conv_matrix( - differences=self._differences, - accuracy=1, - series_size=series_size, - ) - self.squ_fw_fin_diff_mat_ = ( - self.squ_fw_fin_diff_mat_.T @ self.squ_fw_fin_diff_mat_ + self.lam_: int | float = lam + self.differences_: int = differences + self.max_eigval_mult_: float = ( # type: ignore + np.finfo(self.__dtype).eps * max(self.__machine_min_tol_mult, series_size) ) - # if the lambda parameter is to be fitted automatically, the penalty matrix is - # converted to a positive definite matrix and its log-determinant is computed - if self.auto_lam_: - self.squ_fw_fin_diff_mat_ = conv_symm_sparse_banded_sposdef_to_posdef( - a=self.squ_fw_fin_diff_mat_, l_and_u=self.l_and_u_, rcond=self._rcond + # the forward finite difference matrix is computed ... + self.l_and_u_: tuple[int, int] = (self.differences_, self.differences_) + self.fw_fin_diff_mat_: dia_matrix = forward_finite_diff_conv_matrix( + differences=self.differences_, series_size=self.series_size_ + ) + # ... 
followed by the squared forward finite difference matrix + self.base_squ_fw_fin_diff_mat_: csr_matrix = ( + posdef_mod_squared_fw_fin_diff_conv_matrix( + fw_fin_diff_mat=self.fw_fin_diff_mat_, + differences=self.differences_, + dia_mod_matrix=None, + max_eigval_mult=self.max_eigval_mult_, + dtype=self.__dtype, ) - # else nothing - - # finally, the matrix is converted to a banded storage - self.fw_fin_diff_mat_lu_banded_: np.ndarray = conv_to_lu_banded_storage( - a=self.squ_fw_fin_diff_mat_, + ) + self.base_squ_fw_fin_diff_mat_lub_: np.ndarray = conv_to_lu_banded_storage( + a=self.base_squ_fw_fin_diff_mat_, l_and_u=self.l_and_u_, ) - # if the lambda parameter is to be fitted automatically, the log-determinant of - # the penalty matrix is computed, which reduces to summing up the logarithms of - # of the squared main diagonal elements of its banded Cholesky decomposition - if self._lam is None: - lower = False - penalty_chol = cholesky_banded( - ab=self.fw_fin_diff_mat_lu_banded_[0 : self._differences + 1, ::], - lower=lower, - check_finite=False, - ) - - self.penalty_log_det_: float - det_sign, self.penalty_log_det_ = slodget_cho_banded( - decomposition=(penalty_chol, lower) - ) - assert det_sign > 0.0, "The penalty matrix is still not positive definite." 
- - else: - self.penalty_log_det_: float = float("nan") - # finally, Pentapy is enabled if available, the number of differences is 2, # and the lambda parameter is not fitted automatically self._pentapy_enabled: bool = ( - _PENTAPY_AVAILABLE - and self._differences == 2 - and not self.auto_lam_ - and self.__allow_pentapy + _PENTAPY_AVAILABLE and self.differences_ == 2 and self.__allow_pentapy ) def _pentapy_solve(self, ab: np.ndarray, bw: np.ndarray) -> np.ndarray: @@ -207,7 +173,7 @@ def _cholesky_solve( """ - lower = False + lower = True cb = cholesky_banded(ab, lower=lower, check_finite=False) decomposition = (cb, lower) return ( @@ -215,44 +181,12 @@ def _cholesky_solve( decomposition, ) - def _lu_solve( - self, ab: np.ndarray, bw: np.ndarray - ) -> tuple[np.ndarray, tuple[np.ndarray, np.ndarray, tuple[int, int]]]: - """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` - with the LU decomposition. This is written as the system ``A @ x = b`` where - ``A = W + lam * D^T @ D`` and ``b = W @ b``. 
- - """ - - # the LU decomposition is computed, but if the matrix cannot properly be - # decomposed and at least one diagonal element of U is zero, a LinAlgError is - # raised - try: - lub, ipiv = lu_banded( - l_and_u=self.l_and_u_, - ab=ab, - check_finite=False, - ) - decomposition = (lub, ipiv, self.l_and_u_) - - except RuntimeWarning: - raise np.linalg.LinAlgError() - - # the linear system is solved - return ( - lu_solve_banded( - decomposition=decomposition, - b=bw, - check_finite=False, - ), - decomposition, - ) - def _solve( self, bw: np.ndarray, log_lam: float, w: np.ndarray | None, + mod_squ_fin_diff_mat_lub: np.ndarray, ) -> tuple[np.ndarray, _Decomposition, BandedSolveDecompositions]: """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` where ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and @@ -267,260 +201,73 @@ def _solve( The logarithm of the lambda parameter to use for the Whittaker-like smooth. w : np.ndarray of shape (n,) The weights to use for the linear system of equations. It must be a vector - even if ``wb`` is a matrix because having ``wb`` as a matrix is only + even if ``bw`` is a matrix because having ``bw`` as a matrix is only possible if lambda is fixed and the same weights are applied to all series. + mod_squ_fin_diff_mat_lub : np.ndarray of shape (n, n) + The positive definite (modified) squared forward finite difference matrix + stored in the banded storage for LAPACK LU decomposition. Returns ------- x : np.ndarray of shape (n,) The solution vector of the linear system of equations. - decomposition : tuple The decomposition used to solve the linear system of equations. - For the LU decomposition, this is a tuple ``(lub, ipiv, l_and_u)`` where - ``lub`` is the banded storage of the LU decomposition, ``ipiv`` is the pivot - vector, and ``l_and_u`` is the tuple ``(l, u)`` with the lower and upper - bandwidth of ``lub``. 
For the Cholesky decomposition, this is a tuple ``(cb, lower)`` where ``cb`` is the banded storage of the Cholesky decomposition and ``lower`` is a boolean flag indicating whether the lower or upper triangular matrix is stored. - For the Pentapy solver, this is ``None``. - + For the Pentapy solver this is ``None``. decomposition_type : BandedSolveDecompositions The type of decomposition used to solve the linear system of equations. - Notes - ----- - This methods has the following fallback strategy in case of failures (->): - - - with pentapy: Pentapy -> LU -> weighted polynomial fit (``np.polyfit``) - - without pentapy: Cholesky -> LU -> weightedd polynomial fit - (``np.polyfit``) - - Why ``np.polyfit``? If the LU-decomposition fails, the lambda parameter is so - large that the penalty matrix is numerically singular. But on the other hand - this also means that the ``differences``-th order derivative of the series - should be as small as possible and the data fidelity term has no influence on - the solution. Fortunately, the penalty can be reduced to zero by fitting the - data with a weighted polynomial of order ``differences - 1`` because its - ``differences``-th order derivative is zero. It is however still closer to the - data than smoother solutions, i.e., even lower order polynomials whose - derivatives would also be zero. 
- """ # the banded storage for a LAPACK LU decomposition is computed by updating the # diagonal of the squared forward finite difference matrix D^T @ D with the # weights - # NOTE: using the inverse of lambda is more efficient than using lambda directly - # since then it needs to be applied to the weights only rather than a - # possible large matrix - ab = np.exp(log_lam) * self.fw_fin_diff_mat_lu_banded_ + ab = np.exp(log_lam) * mod_squ_fin_diff_mat_lub if w is not None: - ab[self._differences, ::] += w + ab[self.differences_, ::] += w else: - ab[self._differences, ::] += 1.0 + ab[self.differences_, ::] += 1.0 # the linear system of equations is solved with the most efficient method with - # LU decomposition as the fallback - try: - # Case 1: Pentapy can be used - if self._pentapy_enabled: - x = self._pentapy_solve(ab=ab, bw=bw) - if np.all(np.isfinite(x)): - return ( - x, - None, - BandedSolveDecompositions.PENTAPY, - ) - - else: - raise np.linalg.LinAlgError() + # Cholesky decomposition as fallback + # Case 1: Pentapy can be used + if self._pentapy_enabled: + x = self._pentapy_solve(ab=ab, bw=bw) + if np.all(np.isfinite(x)): + return ( + x, + None, + BandedSolveDecompositions.PENTAPY, + ) - # Case 2: Pentapy cannot be used, but the matrix is NUMERICALLY positive - # definite else: + # if Pentapy fails, the Cholesky decomposition is used as fallback x, decomposition = self._cholesky_solve( - ab=ab[0 : self._differences + 1], bw=bw + ab=ab[self.differences_ : :, ::], bw=bw ) return x, decomposition, BandedSolveDecompositions.CHOLESKY - # Case 3: Pentapy cannot be used and the matrix is NOT NUMERICALLY positive - # definite, so the fallback is to use the LU decomposition - except np.linalg.LinAlgError: - try: - x, decomposition = self._lu_solve(ab=ab, bw=bw) - if np.all(np.isfinite(x)): - return x, decomposition, BandedSolveDecompositions.LU - - else: - raise np.linalg.LinAlgError() - - # Case 4: the LU decomposition also fails, so the fallback is to fit a - # 
polynomial - except np.linalg.LinAlgError: - idx_vect = np.arange( - start=0, - stop=self.series_size_, - step=1, - dtype=np.int64, - ) - poly = np.poly1d( - np.polyfit(x=idx_vect, y=bw, deg=self._differences - 1, w=w) - ) - return poly(idx_vect), None, BandedSolveDecompositions.POLYFIT - - # FIXME: this method is not yet used and needs to be tested - def _calc_neg_marginal_likelihood( - self, - x_orig: np.ndarray, - x_smooth: np.ndarray, - decomposition: _Decomposition, - solver: BandedSolveDecompositions, - log_lam: float, - w: np.ndarray | None, - w_logdet: float, - lml_sample_size_corr: float, - ) -> float: - """Computes the negative marginal likelihood of the Whittaker-like smooth.""" - - # running this method is only possible if the lambda parameter is fitted and - # the decomposition originates from a Cholesky decomposition - # TODO: maybe also allow LU decompositions - assert self.auto_lam_ and solver == BandedSolveDecompositions.CHOLESKY, ( - "The solution of the linear system could not be computed with a Cholesky " - "decomposition." - ) - - # now, the weighted sum of squared residuals (wRSS) is computed - if w is not None: - wrss = np.sum(w * np.square(x_orig - x_smooth)) + # Case 2: Pentapy cannot be used, but the matrix is NUMERICALLY positive + # definite else: - wrss = np.sum(np.square(x_orig - x_smooth)) - - # then, the sum of squared penalties (SSP) is computed using the positive - # definite penalty matrix - ssp = np.exp(log_lam) * (x_smooth @ self.squ_fw_fin_diff_mat_ @ x_smooth) - - # besides, the log-determinant of the matrix 1/lambda * W + P which is simple - # because its Cholesky decomposition is already available - ww_plus_p_det_sign, ww_plus_p_log_det = slodget_cho_banded( - decomposition=decomposition # type: ignore - ) - assert ( - ww_plus_p_det_sign > 0.0 - ), "The matrix to invert was not positive definite." 
- - # finally, the reduced marginal likelihood is computed - return 0.5 * ( - wrss - + ssp - - w_logdet - - (self.series_size_ * log_lam + self.penalty_log_det_) - + ww_plus_p_log_det - + lml_sample_size_corr - ) - - # FIXME: this method is not yet used and needs to be tested - def opt_target_auto_lam( - self, - log_lam: float, - x: np.ndarray, - x_weighted: np.ndarray, - w: np.ndarray | None, - w_logdet: float, - lml_sample_size_corr: float, - ) -> float: - """Computes the target function to be minimised when fitting the lambda - parameter itself. - """ - - # first, the solution of the linear system of equations is computed - x_smooth, decomposition, solver = self._solve( - bw=x_weighted, log_lam=log_lam, w=w - ) - - # then, the reduced marginal likelihood is determined and returned - return self._calc_neg_marginal_likelihood( - x_orig=x, - x_smooth=x_smooth, - decomposition=decomposition, - solver=solver, - log_lam=log_lam, - w=w, - w_logdet=w_logdet, - lml_sample_size_corr=lml_sample_size_corr, - ) - - # FIXME: this method is not yet used and needs to be tested - def _solve_single_x_auto_lam( - self, - x: np.ndarray, - x_weighted: np.ndarray, - w: np.ndarray | None, - w_logdet: float, - num_nonzero_w: int, - ) -> tuple[np.ndarray, float]: - """Fits the lambda parameter itself by maximising the reduced marginal - likelihood. "Reduced" refers to the fact that only the terms that depend on the - smoothing parameter `lam` are considered. - - For the parameters, please refer to the documentation of ``solve``. Instead of - a 2D-Array, a 1D-Array is expected for ``x`` and ``w``. Besides, it expects - the product ``x * w`` to be passed as ``x_weighted`` since this is more - efficient than computing it inside the solver. 
- - Notes - ----- - The logarithm of the marginal likelihood is defined as - ``-0.5 * (wRSS + SSP - log(pdet(W)) - log(pdet(P)) + log(det(W + P)) + - (n_obs - diff) * log(2 * pi))`` where - - - `W` as the diagonal matrix of weights - - `P` is the penalty matrix - - `wRSS` is the weighted Sum of Squares Residuals between the original and the - smoothed series `x` and `x_smoothed` - (``(x - x_smoothed).T @ W @ (x - x_smoothed)``) - - `SSP` is the sum of squared penalties (``x_smoothed.T @ P @ x_smoothed``) - - `pdet` is the pseudo-determinant of a matrix (product of its non-zero - eigenvalues) - - `det` is the determinant of a matrix (product of its eigenvalues) - - `n_obs` is the number of observations with non-zero weights - -""" - - # the sample size correction summand for the marginal likelihood is computed - lml_sample_size_corr = (num_nonzero_w - self._differences) * LN_OF_TWO_PI - # the target function is minimised using the bounded Brent method - opt_res = minimize_scalar( - fun=self.opt_target_auto_lam, - bounds=self.__log_lam_bounds, - method="bounded", - args=( - x, - x_weighted, - w, - w_logdet, - lml_sample_size_corr, - ), - ) - assert opt_res.success, "The optimisation did not converge." - - # the solution of the linear system of equations is computed - x_smooth, _, _ = self._solve(bw=x_weighted, log_lam=opt_res.x, w=w) - - # finally, the solution and the lambda parameter are returned - return x_smooth, np.exp(opt_res.x) + x, decomposition = self._cholesky_solve( + ab=ab[self.differences_ : :, ::], bw=bw + ) + return x, decomposition, BandedSolveDecompositions.CHOLESKY def _solve_single_x_fixed_lam( self, - x: np.ndarray, x_weighted: np.ndarray, w: np.ndarray | None, + mod_squ_fin_diff_mat_lub: np.ndarray, ) -> tuple[np.ndarray, float]: """Fits the Whittaker-like smooth with a fixed lambda parameter. - For the parameters, please refer to the documentation of ``solve``. Instead of + For the parameters, please refer to the documentation of ``_solve``. 
Instead of a 2D-Array, a 1D-Array is expected for ``x`` and ``w``. Besides, it expects the product ``x * w`` to be passed as ``x_weighted`` since this is more efficient than computing it inside the solver. @@ -530,79 +277,100 @@ def _solve_single_x_fixed_lam( # the solution of the linear system of equations is computed x_smooth, _, _ = self._solve( bw=x_weighted, - log_lam=np.log(self._lam), # type: ignore + log_lam=np.log(self.lam_), # type: ignore w=w, + mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub, ) # finally, the solution is returned together with the lambda parameter - return x_smooth, self._lam # type: ignore + return x_smooth, self.lam_ # type: ignore def _solve_single_x( self, x: np.ndarray, w: np.ndarray | None, - w_logdet: float, - num_nonzero_w: int, + mod_squ_fin_diff_mat_lub: np.ndarray, ) -> tuple[np.ndarray, float]: """Fits the Whittaker-like smooth to a single series for a fixed or fitted lambda parameter. For the parameters, please refer to the documentation of ``solve``. Instead of - a 2D-Array, a 1D-Array is expected for ``x`` and ``w``. + 2D-Arrays, 1D-Arrays are expected for ``x`` and ``w``. """ - # first, the weighted series is computed + # first, the weights need to be ensured to be invertible by using the relative + # condition number and then the weighted series is computed + # NOTE: this numerical trick ensures that the smoothing also works in the + # limiting case that ``lam`` is vanishing. 
Since the diagonal matrix W has + # eigenvalues that correspond to the main diagonal entries, this problem + # is readily solved by bounding the minimum weight to ``rcond * w.max()`` + # which works since a maximum weight of zero has already been excluded if w is not None: - x_weighted = w * x + w_lifted = np.maximum(w, self.max_eigval_mult_ * w.max()) + x_wavg = np.average(x, weights=w_lifted) + x_weighted = w_lifted * (x - x_wavg) else: - x_weighted = x + w_lifted = None + x_wavg = np.average(x) + x_weighted = x - x_wavg # then, the solution of the linear system of equations is computed - if self.auto_lam_: - return self._solve_single_x_auto_lam( - x=x, - x_weighted=x_weighted, - w=w, - w_logdet=w_logdet, - num_nonzero_w=num_nonzero_w, - ) - else: - return self._solve_single_x_fixed_lam(x=x, x_weighted=x_weighted, w=w) + # NOTE: this is a placeholder where an if-else-statement needs to be inserted + # for then the lambda parameter needs to be evaluated automatically + x_smooth, lam = self._solve_single_x_fixed_lam( + x_weighted=x_weighted, + w=w_lifted, + mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub, + ) + return x_smooth + x_wavg, lam def _solve_multiple_x( self, X: np.ndarray, w: np.ndarray | None, + mod_squ_fin_diff_mat_lub: np.ndarray, ) -> tuple[np.ndarray, np.ndarray]: """Fits the Whittaker-like smooth to multiple series when the lambda parameter is fixed and the same weights are applied to all series. - It leverages the ability of Pentapy and LAPACK to solve multiple linear systems - of equations at once from the same inversion. + It leverages the ability of LAPACK (not pentapy) to solve multiple linear + systems of equations at once from the same inversion. - For the parameters, please refer to the documentation of ``solve``. + For the parameters, please refer to the documentation of ``_solve``. 
""" # in this special case, the solution of the linear system of equations can be # computed with a single matrix inversion + # first, the weights need to be ensured to be invertible by using the relative + # condition number and then the weighted series is computed + # NOTE: this numerical trick ensures that the smoothing also works in the + # limiting case that ``lam`` is vanishing. Since the diagonal matrix W has + # eigenvalues that correspond to the main diagonal entries, this problem + # is readily solved by bounding the minimum weight to ``rcond * w.max()`` + # which works since a maximum weight of zero has already been excluded if w is not None: - x_weighted = np.transpose(w * X) - w_inter = w.ravel() + w_lifted = np.maximum(w, self.max_eigval_mult_ * w.max()).ravel() + x_wavg = np.average(X, weights=w_lifted, axis=1) + x_weighted = np.transpose( + w_lifted[np.newaxis, ::] * (X - x_wavg[::, np.newaxis]) + ) else: - x_weighted = np.transpose(X) - w_inter = w + x_wavg = np.average(X, axis=1) + x_weighted = np.transpose(X) - x_wavg[np.newaxis, ::] + w_lifted = None # then, the solution of the linear system of equations is computed X_smooth, _, _ = self._solve( bw=x_weighted, - log_lam=np.log(self._lam), # type: ignore - w=w_inter, + log_lam=np.log(self.lam_), # type: ignore + w=w_lifted, + mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub, ) return ( - np.transpose(X_smooth), - np.full(shape=(X.shape[0],), fill_value=self._lam), # type: ignore + np.transpose(X_smooth + x_wavg[np.newaxis, ::]), + np.full(shape=(X.shape[0],), fill_value=self.lam_), # type: ignore ) def _whittaker_solve( @@ -612,9 +380,10 @@ def _whittaker_solve( w: np.ndarray | None = None, use_same_w_for_all: bool = False, ) -> tuple[np.ndarray, np.ndarray]: - """Solves the linear equations for Whittaker-Henderson smoothing. Internally it - chooses the most appropriate method and solver depending on the data - dimensionality, the weights, and the system's available packages (pentapy). 
+ """Solves the linear equations for Whittaker-Henderson smoothing for Arrays that + are stored in 2D format, i.e., each series is stored as a row. + Internally it chooses the most appropriate method and solver depending on the + data dimensionality, the weights, and the system's available packages (pentapy). Parameters ---------- @@ -626,7 +395,7 @@ def _whittaker_solve( in `X`, which enhances the smoothing a lot for fixed smoothing parameters `lam`. If ``None``, no weights are applied and each datapoint is assumed to have - equal importance, This allows for ``use_same_w_for_all`` to be ``True`` + equal importance. This allows for ``use_same_w_for_all`` to be ``True`` as well. use_same_w_for_all Whether to use the same weights for all series in `X`. This is only possible @@ -636,7 +405,6 @@ def _whittaker_solve( ------- X_smooth : np.ndarray of shape(n, m) The smoothed series stored as individual rows. - lam : np.ndarray of shape(n,) The lambda parameter used for the smoothing of each series. If `lam` was fixed, this is a vector of length `n` with the same value for each series. 
@@ -646,42 +414,46 @@ def _whittaker_solve( # a nested function is defined for updating the weights # TODO: add zero-weight protection (eigenvalues are weights themselves) def update_to_next_weights(iter_i: int) -> None: - nonlocal w_curr, w_logdet_curr, num_nonzero_w_curr + nonlocal w_curr if iter_i > 0: if w is None: w_curr = None - w_logdet_curr = 0.0 # for identity matrix - num_nonzero_w_curr = self.series_size_ return elif not use_same_w_for_all: - w_curr = w[iter_i, ::] - non_zero_idxs = ( - w_curr - >= np.finfo(w_curr.dtype).eps * w_curr.max() * w_curr.size - ) - w_logdet_curr = np.sum(np.log(w_curr[non_zero_idxs])) - num_nonzero_w_curr = np.sum(non_zero_idxs) + w_curr = w[iter_i, ::].copy() else: return else: if w is None: w_curr = None - w_logdet_curr = 1.0 - num_nonzero_w_curr = self.series_size_ else: - w_curr = w[iter_i, ::] - non_zero_idxs = ( - w_curr - >= np.finfo(w_curr.dtype).eps * w_curr.max() * w_curr.size - ) - w_logdet_curr = np.sum(np.log(w_curr[non_zero_idxs])) + w_curr = w[iter_i, ::].copy() + + assert ( + X.dtype == self.__dtype + ), f"Internal error: Promotion to {self.__dtype} failed." 
+ + # the modified squared forward finite difference matrix is computed + mod_squ_fin_diff_mat_lub = posdef_mod_squared_fw_fin_diff_conv_matrix( + fw_fin_diff_mat=self.fw_fin_diff_mat_, + differences=self.differences_, + dia_mod_matrix=None, + max_eigval_mult=self.max_eigval_mult_, + dtype=self.__dtype, + ) + mod_squ_fin_diff_mat_lub = conv_to_lu_banded_storage( + a=mod_squ_fin_diff_mat_lub, + l_and_u=self.l_and_u_, + ) # if multiple x with the same weights are to be solved for fixed lambda, this # can be done more efficiently by leveraging Pentapy's and LAPACK'S ability to # perform multiple solves from the same inversion at once - if not self.auto_lam_ and use_same_w_for_all: - return self._solve_multiple_x(X=X, w=w) + if use_same_w_for_all: + return self._solve_multiple_x( + X=X, w=w, mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub + ) # else nothing # otherwise, the solution of the linear system of equations is computed for @@ -689,15 +461,10 @@ def update_to_next_weights(iter_i: int) -> None: X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) w_curr = None - w_logdet_curr = float("nan") - num_nonzero_w_curr = -1 for iter_i, x in enumerate(X): update_to_next_weights(iter_i=iter_i) X_smooth[iter_i], lam[iter_i] = self._solve_single_x( - x=x, - w=w_curr, - w_logdet=w_logdet_curr, - num_nonzero_w=num_nonzero_w_curr, + x=x, w=w_curr, mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub ) return X_smooth, lam diff --git a/tests/resources/reference_finite_differences.csv b/tests/resources/reference_finite_differences.csv index 497a3c31..6dfea2af 100644 --- a/tests/resources/reference_finite_differences.csv +++ b/tests/resources/reference_finite_differences.csv @@ -1,25 +1,6 @@ From https://en.wikipedia.org/wiki/Finite_difference_coefficient#Forward_finite_difference,,,,,,,,,, Difference Order,Accuracy,0,1,2,3,4,5,6,7,8 1,1,-1,1,,,,,,, -1,2,-1.5,2,-0.5,,,,,, -1,3,-1.833333333,3,-1.5,0.333333333,,,,, -1,4,-2.083333333,4,-3,1.333333333,-0.25,,,, 
-1,5,-2.283333333,5,-5,3.333333333,-1.25,0.2,,, -1,6,-2.45,6,-7.5,6.666666667,-3.75,1.2,-0.166666667,, 2,1,1,-2,1,,,,,, -2,2,2,-5,4,-1,,,,, -2,3,2.916666667,-8.666666667,9.5,-4.666666667,0.916666667,,,, -2,4,3.75,-12.83333333,17.83333333,-13,5.083333333,-0.833333333,,, -2,5,4.511111111,-17.4,29.25,-28.22222222,16.5,-5.4,0.761111111,, -2,6,5.211111111,-22.3,43.95,-52.72222222,41,-20.1,5.661111111,-0.7, 3,1,-1,3,-3,1,,,,, -3,2,-2.5,9,-12,7,-1.5,,,, -3,3,-4.25,17.75,-29.5,24.5,-10.25,1.75,,, -3,4,-6.125,29,-57.625,62,-38.375,13,-1.875,, -3,5,-8.058333333,42.53333333,-98.225,129.6666667,-106.0416667,53.6,-15.40833333,1.933333333, -3,6,-10.0125,58.16666667,-152.9416667,239.1,-242.8333333,163.0333333,-70.125,17.56666667,-1.954166667 4,1,1,-4,6,-4,1,,,, -4,2,3,-14,26,-24,11,-2,,, -4,3,5.833333333,-31,68.5,-80.66666667,53.5,-19,2.833333333,, -4,4,9.333333333,-55.5,142,-203.1666667,176,-92.5,27.33333333,-3.5, -4,5,13.3625,-87.73333333,254.8166667,-428.8,458.0416667,-318.1333333,140.15,-35.73333333,4.029166667 diff --git a/tests/test_for_utils.py b/tests/test_for_utils.py index 633ee263..36565421 100644 --- a/tests/test_for_utils.py +++ b/tests/test_for_utils.py @@ -1,20 +1,16 @@ +from typing import Literal + import numpy as np import pytest -from scipy.linalg import cholesky_banded +from scipy.linalg import cholesky_banded, eigvals_banded, solveh_banded from scipy.sparse import eye as speye -from chemotools.utils.banded_linalg import ( - _find_largest_symm_sparse_banded_spd_eigval, - _find_smallest_symm_sparse_banded_spd_eigval, - conv_to_lu_banded_storage, - lu_banded, - lu_solve_banded, - slodget_cho_banded, - slogdet_lu_banded, -) +from chemotools.utils.banded_linalg import conv_to_lu_banded_storage, slodget_cho_banded from chemotools.utils.finite_differences import ( calc_forward_diff_kernel, + calc_limit_max_eigval_fin_diff_mat, forward_finite_diff_conv_matrix, + posdef_mod_squared_fw_fin_diff_conv_matrix, ) from chemotools.utils.whittaker_base import 
WhittakerLikeSolver from tests.fixtures import reference_finite_differences # noqa: F401 @@ -24,34 +20,30 @@ def test_forward_diff_kernel( reference_finite_differences: list[tuple[int, int, np.ndarray]] # noqa: F811 ) -> None: # Arrange - for differences, accuracy, reference in reference_finite_differences: + for differences, _, reference in reference_finite_differences: # Act - kernel = calc_forward_diff_kernel(differences=differences, accuracy=accuracy) + kernel = calc_forward_diff_kernel(differences=differences) # Assert assert kernel.size == reference.size, ( - f"Difference order {differences} with accuracy {accuracy} " - f"expected kernel size {reference.size} but got {kernel.size}" + f"Difference order {differences} with accuracy 1 expected kernel size " + f"{reference.size} but got {kernel.size}" ) assert np.allclose(kernel, reference, atol=1e-8), ( - f"Difference order {differences} with accuracy {accuracy} " - f"expected kernel\n{reference.tolist()}\n" - f"but got\n{kernel.tolist()}" + f"Difference order {differences} with accuracy 1 expected kernel " + f"{reference.tolist()} but got {kernel.tolist()}" ) -@pytest.mark.parametrize("accuracy", list(range(1, 21))) @pytest.mark.parametrize("difference", list(range(0, 21))) @pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) -def test_forward_finite_diff_conv_matrix( - size: int, difference: int, accuracy: int -) -> None: +def test_forward_finite_diff_conv_matrix(size: int, difference: int) -> None: """Tests the generated convolution matrix for forward finite differences by comparing it to NumPy's ``convolve``. """ # the test is skipped if the kernel is too large - if difference + accuracy > size: + if difference + 1 > size: pytest.skip( f"Test skipped because the kernel size {difference + 1} is larger than the " f"series size {size}." @@ -63,92 +55,70 @@ def test_forward_finite_diff_conv_matrix( series = np.random.rand(size) # the kernel is computed ... 
- kernel = calc_forward_diff_kernel(differences=difference, accuracy=accuracy) + kernel = calc_forward_diff_kernel(differences=difference) # ... and the random series is convolved with the kernel ... # NOTE: the kernel is flipped because of the way NumPy's convolve works numpy_convolved_series = np.convolve(series, np.flip(kernel), mode="valid") # the convolution matrix is computed ... conv_matrix = forward_finite_diff_conv_matrix( - differences=difference, accuracy=accuracy, series_size=series.size + differences=difference, series_size=series.size ) # ... and the series is convolved with the convolution matrix matrix_convolved_series = conv_matrix @ series # the actual test is performed + assert conv_matrix.dtype == np.int64, ( + f"Convolution matrix for difference order {difference} with accuracy 1 for " + f"series of size {size} expected data type np.int64 but got " + f"{conv_matrix.dtype}." + ) + assert np.allclose(matrix_convolved_series, numpy_convolved_series), ( - f"Differences by matrix product for Difference order {difference} with " - f"accuracy {accuracy} for series of size {size} failed." + f"Differences by matrix product for difference order {difference} with " + f"accuracy 1 for series of size {size} failed." ) -@pytest.mark.parametrize("with_finite_check", [True, False]) -@pytest.mark.parametrize("difference", list(range(0, 11))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) -def test_stepwise_lu_banded_solve( - size: int, difference: int, with_finite_check: bool -) -> None: - """Tests the LU decomposition of a banded matrix by comparing the solution of the - linear systems involved in Whittaker smoothing with the solution obtained by NumPy's - ``solve``. - It gets ill-condition for ``differences`` >> 10, but this is not the intended use - case. 
+@pytest.mark.parametrize("difference", list(range(0, 21))) +def test_limit_eigval_squ_fin_diff_mat(difference: int) -> None: + """Tests the computation of the limit of the maximum eigenvalue of the squared + forward finite difference matrix. """ - # the test is skipped if the kernel is too large - if difference + 1 > size: - pytest.skip( - f"Test skipped because the kernel size {difference + 1} is larger than the " - f"series size {size}." - ) - # else nothing - - # a random right hand side vector is generated - np.random.seed(seed=42) - b = np.random.rand( - size, + # the limit of the maximum eigenvalue is computed empirically + series_size = 10000 + eigval_max_empirical = calc_limit_max_eigval_fin_diff_mat(differences=difference) + # ... and compared to the reference value + squ_diff_mat = forward_finite_diff_conv_matrix( + differences=difference, series_size=series_size ) - - # a finite difference matrix is generated with an updated diagonal to - # ensure positive definiteness - l_and_u = (difference, difference) - d = forward_finite_diff_conv_matrix( - differences=difference, accuracy=1, series_size=size - ) - a = d.T @ d + speye(size) - - # it is converted to LU banded storage ... - ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u) - # ... its LU decomposition is computed ... - lub, ipiv = lu_banded( - l_and_u=l_and_u, - ab=ab, - overwrite_ab=False, - check_finite=with_finite_check, - ) - # ... and the linear system is solved - x = lu_solve_banded( - decomposition=(lub, ipiv, l_and_u), - b=b, - check_finite=with_finite_check, - ) - - # the solution is compared to the solution obtained by NumPy's - # solve - np_x = np.linalg.solve(a=a.toarray(), b=b) - - assert np.allclose(x, np_x), ( - f"Banded LU decomposition for matrix of size {size} with {difference} sub- and " - f"superdiagonals failed." 
+ squ_diff_mat = squ_diff_mat.T @ squ_diff_mat + squ_diff_mat_b = conv_to_lu_banded_storage( + a=squ_diff_mat, l_and_u=(difference, difference) + ).astype(np.float64)[difference:, ::] + eigval_max_reference = eigvals_banded( + a_band=squ_diff_mat_b, + lower=True, + select="i", + select_range=(series_size - 1, series_size - 1), + )[0] + + assert np.isclose(eigval_max_empirical, eigval_max_reference), ( + f"Empirical limit of the maximum eigenvalue for difference order {difference} " + f"with accuracy 1 for series of size {series_size} failed." ) -@pytest.mark.parametrize("with_finite_check", [True, False]) -@pytest.mark.parametrize("difference", list(range(0, 11))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) -def test_lu_banded_slogdet(size: int, difference: int, with_finite_check: bool) -> None: - """Tests the computation of the sign and log determinant of a banded matrix from - its LU decomposition by comparing it to NumPy's ``slogdet``. +@pytest.mark.parametrize("difference", list(range(0, 21))) +@pytest.mark.parametrize( + "size", + np.arange(start=1, stop=1001, step=1).tolist() + + np.arange(start=1000, stop=100000, step=2500).tolist(), +) +def test_posdef_squ_fin_diff_conv_matrix(size: int, difference: int) -> None: + """Tests the generated convolution matrix for forward finite differences by + comparing it against itself after SciPy's ``solveh_banded`` has been applied. 
""" # the test is skipped if the kernel is too large @@ -159,44 +129,46 @@ def test_lu_banded_slogdet(size: int, difference: int, with_finite_check: bool) ) # else nothing - # a finite difference matrix is generated with an updated diagonal to ensure - # positive definiteness - l_and_u = (difference, difference) - d = forward_finite_diff_conv_matrix( - differences=difference, accuracy=1, series_size=size - ) - a = d.T @ d + speye(size) + # the random signal is generated + min_eigval_size = 5000 + np.random.seed(seed=42) + series = np.random.rand(size) - # it is converted to LU banded storage ... - ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u) - # ... its LU decomposition is computed ... - lub, ipiv = lu_banded( - l_and_u=l_and_u, - ab=ab, - overwrite_ab=False, - check_finite=with_finite_check, + # this is solved against a finite difference matrix with an updated diagonal to + # ensure positive definiteness + l_and_u = (difference, difference) + max_eigval_mult = max(min_eigval_size, size) * np.finfo(np.float64).eps # type: ignore # noqa: E501 + fw_fin_diff_mat = forward_finite_diff_conv_matrix( + differences=difference, + series_size=size, ) - # ... and the sign and log determinant are determined - sign, logabsdet = slogdet_lu_banded( - decomposition=(l_and_u, lub, ipiv), + squ_diff_mat = posdef_mod_squared_fw_fin_diff_conv_matrix( + fw_fin_diff_mat=fw_fin_diff_mat, + differences=difference, + dia_mod_matrix=None, + max_eigval_mult=max_eigval_mult, + dtype=np.float64, ) - # the sign and log determinant are compared to the values obtained by NumPy's - # slogdet - np_sign, np_logabsdet = np.linalg.slogdet(a=a.toarray()) # type: ignore - - assert np_sign > 0, ( - f"Sign of log determinant for matrix of size {size} with {difference} sub- and " - f"superdiagonals failed." 
+ assert squ_diff_mat.dtype == np.float64, ( # type: ignore + f"Convolution matrix for difference order {difference} with accuracy 1 for " + f"series of size {size} expected data type np.float64 but got " + f"{squ_diff_mat.dtype}." ) - assert np.isclose(sign, np_sign), ( - f"Sign of log determinant for matrix of size {size} with {difference} sub- and " - f"superdiagonals failed." + # now, the linear system is solved + ab = conv_to_lu_banded_storage(a=squ_diff_mat, l_and_u=l_and_u) + x = solveh_banded( + ab=ab[difference:, ::], + b=series, + lower=True, ) - assert np.isclose(logabsdet, np_logabsdet), ( - f"Log determinant for matrix of size {size} with {difference} sub- and " - f"superdiagonals failed." + + # NOTE: ``atol`` is set relatively high because the matrix is not very well + # conditioned and an approximate result is expected but also fine + assert np.allclose(series, squ_diff_mat @ x, atol=5e-4), ( + f"Positive definite squared convolution matrix for difference order " + f"{difference} with accuracy 1 for series of size {size} failed." ) @@ -220,16 +192,26 @@ def test_cho_banded_slogdet( # a finite difference matrix is generated with an updated diagonal to # ensure positive definiteness l_and_u = (difference, difference) - d = forward_finite_diff_conv_matrix( - differences=difference, accuracy=1, series_size=size + fw_fin_diff_mat = forward_finite_diff_conv_matrix( + differences=difference, + series_size=size, ) - a = d.T @ d + speye(size) + a = posdef_mod_squared_fw_fin_diff_conv_matrix( + fw_fin_diff_mat=fw_fin_diff_mat, + differences=difference, + dia_mod_matrix=None, + max_eigval_mult=0.0, + dtype=np.float64, + ) + a += speye(size, dtype=np.int64) # type: ignore # it is converted to LU banded storage ... - ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u) + ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u).astype(np.float64) # ... its Cholesky decomposition is computed ... 
lower = False - chob = cholesky_banded(ab=ab[0 : difference + 1, ::], lower=lower) + chob = cholesky_banded( + ab=ab[0 : difference + 1, ::], lower=lower, check_finite=with_finite_check + ) # ... and the sign and log determinant are determined sign, logabsdet = slodget_cho_banded(decomposition=(chob, lower)) @@ -247,19 +229,27 @@ def test_cho_banded_slogdet( ) -# FIXME: this test takes forever and is currently not even required, so the differences -# screened was limited a lot -@pytest.mark.parametrize("with_finite_check", [True, False]) -# @pytest.mark.parametrize("difference", list(range(0, 11))) -@pytest.mark.parametrize("difference", [0, 1, 2]) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) -def test_largest_smallest_eigval_of_spbanded( - size: int, difference: int, with_finite_check: bool +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("with_weights", [True, False, "bad"]) +@pytest.mark.parametrize("with_pentapy", [True, False]) +@pytest.mark.parametrize("log10_lam", np.arange(-50.0, 170.0, step=20.0).tolist()) +@pytest.mark.parametrize("nrows", [1, 2]) +@pytest.mark.parametrize( + "size", [3, 11, 50, 100, 500, 1_000, 5_000, 10_000, 50_000, 100_000] +) +@pytest.mark.parametrize("difference", [2, 10]) +def test_whittaker_solve( + difference: int, + size: int, + nrows: int, + log10_lam: float, + with_pentapy: bool, + with_weights: bool | Literal["bad"], + same_weights_for_all: bool, ) -> None: - """Tests the computation of the largest and smallest eigenvalues of a symmetric - mathematically positive semi-definite banded matrix by comparing it to NumPy's - ``eigvalsh``. Squared finite difference matrices are used for this test since they - are symmetric and mathematically positive semi-definite. + """Tests if the Whittaker smoothing still works for very low and large values of the + smoothing parameter combined with different numerically challenging weights. 
If it + survives this, arbitrary combinations can be considered safe. """ # the test is skipped if the kernel is too large @@ -268,49 +258,7 @@ def test_largest_smallest_eigval_of_spbanded( f"Test skipped because the kernel size {difference + 1} is larger than the " f"series size {size}." ) - - # a finite difference matrix is generated and squared - l_and_u = (difference, difference) - d = forward_finite_diff_conv_matrix( - differences=difference, accuracy=1, series_size=size - ) - a = d.T @ d - ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u)[0 : difference + 1, ::] - - # now, its largest and smallest eigenvalues are computed ... - max_eigval = _find_largest_symm_sparse_banded_spd_eigval( - ab=ab, check_finite=with_finite_check - ) - min_eigval = _find_smallest_symm_sparse_banded_spd_eigval( - ab=ab, check_finite=with_finite_check - ) - - # ... and compared to the values obtained by NumPy's ``eigvalsh`` - np_eigvals = np.linalg.eigvalsh(a=a.toarray()) - np_max_eigval = np_eigvals.max() - np_min_eigval = np_eigvals.min() - - assert np.isclose(max_eigval, np_max_eigval), ( - f"Largest eigenvalue for matrix of size {size} with {difference} " - f"sub- and superdiagonals failed. " - f"Chemotools solution: {max_eigval} vs." - f"NumPy's solution: {np_max_eigval}" - ) - assert np.isclose(min_eigval, np_min_eigval), ( - f"Smallest eigenvalue for matrix of size {size} with {difference} " - f"sub- and superdiagonals failed. " - f"Chemotools solution {min_eigval} vs." - f"NumPy's solution {np_min_eigval}" - ) - - -@pytest.mark.parametrize("with_pentapy", [True, False]) -@pytest.mark.parametrize("log10_lam", np.arange(-30.0, 110.0, step=10.0).tolist()) -@pytest.mark.parametrize("size", [3, 10, 50, 100, 500, 1_000, 5_000, 10_000]) -def test_whittaker_solve(size: int, log10_lam: float, with_pentapy: bool) -> None: - """Tests if the Whittaker smoothing still works for very large values of the - smoothing parameter. 
- """ + # else nothing # a Whittaker solver is instantiated ... whittaker_solver = WhittakerLikeSolver() @@ -319,13 +267,32 @@ def test_whittaker_solve(size: int, log10_lam: float, with_pentapy: bool) -> Non whittaker_solver._setup_for_fit( series_size=size, lam=10.0**log10_lam, - differences=2, + differences=difference, ) - # ... and the linear system is solved + + # ... weights are generated ... np.random.seed(seed=42) - z = whittaker_solver._whittaker_solve(X=np.random.rand(1, size))[0] + if with_weights: + weights = np.random.rand(1, size) + if with_weights == "bad": + idxs = np.arange(start=0, stop=size, step=1, dtype=np.int64) + weights[0, np.random.choice(idxs, size=int(size / 2), replace=False)] = 0.0 + # else nothing + else: + weights = None + + if not same_weights_for_all and weights is not None: + weights = np.tile(weights.reshape((1, -1)), reps=(nrows, 1)) + # else nothing + + # ... and the linear system is solved + z = whittaker_solver._whittaker_solve( + X=np.random.rand(nrows, size), + w=weights, + use_same_w_for_all=same_weights_for_all, + )[0] assert np.all(np.isfinite(z)), ( f"Whittaker solver for series of size {size} with smoothing parameter " - f"{10.0 ** log10_lam} failed." + f"{10.0 ** log10_lam} and weights {weights} failed." 
) diff --git a/tests/test_functionality.py b/tests/test_functionality.py index 398551e2..a848b74f 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -40,33 +40,46 @@ from tests.fixtures import spectrum -def test_air_pls_single_signal(spectrum, reference_airpls): # noqa: F811 +@pytest.mark.parametrize("n_samples", [1, 5]) +def test_air_pls( + spectrum, + reference_airpls, # noqa: F811 + n_samples: int, +): # Arrange + reps = (n_samples, 1) air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) # Act spectrum_corrected = air_pls.fit_transform(spectrum) # Assert - assert np.allclose(spectrum_corrected[0], reference_airpls[0], atol=1e-7) + assert np.allclose( + spectrum_corrected[0], np.tile(reference_airpls, reps=reps), atol=1e-7 + ) -def test_air_pls_multi_signals(spectrum, reference_airpls): # noqa: F811 - # Arrange - reps = (5, 1) - air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) +# FIXME: Deactivated because it fails; Issue created: +# @pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) +# @pytest.mark.parametrize("size", [5_000]) +# def test_air_pls_constant_signal(size: int, fill_value: float) -> None: +# # Arrange +# spectrum = np.full(shape=(size,), fill_value=fill_value).reshape((1, -1)) +# air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) - # Act - spectrum_corrected = air_pls.fit_transform(X=np.tile(spectrum, reps=reps)) +# # Act +# spectrum_corrected = air_pls.fit_transform(spectrum) - # Assert - assert np.allclose( - spectrum_corrected, np.tile(reference_airpls[0], reps=reps), atol=1e-7 - ) +# # Assert +# assert np.allclose(spectrum_corrected[0], spectrum[0]) -def test_ar_pls(spectrum_arpls, reference_arpls): # noqa: F811 +# FIXME: working with such a high ``atol`` indicates that the reference is not up to +# date anymore +@pytest.mark.parametrize("n_samples", [1, 5]) +def test_ar_pls(spectrum_arpls, reference_arpls, n_samples: int): # noqa: F811 # Arrange + reps = (n_samples, 1) arpls 
= ArPls(lam=1e2, differences=2, ratio=0.0001) reference = np.array(spectrum_arpls) - np.array(reference_arpls) @@ -74,20 +87,22 @@ def test_ar_pls(spectrum_arpls, reference_arpls): # noqa: F811 spectrum_corrected = arpls.fit_transform(spectrum_arpls) # Assert - assert np.allclose(spectrum_corrected[0], reference[0], atol=1e-4) + assert np.allclose(spectrum_corrected[0], np.tile(reference, reps=reps), atol=1e-4) -def test_ar_pls_multi_signals(spectrum_arpls, reference_arpls): # noqa: F811 - # Arrange - reps = (5, 1) - arpls = ArPls(lam=1e2, differences=2, ratio=0.0001) - reference = np.array(spectrum_arpls) - np.array(reference_arpls) +# FIXME: Deactivated because it fails; Issue created: +# @pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) +# @pytest.mark.parametrize("size", [5_000]) +# def test_ar_pls_constant_signal(size: int, fill_value: float) -> None: +# # Arrange +# spectrum = np.full(shape=(size,), fill_value=fill_value).reshape((1, -1)) +# ar_pls = ArPls(lam=1e2, differences=2, ratio=0.0001) - # Act - spectrum_corrected = arpls.fit_transform(X=np.tile(spectrum_arpls, reps=reps)) +# # Act +# spectrum_corrected = ar_pls.fit_transform(spectrum) - # Assert - assert np.allclose(spectrum_corrected, np.tile(reference[0], reps=reps), atol=1e-4) +# # Assert +# assert np.allclose(spectrum_corrected[0], spectrum[0]) def test_baseline_shift(): @@ -780,58 +795,25 @@ def test_uniform_noise(): assert np.allclose(np.std(spectrum_corrected[0]), np.sqrt(1 / 3), atol=1e-2) -def test_whittaker_smooth_single_signal_no_weights( - spectrum, reference_whittaker # noqa: F811 -): - # Arrange - whittaker_smooth = WhittakerSmooth() - - # Act - spectrum_corrected = whittaker_smooth.fit_transform(X=spectrum) - - # Assert - assert np.allclose(spectrum_corrected[0], reference_whittaker[0], atol=1e-8) - - -def test_whittaker_smooth_multi_signals_no_weights( - spectrum, reference_whittaker # noqa: F811 # -): - # Arrange - reps = (5, 1) - whittaker_smooth = WhittakerSmooth() - - # Act 
- spectrum_corrected = whittaker_smooth.fit_transform(X=np.tile(spectrum, reps=reps)) - - # Assert - assert np.allclose( - spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 - ) - - -def test_whittaker_smooth_single_signal_with_weights( - spectrum, reference_whittaker # noqa: F811 -): - # Arrange - weights = np.ones(shape=(len(spectrum[0]),)) - whittaker_smooth = WhittakerSmooth() - - # Act - spectrum_corrected = whittaker_smooth.fit_transform( - X=spectrum, sample_weight=weights - ) - - # Assert - assert np.allclose(spectrum_corrected[0], reference_whittaker[0], atol=1e-8) - - -def test_whittaker_smooth_multi_signals_single_weights( - spectrum, reference_whittaker # noqa: F811 +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("with_weights", [True, False]) +@pytest.mark.parametrize("n_samples", [1, 5]) +def test_whittaker_smooth( + spectrum, + reference_whittaker, # noqa: F811 + n_samples: int, + with_weights: bool, + same_weights_for_all: bool, ): # Arrange - weights = np.ones(shape=(len(spectrum[0]),)) - reps = (5, 1) + reps = (n_samples, 1) whittaker_smooth = WhittakerSmooth() + if with_weights and not same_weights_for_all: + weights = np.ones(shape=(n_samples, len(spectrum[0]))) + elif with_weights and same_weights_for_all: + weights = np.ones(shape=(len(spectrum[0]),)) + else: + weights = None # Act spectrum_corrected = whittaker_smooth.fit_transform( @@ -844,76 +826,79 @@ def test_whittaker_smooth_multi_signals_single_weights( ) -def test_whittaker_smooth_multi_signals_multi_weights( - spectrum, reference_whittaker # noqa: F811 +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("with_weights", [True, False]) +@pytest.mark.parametrize("n_samples", [1, 5]) +def test_whittaker_with_pentapy( + n_samples: int, with_weights: bool, same_weights_for_all: bool ): - # Arrange - weights = np.ones(shape=(5, len(spectrum[0]))) - reps = (weights.shape[0], 1) - 
whittaker_smooth = WhittakerSmooth() - - # Act - spectrum_corrected = whittaker_smooth.fit_transform( - X=np.tile(spectrum, reps=reps), sample_weight=weights - ) - - # Assert - assert np.allclose( - spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 - ) - - -def test_whittaker_with_pentapy_single_signal(): # Arrange np.random.seed(42) - spectrum = np.random.rand(1, 1000) + spectrum = np.random.rand(n_samples, 1000) whittaker_smooth = WhittakerSmooth(differences=2) + if with_weights and not same_weights_for_all: + weights = np.ones(shape=(n_samples, len(spectrum[0]))) + elif with_weights and same_weights_for_all: + weights = np.ones(shape=(len(spectrum[0]),)) + else: + weights = None # Act with pentapy - spectrum_corr_pentapy = whittaker_smooth.fit_transform(spectrum) + spectrum_corr_pentapy = whittaker_smooth.fit_transform( + spectrum, sample_weight=weights + ) # Assert with pentapy assert ( whittaker_smooth._solve( - bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None + bw=spectrum.transpose(), + log_lam=np.log(whittaker_smooth.lam), + w=None, + mod_squ_fin_diff_mat_lub=whittaker_smooth.base_squ_fw_fin_diff_mat_lub_, )[2] == BandedSolveDecompositions.PENTAPY ) # Act without pentapy whittaker_smooth._WhittakerLikeSolver__allow_pentapy = False # type: ignore - spectrum_corr_scipy = whittaker_smooth.fit_transform(spectrum) + spectrum_corr_scipy = whittaker_smooth.fit_transform( + spectrum, sample_weight=weights + ) # Assert without pentapy - assert whittaker_smooth._solve( - bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None - )[2] in {BandedSolveDecompositions.CHOLESKY, BandedSolveDecompositions.LU} + assert ( + whittaker_smooth._solve( + bw=spectrum.transpose(), + log_lam=np.log(whittaker_smooth.lam), + w=None, + mod_squ_fin_diff_mat_lub=whittaker_smooth.base_squ_fw_fin_diff_mat_lub_, + )[2] + == BandedSolveDecompositions.CHOLESKY + ) assert np.allclose(spectrum_corr_pentapy[0], spectrum_corr_scipy[0]) -def 
test_whittaker_with_pentapy_multi_signals(): +@pytest.mark.parametrize( + "log10_lam", np.arange(start=-50.0, stop=170.0, step=20.0).tolist() +) +@pytest.mark.parametrize("difference", [1, 2, 10]) +@pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) +@pytest.mark.parametrize("size", [5_000]) +def test_whittaker_constant_signal( + size: int, fill_value: float, difference: int, log10_lam: float +) -> None: # Arrange - np.random.seed(42) - spectrum = np.random.rand(5, 1000) - whittaker_smooth = WhittakerSmooth(differences=2) + spectrum = np.full(shape=(size,), fill_value=fill_value).reshape((1, -1)) + whittaker_smooth = WhittakerSmooth(lam=10.0**log10_lam, differences=difference) - # Act with pentapy - spectrum_corr_pentapy = whittaker_smooth.fit_transform(spectrum) + # Act + spectrum_corrected = whittaker_smooth.fit_transform(spectrum) - # Assert with pentapy - assert ( - whittaker_smooth._solve( - bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None - )[2] - == BandedSolveDecompositions.PENTAPY + # Assert + # this test needs to be as strict as possible because the result has to be exact + assert np.allclose( + spectrum_corrected[0], + spectrum[0], + atol=size * np.finfo(np.float64).eps, # type: ignore + rtol=0.0, ) - - # Act without pentapy - whittaker_smooth._WhittakerLikeSolver__allow_pentapy = False # type: ignore - spectrum_corr_scipy = whittaker_smooth.fit_transform(spectrum) - - # Assert without pentapy - assert whittaker_smooth._solve( - bw=spectrum.transpose(), log_lam=np.log(whittaker_smooth.lam), w=None - )[2] in {BandedSolveDecompositions.CHOLESKY, BandedSolveDecompositions.LU} - assert np.allclose(spectrum_corr_pentapy, spectrum_corr_scipy) From 8f3b56f5c04bd0c244df9282388496bce37560cf Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Wed, 27 Dec 2023 14:29:57 +0100 Subject: [PATCH 014/118] fix: [44] added skip for pentapy tests --- tests/test_functionality.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git 
a/tests/test_functionality.py b/tests/test_functionality.py index a848b74f..21a615e4 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -1,3 +1,5 @@ +import logging + import numpy as np import pandas as pd import pytest @@ -28,7 +30,7 @@ StandardNormalVariate, ) from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth -from chemotools.utils.models import BandedSolveDecompositions +from chemotools.utils.models import _PENTAPY_AVAILABLE, BandedSolveDecompositions from tests.fixtures import reference_airpls # noqa: F401 from tests.fixtures import reference_arpls # noqa: F401 from tests.fixtures import reference_msc_mean # noqa: F401 @@ -832,6 +834,12 @@ def test_whittaker_smooth( def test_whittaker_with_pentapy( n_samples: int, with_weights: bool, same_weights_for_all: bool ): + # this test is skipped with a warning if pentapy is not installed + if not _PENTAPY_AVAILABLE: + logging.warning("pentapy is not installed") + pytest.skip("pentapy is not installed, test cannot be performed") + # else nothing + # Arrange np.random.seed(42) spectrum = np.random.rand(n_samples, 1000) From b9fd2e09d92a113e68824ae14f1f0c07c22ac61f Mon Sep 17 00:00:00 2001 From: Niklas Zell Date: Wed, 27 Dec 2023 21:16:11 +0100 Subject: [PATCH 015/118] refactor: [44] temporarily disabled long computation of squared finite difference matrix in favour of speed; added explanation of Whittaker smoothing --- chemotools/smooth/_whittaker_smooth.py | 3 +- .../utils/WHITTAKER_IMPLEMENTATION_DETAILS.md | 200 ++++++++++++++++++ chemotools/utils/whittaker_base.py | 23 +- 3 files changed, 208 insertions(+), 18 deletions(-) create mode 100644 chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 202d7ccb..bd625fbf 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -38,7 +38,8 @@ class WhittakerSmooth( The number of 
differences to use for the Whittaker smooth. If the aim is to obtain a smooth estimate of the ``m``-th order derivative, this should be set to at least ``m + 2``. - + Currently, values >= 6 are highly discouraged and might lead to obscured + smoothing. Attributes ---------- diff --git a/chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md b/chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md new file mode 100644 index 00000000..a5b3eea9 --- /dev/null +++ b/chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md @@ -0,0 +1,200 @@ +# 🧮 Implementation of the Whittaker Henderson smoothing algorithm + +The internal linear system to be solved for the Whittaker Henderson smoothing algorithm +is given by the following set of equations:
+ +$\mathbf{z}=(\mathbf{W}+\tau\cdot\mathbf{D}_{m}^{T}\mathbf{D}_{m})^{-1}\mathbf{W}\mathbf{y}$ + +where $\mathbf{z}$ is the vector of smoothed values, $\mathbf{W}$ is the diagonal matrix +of weights, $\tau$ is the smoothing parameter, $\mathbf{D}_{m}$ is the ${m}$-th order +forward finite difference matrix, and $\mathbf{y}$ is the vector of observed values. +The matrix $P=\mathbf{D}_{m}^{T}\mathbf{D}_{m}$ is often referred to as the +*penalty matrix*.

+The matrix to invert is symmetric, sparse, banded with ${2\cdot m + 1}$ non-zero +diagonals, and positive definite, i.e., all its eigenvalues are strictly positive +($>0$). From an algorithmic point of view, this means that the inversion can be performed +in ${\mathcal{O}\left(n\right)}$ time, where ${n}$ is the number of observations, by +using a banded Cholesky decomposition:
+ +$\mathbf{W}+\tau\cdot\mathbf{P}=\mathbf{L}\mathbf{L}^T$ + +where $\mathbf{L}$ is a lower triangular matrix which shares the same sparsity pattern +as $\mathbf{W}+\tau\cdot\mathbf{P}$. Inversion of a lower triangular matrix is trivial +when backward and forward substitution are used. + +## ⚠️ Problem + +However, all this is only true from a mathematical point of view. In practice, +floating-point arithmetic introduces numerical errors which can lead to an indefinite +matrix. In this case, the Cholesky decomposition fails and the algorithm cannot be used. +This happens for relatively small $n$ already when $m$ exceeds 4, but in spectroscopy +$n > 1000$ is not uncommon and $m = 6$ has been shown helpful in deriving an additional +set of weights for $\tau$ to make the smooth spatially adaptive.
+Besides, there is another problem. The penalty matrix +$\mathbf{P}$ alone is only positive semi-definite since it possesses $m$ zero +eigenvalues by design. From a mathematical perspective, this would not pose a problem +since $\mathbf{W}+\tau\cdot\mathbf{P}$ would still be positive definite as $\tau$ tends +to $+\infty$. Numerically, this is by far not the case because $\tau$-values that are an +order of $10^{16}$ greater than the order of the weights are already sufficient to make +the matrix positive semi-definite or even indefinite if some of the small eigenvalues go +negative in the calculations (for 64-bit float precision).
+On the other hand, as $\tau$ tends to $0$, the matrix can also become ill-conditioned +if some weights are numerically zero due to zero division.

+All in all, the banded Cholesky decomposition is not a robust algorithm for solving the +system of linear equations and even pivoted LU-decomposition suffers from the same +problems - even though it can withstand a few more orders of magnitude in $\tau$. + +## 💡 Solution + +One way out of this dilemma is to make the matrix positive definite by adding a small +positive constant to the main diagonal: + +$\mathbf{W}+\tau\cdot\mathbf{P}+\epsilon\cdot\mathbf{I}$ + +where $\epsilon$ is a small positive constant and $\mathbf{I}$ is the identity matrix. +Despite its simplicity, this approach requires that $\epsilon$ is determined at runtime +which can be costly because it depends on the eigenvalues of $\mathbf{W}$, the +eigenvalues of $\mathbf{P}$, as well as $\tau$.
+Therefore, both $\mathbf{W}$ and $\mathbf{P}$ are made positive +definite by adding a small positive constant to their main diagonal before the +decomposition is performed: + +$\mathbf{W}+\epsilon_{w}\cdot\mathbf{I}+\tau\cdot\left(\mathbf{P}+\epsilon_{p}\cdot\mathbf{I}\right)=\mathbf{L}\mathbf{L}^T$ + +Now, the only thing that remains to be done is to determine $\epsilon_{w}$ and +$\epsilon_{p}$ at runtime in an efficient manner that does not require the calculation +of any of the eigenvalues because this would be too costly. On top of that, if +approximations are used, they need to be as close as possible to the actual values, +because too large values of $\epsilon_{w}$ and $\epsilon_{p}$ can obscure the smoothing +effect while too small values can lead to numerical instabilities. A typically applied +way of scaling looks like + +$\epsilon_{a}=\varepsilon\cdot n\cdot\lambda_{max}\left(\mathbf{A}\right)$ + +where $\varepsilon$ is the floating point machine imprecision, $n$ is the number of +observations, and $\lambda_{max}\left(\mathbf{A}\right)$ is the largest eigenvalue of +the matrix $\mathbf{A}$ in question.
+This scaling is used, e.g., in ``numpy.linalg.lstsq`` where singular values that are +numerically zero need to be removed (it is used as a threshold there). + +### 🏋️ Determination of the weight $\epsilon_{w}$ + +The largest eigenvalue of $\mathbf{W}$ is given by the largest weight since it is a +diagonal matrix. Therefore, $\epsilon_{w}$ can be determined by + +$\epsilon_{w}=\varepsilon\cdot n\cdot\max\left(diag\left(\mathbf{W}\right)\right)$ + +where $diag\left(\mathbf{W}\right)$ is the vector of diagonal elements of $\mathbf{W}$ +and $max$ extracts the maximum value. This is trivial and efficient to calculate. + +### ☄️ Determination of the penalty $\epsilon_{p}$ + +Finding the largest eigenvalue of $\mathbf{D}_{m}^{T}\mathbf{D}_{m}$ is more +complicated. However, some simulations have shown that the limit value of the largest +eigenvalue is given by + +$\lim_{n \to \infty} \lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{D}_{m}\right)=4^{m}$ + +which appears to be a strict upper limit and thus perfectly suited for the scaling +factor + +$\epsilon_{p}=\varepsilon\cdot n\cdot 4^{m}$ + +Such an approximation is also cheap to compute, thereby making the algorithm both +efficient and robust.
+❗❗❗
+Due to the power of $m$, this approximation scales badly with increasing $m$ and +$n$. It is therefore recommended not to use $m > 6$. Probably also $n$ needs to be +limited in the future by running multiple smooths on subsets of the data and then +combining the results.
+❗❗❗ + +### 🧑‍💻 Final Implementation + +The updated weights matrix is then given by + +$\mathbf{W^{+}}=\mathbf{W}+\epsilon_{w}\cdot\mathbf{I}=\mathbf{W}+\varepsilon\cdot max\left(n, 10\right)\cdot\max\left(diag\left(\mathbf{W}\right)\right)\cdot\mathbf{I}$ + +where the $max$-operator was included to prevent $\epsilon_{w}$ from becoming too small. +Lifting the weights is not a problem because weights that need to be lifted will still +be negligible compared to the other weights afterwards.
+Analogously, the updated penalty matrix is given by + +$\mathbf{P}^{+}=\mathbf{P}+\epsilon_{p}\cdot\mathbf{I}=\mathbf{P}+\varepsilon\cdot max\left(n, 10\right)\cdot 4^{m}\cdot\mathbf{I}$ + +From a mathematical point of view, this approach introduces a second penalty term which +is the classical Tikhonov regularization term. Yet, this term is very small and +therefore virtually negligible compared to the actual derivative penalty term.
+Nevertheless, the Tikhonov regularization term will penalize large absolute values of +the smoothed values $\mathbf{z}$ which is not desirable since this will pull +$\mathbf{z}$ towards zero. To resolve this, the weighted average of the original values +$\mathbf{y}$ is subtracted before the smoothing is performed and added again afterwards: + +$\bar{y}=\frac{\sum_{i=1}^{n}w_{i}\cdot y_{i}}{\sum_{i=1}^{n}w_{i}}$
+$\mathbf{z}=\bar{y}+\left(\mathbf{W^{+}}+\tau\cdot\mathbf{P^{+}}\right)^{-1}\mathbf{W^{+}}\left(\mathbf{y}-\bar{y}\right)$ + +Consequently, $\mathbf{z}$ is pulled towards the weighted average of the original values +$\mathbf{y}$ instead of zero as $\tau$ tends to $+\infty$ which is way more desirable +(note that as $\tau$ tends to $+\infty$, $\mathbf{z}$ becomes a flat line anyway and +making it become the weighted average of $\mathbf{y}$ is mathematically sound). + +## 🏄 Extensions + +To make the smoothing spatially adaptive, the smoothing parameter $\tau$ is replaced by +an individual smoothing parameter $\tau_{i}$ for each observation $y_{i}$. +Mathematically, this is equivalent to + +$\mathbf{z}=\left(\mathbf{W}+\tau\cdot\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right)^{-1}\mathbf{W}\mathbf{y}$ + +where $\mathbf{M}$ is a diagonal matrix of smoothing parameter weights.
+Now, the determination of $\epsilon_{p}$ becomes more complicated because the +eigenvalues of $\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}$ are not known. However, they +can be estimated via the spectral norm which is defined as + +$\left\Vert\mathbf{A}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{A}^{T}\mathbf{A}\right)}$ + +where $\mathbf{A}$ is a matrix and $\lambda_{max}\left(\mathbf{A}^{T}\mathbf{A}\right)$ +is the largest eigenvalue of the matrix product $\mathbf{A}^{T}\mathbf{A}$.
+This norm is sub-multiplicative, i.e., + +$\left\Vert \mathbf{A}\mathbf{B}\right\Vert _{2}\leq\left\Vert\mathbf{A}\right\Vert _{2}\cdot\left\Vert\mathbf{B}\right\Vert _{2}$ + +which means that an upper bound for the maximum eigenvalue of $\mathbf{A}\mathbf{B}$ can +be estimated when the spectral norms of $\mathbf{A}$ and $\mathbf{B}$ are known. This is +the case for $\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}$ since $\mathbf{M}$ is again a +diagonal matrix and the spectral norm of $\mathbf{D}_{m}$ has almost been calculated +above as the maximum eigenvalue of $\mathbf{D}_{m}^{T}\mathbf{D}_{m}$.
+It follows that + +$\left\Vert\mathbf{M}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{M}^{T}\mathbf{M}\right)}=\sqrt{max\left(diag\left(\mathbf{M}^{T}\mathbf{M}\right)\right)}=\sqrt{max\left(diag\left(\mathbf{M}\right)^{2}\right)}=max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)$ + +where $abs\left(diag\left(\mathbf{M}\right)\right)$ is the vector of absolute values of +the diagonal elements of $\mathbf{M}$.
+For $\mathbf{D}_{m}^{T}$ the spectral norm is given by + +$\left\Vert\mathbf{D}_{m}^{T}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{D}_{m}\right)}=\sqrt{4^{m}}=2^{m}$ + +Finally, the upper bound for the maximum eigenvalue of +$\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}$ is given by + +$\left\Vert\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{M}^T\mathbf{D}_{m}\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right)}=\lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right)\leq\left\Vert\mathbf{D}_{m}^{T}\right\Vert _{2}\cdot\left\Vert\mathbf{M}\right\Vert _{2}\cdot\left\Vert\mathbf{D}_{m}\right\Vert _{2}=2^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)\cdot 2^{m}=4^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)$ + +Combining all this $\mathbf{P}^{+}$ can be determined by + +$\epsilon_{p}=\varepsilon\cdot max\left(n, 10\right)\cdot 4^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)$
+$\mathbf{P}^{+}=\mathbf{P}+\epsilon_{p}\cdot\mathbf{I}=\mathbf{P}+\varepsilon\cdot max\left(n, 10\right)\cdot 4^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)\cdot\mathbf{I}$ + +This is again a cheap and robust approximation that does not require the calculation of +any eigenvalues.
+❗❗❗
+Due to the power of $m$, this approximation scales badly with increasing $m$ and +$n$. It is therefore recommended not to use $m > 6$. Probably also $n$ needs to be +limited in the future by running multiple smooths on subsets of the data and then +combining the results.
+❗❗❗
+Such an approach will be useful for a spatially adaptive smoothing algorithm like the +one provided in + +*A. Corbas, S.J. Choquette: "Automated Spectral Smoothing with Spatially Adaptive +Penalized Least Squares", Applied Spectroscopy Volume 65, Issue 6, pp.665-677, 2011 +[DOI](https://doi.org/10.1366/10-05971)* diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index f276358c..2d4164c4 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -387,7 +387,7 @@ def _whittaker_solve( Parameters ---------- - X : np.ndarray of shape(n, m) + X : np.ndarray of shape (n, m) The series to be smoothed stored as individual rows. w : np.ndarray of shape(1, m), shape(n, m), or None The weights to be applied for smoothing. If only a single row is provided @@ -409,7 +409,7 @@ def _whittaker_solve( The lambda parameter used for the smoothing of each series. If `lam` was fixed, this is a vector of length `n` with the same value for each series. - """ + """ # noqa: E501 # a nested function is defined for updating the weights # TODO: add zero-weight protection (eigenvalues are weights themselves) @@ -434,25 +434,12 @@ def update_to_next_weights(iter_i: int) -> None: X.dtype == self.__dtype ), f"Internal error: Promotion to {self.__dtype} failed." 
- # the modified squared forward finite difference matrix is computed - mod_squ_fin_diff_mat_lub = posdef_mod_squared_fw_fin_diff_conv_matrix( - fw_fin_diff_mat=self.fw_fin_diff_mat_, - differences=self.differences_, - dia_mod_matrix=None, - max_eigval_mult=self.max_eigval_mult_, - dtype=self.__dtype, - ) - mod_squ_fin_diff_mat_lub = conv_to_lu_banded_storage( - a=mod_squ_fin_diff_mat_lub, - l_and_u=self.l_and_u_, - ) - # if multiple x with the same weights are to be solved for fixed lambda, this # can be done more efficiently by leveraging Pentapy's and LAPACK'S ability to # perform multiple solves from the same inversion at once if use_same_w_for_all: return self._solve_multiple_x( - X=X, w=w, mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub + X=X, w=w, mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_ ) # else nothing @@ -464,7 +451,9 @@ def update_to_next_weights(iter_i: int) -> None: for iter_i, x in enumerate(X): update_to_next_weights(iter_i=iter_i) X_smooth[iter_i], lam[iter_i] = self._solve_single_x( - x=x, w=w_curr, mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub + x=x, + w=w_curr, + mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_, ) return X_smooth, lam From fa0479a9d108753b6233ba731a7942024eac67ab Mon Sep 17 00:00:00 2001 From: MothNik Date: Thu, 18 Apr 2024 10:34:50 +0200 Subject: [PATCH 016/118] test push --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index febecc49..332ecf5a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ black isort matplotlib -pentapy +maturin pytest \ No newline at end of file From 25b5682b8e0f47f922433f89616dc847f25a235e Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 20 Apr 2024 11:41:14 +0200 Subject: [PATCH 017/118] fix: removed `maturin` from dev `requirements` --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt 
b/requirements-dev.txt index 332ecf5a..febecc49 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ black isort matplotlib -maturin +pentapy pytest \ No newline at end of file From 11f4937b10ab92c69cc347364db948576e127fb9 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 20 Apr 2024 13:33:18 +0200 Subject: [PATCH 018/118] feat: re-added improved version of banded LU-decomposition; added tests --- chemotools/utils/banded_linalg.py | 238 ++++++++++++++++++++++++++++++ chemotools/utils/models.py | 16 ++ tests/test_for_utils.py | 106 ++++++++++++- 3 files changed, 358 insertions(+), 2 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 81ae3a75..2e4cfb9f 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -1,9 +1,31 @@ from numbers import Integral import numpy as np +from numpy.typing import ArrayLike +from scipy.linalg import lapack from scipy.sparse import spmatrix from sklearn.utils import check_array, check_scalar +from chemotools.utils.models import BandedLUFactorization + + +def _datacopied(arr, original): + """ + Strictly check for ``arr`` not sharing any data wit ``original``, under the + assumption that ``arr = asarray(original)`` + + Was copied from Scipy to be consistent in the LAPACK-wrappers implemented here. + + """ + + if arr is original: + return False + + if not isinstance(original, np.ndarray) and hasattr(original, "__array__"): + return False + + return arr.base is None + def _check_full_arr_n_diag_counts_for_lu_banded( a_shape: tuple[int, int], @@ -148,6 +170,222 @@ def conv_to_lu_banded_storage( return ab +def lu_banded( + l_and_u: tuple[int, int], + ab: ArrayLike, + *, + check_finite: bool = True, +) -> BandedLUFactorization: + """ + Computes the LU-decomposition of a banded matrix ``A`` using LAPACK-routines. 
+ This function is a wrapper of the LAPACK-routine ``gbtrf`` which computes the LU- + decomposition of a banded matrix ``A`` in-place. It wraps the routine in an + analogous way to SciPy's ``scipy.linalg.cholesky_banded``. + + Parameters + ---------- + l_and_u : (int, int) + The number of "non-zero" sub- (first) and superdiagonals (second element) aside + the main diagonal which does not need to be considered here. "Non-zero" can be + a bit misleading in this context. These numbers should count up to the diagonal + after which all following diagonals are all zero. Zero-diagonals that come + before still need to be included. + Neither of both may exceed ``num_rows``. + Wrong specification of this can lead to non-zero-diagonals being ignored or + zero-diagonals being included which corrupts the results or reduces the + performance. + ab : array_like of shape (l_and_u[0] + 1 + l_and_u[1], n) + A 2D-Array resembling the matrix ``A`` in banded storage format (see Notes). + check_finite : bool, default=True + Whether to check that the input matrix contains only finite numbers. Disabling + may give a performance gain, but may result in problems (crashes, + non-termination) if the inputs do contain infinities or NaNs. + + Returns + ------- + lub_factorization : BandedLUFactorization + A dataclass containing the LU-factorization of the matrix ``A`` as follows: + ``lub``: The LU-decomposition of ``A`` in banded storage format (see Notes). + ``ipiv``: The pivoting indices. + ``l_and_u``: The number of sub- and superdiagonals of the matrix ``A`` that + are non-zero. + ``singular``: A boolean indicating whether the matrix is singular. 
+ + Notes + ----- + For LAPACK LU-decomposition, the matrix ``a`` is stored in ``ab`` using the matrix + diagonal ordered form: + + ```python + ab[u + i - j, j] == a[i,j] # see below for u + ``` + + An example of ``ab`` (shape of a is ``(7,7)``, ``u``=3 superdiagonals, ``l``=2 + subdiagonals) looks like: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + a10 a21 a32 a43 a54 a65 * # v subdiagonals + a20 a31 a42 a53 a64 * * + ``` + + where all entries marked with ``*`` are arbitrary values when returned by this + function. + Internally LAPACK relies on an expanded version of this format to perform inplace + operations that adds another ``l`` superdiagonals to the matrix in order to + overwrite them for the purpose of pivoting. The output is thus an expanded version + of the LU-decomposition of ``A`` in the same format where the main diagonal of + ``L`` is implicitly taken to be a vector of ones. The output can directly be used + for the LAPACK-routine ``gbtrs`` to solve linear systems of equations based on this + decomposition. + + """ + + # the (optional) finite check and Array-conversion are performed + if check_finite: + ab = np.asarray_chkfinite(ab) + else: + ab = np.asarray(ab) + + # then, the number of lower and upper subdiagonals needs to be checked for being + # consistent with the shape of ``ab`` + num_low_diags, num_upp_diags = l_and_u + if num_low_diags + num_upp_diags + 1 != ab.shape[0]: + raise ValueError( + f"\nInvalid values for the number of lower and upper " + f"diagonals: l+u+1 ({num_low_diags + num_upp_diags + 1}) does not equal " + f"ab.shape[0] ({ab.shape[0]})." 
+ ) + + # now, the LAPACK-routines can be called + # to make ``ab`` compatible with the shape the LAPACK expects in this case, it + # needs to be re-written into a larger Array that has zeros elsewhere + # FIXME: for tridiagonal matrices, the SciPy wrapper for ``gttrf`` should be used + lapack_routine = "gbtrf" + (gbtrf,) = lapack.get_lapack_funcs((lapack_routine,), (ab,)) + lpkc_ab = np.row_stack( + ( + np.zeros((num_low_diags, ab.shape[1]), dtype=ab.dtype), + ab, + ) + ) + lub, ipiv, info = gbtrf( + ab=lpkc_ab, kl=num_low_diags, ku=num_upp_diags, overwrite_ab=True + ) + + # then, the results needs to be validated and returned + # Case 1: the factorisation could be completed, which does not imply that the + # solution can be used for solving a linear system + if info >= 0: + return BandedLUFactorization( + lub=lub, + ipiv=ipiv, + l_and_u=l_and_u, + singular=info > 0, + ) + + # Case 2: the factorisation was not completed due to invalid input + raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrf.") + + +def lu_solve_banded( + lub_factorization: BandedLUFactorization, + b: ArrayLike, + *, + overwrite_b: bool = False, + check_finite: bool = True, +) -> np.ndarray: + """ + Solves a linear system of equations ``Ax=b`` with a banded matrix ``A`` using its + precomputed LU-decomposition. + This function wraps the LAPACK-routine ``gbtrs`` in an analogous way to SciPy's + ``scipy.linalg.cho_solve_banded``. + + Parameters + ---------- + lub_factorization : BandedLUFactorization + The LU-decomposition of the matrix ``A`` in banded storage format as returned by + the function :func:`lu_banded`. + b : ndarray of shape (n,) + A 1D-Array containing the right-hand side of the linear system of equations. + overwrite_b : bool, default=False + If ``True``, the contents of ``b`` can be overwritten by the routine. Otherwise, + a copy of ``b`` is created and overwritten. 
+ check_finite : bool, default=True + Whether to check that the input contains only finite numbers. Disabling may give + a performance gain, but may result in problems (crashes, non-termination) if the + inputs do contain infinities or NaNs. + + Returns + ------- + x : ndarray of shape (n,) + The solution to the system ``A x = b``. + + Raises + ------ + LinAlgError + If the system to solve is singular. + + """ + + # if the matrix is singular, the solution cannot be computed + if lub_factorization.singular: + raise np.linalg.LinAlgError("\nSystem is singular.") + + # the (optional) finite check and Array-conversion are performed + if check_finite: + lub_factorization.lub = np.asarray_chkfinite(lub_factorization.lub) + lub_factorization.ipiv = np.asarray_chkfinite(lub_factorization.ipiv) + b_inter = np.asarray_chkfinite(b) + else: + lub_factorization.lub = np.asarray(lub_factorization.lub) + lub_factorization.ipiv = np.asarray(lub_factorization.ipiv) + b_inter = np.asarray(b) + + overwrite_b = overwrite_b or _datacopied(b_inter, b) + + # then, the shapes of the LU-decomposition and ``b`` need to be validated against + # each other + if lub_factorization.shape[-1] != b_inter.shape[0]: + raise ValueError( + f"\nShapes of lub ({lub_factorization.shape[-1]}) and b " + f"({b_inter.shape[0]}) are not compatible." 
+ ) + + # now, the LAPACK-routine is called + (gbtrs,) = lapack.get_lapack_funcs(("gbtrs",), (lub_factorization.lub, b)) + x, info = gbtrs( + ab=lub_factorization.lub, + kl=lub_factorization.l_and_u[0], + ku=lub_factorization.l_and_u[1], + b=b, + ipiv=lub_factorization.ipiv, + overwrite_b=overwrite_b, + ) + + # then, the results needs to be validated and returned + # Case 1: the solution could be computed truly successfully, i.e., without any + # NaN-values + if info == 0 and not np.any(np.isnan(x)): + return x + + # Case 2: the solution was computed, but there were NaN-values in it + elif info == 0: + raise np.linalg.LinAlgError("\nMatrix is singular.") + + # Case 3: the solution could not be computed due to invalid input + elif info < 0: + raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrs.") + + # Case 4: unexpected error + raise AssertionError( + f"\nThe internal gbtrs returned info > 0 ({info}) which should not happen." + ) + + def slodget_cho_banded(decomposition: tuple[np.ndarray, bool]) -> tuple[float, float]: """Computes the logarithm of the absolute value of the determinant of a banded hermitian matrix `A` using its Cholesky-decomposition. 
This is way more efficient diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 7a8dce2c..4ec75f04 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -1,5 +1,8 @@ +from dataclasses import dataclass, field from enum import Enum +import numpy as np + # if possible, pentapy is imported since it provides a more efficient implementation # of solving pentadiagonal systems of equations, but the package is not in the # dependencies, so ``chemotools`` needs to be made aware of whether it is available @@ -17,3 +20,16 @@ class BandedSolveDecompositions(str, Enum): CHOLESKY = "cholesky" PENTAPY = "pentapy" + + +@dataclass() +class BandedLUFactorization: + lub: np.ndarray + ipiv: np.ndarray + l_and_u: tuple[int, int] + singular: bool + + shape: tuple[int, int] = field(default=(-1, -1), init=False) + + def __post_init__(self): + self.shape = self.lub.shape # type: ignore diff --git a/tests/test_for_utils.py b/tests/test_for_utils.py index 36565421..b01e6bcd 100644 --- a/tests/test_for_utils.py +++ b/tests/test_for_utils.py @@ -2,10 +2,15 @@ import numpy as np import pytest -from scipy.linalg import cholesky_banded, eigvals_banded, solveh_banded +from scipy.linalg import cholesky_banded, eigvals_banded, solve_banded, solveh_banded from scipy.sparse import eye as speye -from chemotools.utils.banded_linalg import conv_to_lu_banded_storage, slodget_cho_banded +from chemotools.utils.banded_linalg import ( + conv_to_lu_banded_storage, + lu_banded, + lu_solve_banded, + slodget_cho_banded, +) from chemotools.utils.finite_differences import ( calc_forward_diff_kernel, calc_limit_max_eigval_fin_diff_mat, @@ -80,6 +85,103 @@ def test_forward_finite_diff_conv_matrix(size: int, difference: int) -> None: ) +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("overwrite_b", [True, False]) +@pytest.mark.parametrize("n_rhs", [0, 1, 2]) +@pytest.mark.parametrize("n_upp_bands", [1, 2, 3, 4, 5, 6]) 
+@pytest.mark.parametrize("n_low_bands", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize( + "n_rows", [1, 2, 10, 11, 50, 51, 100, 101, 500, 501, 1_000, 1001, 5_000, 5001] +) +def test_lu_banded_solve( + n_rows: int, + n_low_bands: int, + n_upp_bands: int, + n_rhs: int, + overwrite_b: bool, + with_finite_check: bool, +) -> None: + """ + Tests the separate LU decomposition followed by solving a system of linear equations + for banded matrices. + + NOTE: A number of 0 right-hand sides are used for making the vector to solve a + NOTE: 1D-Array. + + """ + + # if the matrix cannot exist with the given shape, the test is skipped + n_rows_min = n_low_bands + n_upp_bands + 1 + if n_rows < n_rows_min: + pytest.skip( + f"Test skipped because the number of rows {n_rows} is smaller than the " + f"minimum number of rows {n_rows_min} required by the number of sub- " + f"{n_low_bands} and superdiagonals {n_upp_bands}." + ) + + # a random banded matrix and right-hand-side-vector/-matrix are generated + np.random.seed(seed=42) + ab = np.random.rand(n_low_bands + n_upp_bands + 1, n_rows) + b = np.random.rand(n_rows) if n_rhs == 0 else np.random.rand(n_rows, n_rhs) + + # first, the Scipy solution is computed because if this fails due to singularity, + # the test has to not test for equivalent results, but for failure + # NOTE: failure is indicated by the solution being ``None`` + # NOTE: this order of evaluation is also better for testing if the overwrite flag + # NOTE: is working correctly because otherwise SciPy would get the overwritten b + l_and_u = (n_low_bands, n_upp_bands) + x_ref = None + try: + x_ref = solve_banded( + l_and_u=l_and_u, + ab=ab, + b=b, + check_finite=True, + ) + + # NOTE: even if SciPy computes the solution "successfully", there might be NaNs + # NOTE: in the result, so the test has to check for that as well + if np.any(np.isnan(x_ref)): + x_ref = None + + except np.linalg.LinAlgError: + pass + + # the banded matrix is LU decomposed with the respective Chemotools 
function + lu_fact = lu_banded( + l_and_u=l_and_u, + ab=ab, + check_finite=with_finite_check, + ) + + # the linear system is solved with the respective Chemotools function + # Case 1: Scipy failed + if x_ref is None: + # in this case, the Chemotools function has to raise an exception as well + with pytest.raises(np.linalg.LinAlgError): + x = lu_solve_banded( + lub_factorization=lu_fact, + b=b, + overwrite_b=overwrite_b, + check_finite=with_finite_check, + ) + return + + # Case 2: Scipy succeeded + # in this case, the Chemotools function has to return the same result as Scipy + x = lu_solve_banded( + lub_factorization=lu_fact, + b=b, + overwrite_b=overwrite_b, + check_finite=with_finite_check, + ) + + # NOTE: the following check has to be fairly strict when it comes to equivalence + # NOTE: since the SciPy and Chemotools are basically doing the same under the hood + # NOTE: when it comes to the solution process (first LU, then triangular solve) + assert np.allclose(x, x_ref, atol=1e-10, rtol=1e-10) + + @pytest.mark.parametrize("difference", list(range(0, 21))) def test_limit_eigval_squ_fin_diff_mat(difference: int) -> None: """Tests the computation of the limit of the maximum eigenvalue of the squared From d9bb39ab517f818fec28df129a9b9f9c1f71ada1 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 20 Apr 2024 13:33:46 +0200 Subject: [PATCH 019/118] feat: parallelized `pytest` --- requirements-dev.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index febecc49..3b69fc17 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,4 +2,5 @@ black isort matplotlib pentapy -pytest \ No newline at end of file +pytest +pytest-xdist \ No newline at end of file From 556b92ff59ca2ad06809154f9c50a3ec0c14f6a2 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 20 Apr 2024 17:35:05 +0200 Subject: [PATCH 020/118] feat: added wrappers for a dataclass-based banded Cholesky decomposition and solve; added 
preliminary tests --- chemotools/utils/banded_linalg.py | 181 +++++++++++++++++++++++++++--- chemotools/utils/models.py | 73 ++++++++++++ 2 files changed, 241 insertions(+), 13 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 2e4cfb9f..95feb0f1 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -2,11 +2,13 @@ import numpy as np from numpy.typing import ArrayLike +from scipy.linalg import cho_solve_banded as scipy_cho_solve_banded +from scipy.linalg import cholesky_banded as scipy_cholesky_banded from scipy.linalg import lapack from scipy.sparse import spmatrix from sklearn.utils import check_array, check_scalar -from chemotools.utils.models import BandedLUFactorization +from chemotools.utils.models import BandedCholeskyFactorization, BandedLUFactorization def _datacopied(arr, original): @@ -349,9 +351,9 @@ def lu_solve_banded( # then, the shapes of the LU-decomposition and ``b`` need to be validated against # each other - if lub_factorization.shape[-1] != b_inter.shape[0]: + if lub_factorization.n_cols != b_inter.shape[0]: raise ValueError( - f"\nShapes of lub ({lub_factorization.shape[-1]}) and b " + f"\nShapes of lub ({lub_factorization.n_cols}) and b " f"({b_inter.shape[0]}) are not compatible." ) @@ -386,7 +388,143 @@ def lu_solve_banded( ) -def slodget_cho_banded(decomposition: tuple[np.ndarray, bool]) -> tuple[float, float]: +def slogdet_lu_banded( + lub_factorization: BandedLUFactorization, +) -> tuple[float, float]: + """ + Computes the logarithm of the absolute value and the sign of the determinant of a + banded matrix A using its LU-decomposition. This is way more efficient than + computing the determinant directly because the LU-decompositions main diagonals + already encode the determinant as the product of the diagonal entries of the + factors. 
+ + Parameters + ---------- + lub_factorization : BandedLUFactorization + The LU-decomposition of the matrix ``A`` in banded storage format as returned by + the function :func:`lu_banded`. + + Returns + ------- + sign : float + A number representing the sign of the determinant. + logabsdet : float + The natural log of the absolute value of the determinant. + If the determinant is zero, then `sign` will be 0 and `logabsdet` will be + -Inf. In all cases, the determinant is equal to ``sign * np.exp(logabsdet)``. + + Raises + ------ + OverflowError + If any of the diagonal entries of the LU-decomposition leads to an overflow in + the natural logarithm. + + """ + + # first, the number of actual row exchanges needs to be counted + unchanged_row_idxs = np.arange( + start=0, + stop=lub_factorization.n_rows, + step=1, + dtype=lub_factorization.ipiv.dtype, + ) + num_row_exchanges = np.count_nonzero(lub_factorization.ipiv - unchanged_row_idxs) + + # the sign-prefactor of the determinant is either +1 or -1 depending on whether the + # number of row exchanges is even or odd + sign = -1.0 if num_row_exchanges % 2 == 1 else 1.0 + + # since the determinant (without sign prefactor) is just the product of the diagonal + # product of L and the diagonal product of U, the calculation simplifies. 
As the + # main diagonal of L is a vector of ones, only the diagonal product of U is required + main_diag = lub_factorization.lub[lub_factorization.main_diag_row_idx, ::] + u_diaprod_sign = np.prod(np.sign(main_diag)) + with np.errstate(divide="ignore", over="ignore"): + logabsdet = np.sum(np.log(np.abs(main_diag))) + + # logarithms of zero are already properly handled, so there is not reason to worry + # about, since they are -inf which will result in a zero determinant in exp(); + # overflow however needs to lead to a raise and in this case the log(det) is either + # +inf in case of overflow only or NaN in case of the simultaneous occurrence of + # zero and overflow + if np.isnan(logabsdet) or np.isposinf(logabsdet): + raise OverflowError( + "\nFloating point overflow in natural logarithm. At least 1 main diagonal " + "entry results in overflow, thereby corrupting the determinant." + ) + + # finally, the absolute value of the natural logarithm of the determinant is + # returned together with its sign + if np.isneginf(logabsdet): + return 0.0, logabsdet + elif float(u_diaprod_sign) > 0.0: + return sign, logabsdet + + return -sign, logabsdet + + +def cholesky_banded( + ab: np.ndarray, + overwrite_ab: bool = False, + lower: bool = False, + check_finite: bool = True, +) -> BandedCholeskyFactorization: + """ + A drop-in replacement for SciPy's ``cholesky_banded`` that stores the factorization + in a dataclass. + + Please refer to the SciPy documentation for further information that is not + mentioned here. + + Returns + ------- + chob_factorization : BandedCholeskyFactorization + A dataclass containing the Cholesky-factorization of the matrix ``A`` as + follows: + ``lb``: The Cholesky-decomposition of ``A`` in banded storage format. + ``lower``: A boolean indicating whether the Cholesky-decomposition is in + lower triangular form (``True``) or in upper triangular form + (``False``). 
+ """ + + return BandedCholeskyFactorization( + lb=scipy_cholesky_banded(**locals()), + lower=lower, + ) + + +def cho_solve_banded( + chob_factorization: BandedCholeskyFactorization, + b: np.ndarray, + overwrite_b: bool = False, + check_finite: bool = True, +) -> np.ndarray: + """ + A drop-in replacement for SciPy's ``cho_solve_banded`` that relies on the + factorization being stored in a dataclass. + + Please refer to the SciPy documentation for further information that is not + mentioned here. + + Parameters + ---------- + chob_factorization : BandedCholeskyFactorization + The Cholesky-factorization of the matrix ``A`` in banded storage format as + returned by the function :func:`cholesky_banded`. + + """ + + return scipy_cho_solve_banded( + cb_and_lower=(chob_factorization.lb, chob_factorization.lower), + b=b, + overwrite_b=overwrite_b, + check_finite=check_finite, + ) + + +def slodget_cho_banded( + chob_factorization: BandedCholeskyFactorization, +) -> tuple[float, float]: """Computes the logarithm of the absolute value of the determinant of a banded hermitian matrix `A` using its Cholesky-decomposition. This is way more efficient than computing the determinant directly because the Cholesky factors' main @@ -394,12 +532,9 @@ def slodget_cho_banded(decomposition: tuple[np.ndarray, bool]) -> tuple[float, f Parameters ---------- - (cb, lower) : tuple, (np.ndarray, bool) - `cb` is a NumPy-2D-Array resembling the Cholesky-decomposition of `A` in banded - storage format as returned by ``cholesky_banded``. - `lower` is a boolean indicating whether the Cholesky-decomposition the lower - triangular form (``True``) or the upper triangular form was of `A` was used - (``False``). + chob_factorization : BandedCholeskyFactorization + The Cholesky-factorization of the matrix `A` in banded storage format as + returned by the function :func:`cholesky_banded`. 
Returns ------- @@ -410,9 +545,29 @@ def slodget_cho_banded(decomposition: tuple[np.ndarray, bool]) -> tuple[float, f The natural log of the absolute value of the determinant. It cannot be zero since the matrix under consideration is positive definite. + Raises + ------ + OverflowError + If any of the diagonal entries of the Cholesky-decomposition leads to an + overflow in the natural logarithm. + """ - lower = decomposition[1] - main_diag_idx = 0 if lower else -1 + # the sign-prefactor of the determinant is always +1 since the matrix is positive + # definite, so only the diagonal product of the Cholesky-decomposition is required + main_diag = chob_factorization.lb[chob_factorization.main_diag_row_idx, ::] + with np.errstate(divide="ignore", over="ignore"): + logabsdet = 2.0 * np.sum(np.log(main_diag)) + + # logarithms of zero are already properly handled, so there is not reason to worry + # about, since they are -inf which will result in a zero determinant in exp(); + # overflow however needs to lead to a raise and in this case the log(det) is either + # +inf in case of overflow only or NaN in case of the simultaneous occurrence of + # zero and overflow + if np.isnan(logabsdet) or np.isposinf(logabsdet): + raise OverflowError( + "\nFloating point overflow in natural logarithm. At least 1 main diagonal " + "entry results in overflow, thereby corrupting the determinant." 
+ ) - return 1.0, 2.0 * np.sum(np.log(decomposition[0][main_diag_idx, ::])) + return 1.0, logabsdet diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 4ec75f04..ea215d76 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -18,18 +18,91 @@ class BandedSolveDecompositions(str, Enum): + """ + Defines the types of decompositions that can be used to solve linear systems + involving banded matrices, i.e., + + - ``CHOLESKY``: Cholesky decomposition + - ``PIVOTED_LU``: LU decomposition with partial pivoting + - ``PENTAPY``: Pentadiagonal "decomposition" (it's actually a direct solve) + + """ + CHOLESKY = "cholesky" + PIVOTED_LU = "lu" PENTAPY = "pentapy" +@dataclass() +class BandedCholeskyFactorization: + """ + A dataclass that holds the Cholesky factorization of a symmetric positive-definite + matrix. + + Attributes + ---------- + lb: ndarray of shape (n_low_bands + 1, n_cols) or (1 + n_upp_bands, n_cols) + The lower or upper Cholesky factor of the matrix ``A`` in banded storage format. + lower : bool + If ``True``, the lower Cholesky factor is stored, otherwise the upper one. + shape : (int, int) + The shape of the matrix ``A`` in dense form. + n_rows, n_cols : int + The number of rows and columns of the matrix ``A`` in dense form. + main_diag_row_idx : int + The index of the main diagonal in the banded storage format. + + """ + + lb: np.ndarray + lower: bool + + shape: tuple[int, int] = field(default=(-1, -1), init=False) + n_rows: int = field(default=-1, init=False) + n_cols: int = field(default=-1, init=False) + main_diag_row_idx: int = field(default=-1, init=False) + + def __post_init__(self): + self.shape = self.lb.shape # type: ignore + self.n_rows, self.n_cols = self.shape + self.main_diag_row_idx = 0 if self.lower else self.n_rows - 1 + + @dataclass() class BandedLUFactorization: + """ + A dataclass that holds the partially pivoted LU factorization of a banded matrix. 
+ + Attributes + ---------- + lub: ndarray of shape (n_rows, n_cols) + The LU factorization of the matrix ``A`` in banded storage format. + ipiv: ndarray of shape (n_rows,) + The pivot indices. + l_and_u: tuple[int, int] + The number of lower and upper bands in the LU factorization. + singular: bool + If ``True``, the matrix ``A`` is singular. + shape : (int, int) + The shape of the matrix ``A`` in dense form. + n_rows, n_cols : int + The number of rows and columns of the matrix ``A`` in dense form. + main_diag_row_idx : int + The index of the main diagonal in the banded storage format. + + """ + lub: np.ndarray ipiv: np.ndarray l_and_u: tuple[int, int] singular: bool shape: tuple[int, int] = field(default=(-1, -1), init=False) + n_rows: int = field(default=-1, init=False) + n_cols: int = field(default=-1, init=False) + main_diag_row_idx: int = field(default=-1, init=False) def __post_init__(self): self.shape = self.lub.shape # type: ignore + self.n_rows, self.n_cols = self.shape + self.main_diag_row_idx = self.l_and_u[1] From 064afcfa69b9a2cd3ed86a2d2cc4e5e70a866a7a Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 21 Apr 2024 13:58:41 +0200 Subject: [PATCH 021/118] refactor: implemented fast computation of both squared forward finite differences matrix version that does not rely on `scipy.sparse` anymore to avoid overhead and redundant computations --- chemotools/utils/finite_differences.py | 320 +++++++++++++------------ 1 file changed, 164 insertions(+), 156 deletions(-) diff --git a/chemotools/utils/finite_differences.py b/chemotools/utils/finite_differences.py index 6cd6240b..f60de614 100644 --- a/chemotools/utils/finite_differences.py +++ b/chemotools/utils/finite_differences.py @@ -2,9 +2,6 @@ from numbers import Integral import numpy as np -from scipy.sparse import csr_matrix, dia_matrix -from scipy.sparse import diags as spdiags -from scipy.sparse import eye as speye from sklearn.utils import check_scalar @@ -12,7 +9,8 @@ def calc_forward_diff_kernel( *, 
differences: int, ) -> np.ndarray: - """Computes the kernel for forward finite differences which can be applied to a + """ + Computes the kernel for forward finite differences which can be applied to a series by means of a convolution, e.g., ```python @@ -27,18 +25,18 @@ def calc_forward_diff_kernel( The order of the differences starting from 0 for the original curve, 1 for the first order, 2 for the second order, ..., and ``m`` for the ``m``-th order differences. - Values below 0 are not allowed. + Values below 1 are not allowed. Returns ------- - fin_diff_kernel : np.ndarray of shape (differences + 1,) + fin_diff_kernel : ndarray of shape (differences + 1,) A NumPy-1D-vector resembling the kernel from the code example above. To avoid loss of precision, the data type is ``np.int64``. Raises ------ ValueError - If the difference order is below 0. + If the difference order is below 1. """ # the input is validated @@ -46,7 +44,7 @@ def calc_forward_diff_kernel( differences, name="differences", target_type=Integral, - min_val=0, + min_val=1, include_boundaries="left", ) @@ -60,192 +58,202 @@ def calc_forward_diff_kernel( ) -def forward_finite_diff_conv_matrix( +def _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( *, + n_data: int, differences: int, - series_size: int, -) -> dia_matrix: - """Computes the convolution matrix for forward finite differences which can be - applied to a series by means of a matrix multiplication, e.g., - - ```python - conv_mat = finite_diff_conv_matrix(differences=2, series_size=10) - differences = conv_mat @ series # boundaries require special care - ``` - - this is equivalent to - - ```python - kernel = calc_forward_fin_diff_kernel(differences=2) - differences = np.convolve(series, np.flip(kernel), mode="valid") - # NOTE: NumPy flips the kernel internally due to the definition of convolution - ``` - - Parameters - ---------- - differences : int - The order of the differences starting from 0 for the original curve, 1 for the - first order, 
2 for the second order, ..., and ``m`` for the ``m``-th order - differences. - Values below 0 are not allowed. - series_size : int - The number of data points in the series to which the convolution matrix is - applied. +) -> np.ndarray: + """ + Generates the squared forward finite differences matrix ``D.T @ D`` from the + forward finite difference matrix ``D`` of order ``differences``. It will be cast to + to the upper banded storage format used for LAPACK's banded Cholesky decomposition. - Returns - ------- - diff_mat : dia_matrix of shape (series_size - differences, series_size) - A sparse matrix resembling the convolution matrix from the code example above. - To avoid loss of precision, the data type is ``np.int64``. - - Raises - ------ - ValueError - If the difference order is below 0, or ``series_size`` is not sufficient to - support the respective difference order. + All unused elements in the banded matrix are set to zero. """ - # the input is validated - kernel_size = differences + 1 - try: - check_scalar( - series_size, - name="n_features", # for compatibility with sklearn - target_type=Integral, - min_val=kernel_size, - include_boundaries="left", - ) - except ValueError: - raise ValueError(f"Got n_features = {series_size}, must be >= {kernel_size}.") - - # afterwards, the kernel is computed ... + # the problems has to be split into a leading, a central, and a trailing part + # first, the leading part is computed because it might be that this is already + # enough + # first, the kernel for the forward differences is computed and the bandwidth is + # determined kernel = calc_forward_diff_kernel(differences=differences) - # ... 
and the convolution matrix is created - return spdiags( - diagonals=kernel, - offsets=np.arange(start=0, stop=kernel_size, step=1), # type: ignore - shape=(series_size - kernel_size + 1, series_size), - format="dia", - dtype=np.int64, + n_bands = 1 + 2 * differences + lead_n_rows = min(kernel.size, n_data - kernel.size + 1) + lead_n_cols = kernel.size + lead_n_rows - 1 + + # the leading matrix is computed as a dense matrix + dtd = np.zeros(shape=(lead_n_rows, lead_n_cols), dtype=np.int64) + for row_idx in range(0, lead_n_rows): + dtd[row_idx, row_idx : row_idx + kernel.size] = kernel + + # its squared form is computed + dtd = dtd.T @ dtd + + # now, the leading matrix is converted to a banded matrix + dtd_banded = np.zeros(shape=(differences + 1, lead_n_cols), dtype=np.int64) + for diag_idx in range(0, differences + 1): + offset = differences - diag_idx + dtd_banded[diag_idx, offset:None] = np.diag(dtd, k=offset) + + # if the number of data points is less than the kernel size minus one, the + # leading matrix is already the final matrix + if n_data <= n_bands: + return dtd_banded + + # otherwise, a central part has to be inserted + # this turns out to be just a column-wise repetition of the kernel computed with + # double the difference order, so this matrix can simple be inserted into the + # computed leading D.T @ D matrix + # NOTE: the doubled kernel is the most central column of the banded D.T @ D already + # computed + central_n_cols = n_data - dtd_banded.shape[1] + kernel_double = dtd_banded[::, kernel.size - 1].reshape((-1, 1)) + return np.concatenate( + ( + dtd_banded[::, 0 : kernel.size], + np.tile(kernel_double, (1, central_n_cols)), + dtd_banded[::, kernel.size :], + ), + axis=1, ) -def calc_limit_max_eigval_fin_diff_mat(differences: int) -> int: - """Computes the maximum eigenvalue of the forward finite difference matrix as - computed by ``forward_finite_diff_conv_matrix`` for the given difference order. 
It - only uses the limit value as the series size tends to infinity, but from some - tests, this seems to be an upper limit for the maximum singular value for any - series size which makes it ideal for thresholding. - - Parameters - ---------- - differences : int - The order of the differences starting from 0 for the original curve, 1 for the - first order, 2 for the second order, ..., and ``m`` for the ``m``-th order - differences. - Values below 0 are not allowed. +def _gen_squ_fw_fin_diff_mat_cho_banded_orig_first( + *, + n_data: int, + differences: int, +) -> np.ndarray: + """ + Generates the squared forward finite differences matrix ``D @ D.T`` from the + forward finite difference matrix ``D`` of order ``differences``. It will be cast to + to the upper banded storage format used for LAPACK's banded Cholesky decomposition. - Returns - ------- - max_eigval : int - The maximum eigenvalue of the forward finite difference matrix. + All unused elements in the banded matrix are set to zero. 
""" - # NOTE: this was found rather empirically, but it works - return 4**differences + # this case is simpler than the transposed case because the matrix is just a + # Toeplitz matrix with the kernel of double the difference order on the diagonal + kernel_double = calc_forward_diff_kernel(differences=2 * differences)[ + differences:None + ] + # for an odd difference order, the sign of the kernel has to be flipped + if differences % 2 == 1: + kernel_double = np.negative(kernel_double) + + n_rows = n_data - kernel_double.size + 1 + n_upp_plus_central_bands = min(n_rows, 1 + differences) + + # the matrix is computed as a dense and simple filled by means of a loop + ddt_banded = np.zeros(shape=(n_upp_plus_central_bands, n_rows), dtype=np.int64) + main_diag_idx = min(differences, n_upp_plus_central_bands - 1) + for offset in range(0, n_upp_plus_central_bands): + ddt_banded[main_diag_idx - offset, offset:None] = kernel_double[offset] + return ddt_banded -def posdef_mod_squared_fw_fin_diff_conv_matrix( + +def gen_squ_fw_fin_diff_mat_cho_banded( *, - fw_fin_diff_mat: dia_matrix, + n_data: int, differences: int, - dia_mod_matrix: dia_matrix | None, - max_eigval_mult: float, - dtype: type, -) -> csr_matrix: - """Computes the modified squared forward finite difference matrix ``P`` for the - given difference order and series size. It is computed as - - ```python - # the pre-computation is obtained which might still be positive semi-definite - P = D.T @ M @ D - # the maximum eigenvalue of P is estimated to make it positive definite - max_lam_p = max_lam_dtd * max_lam_m - # by lifting the main diagonal, P is made numerical positive definite - P += max_lam_p * max_eigval_mult * I - ``` + orig_first: bool, +) -> np.ndarray: + """ + Generates the squared forward finite differences matrix ``D @ D.T`` or ``D.T @ D`` + from the forward finite difference matrix ``D`` of order ``differences``. 
It will be + cast to to the upper banded storage format used for LAPACK's banded Cholesky + decomposition. - where ``D`` is the convolution matrix for forward finite differences, ``M`` is the - diagonal matrix of the modified weights, ``max_lam_dtd`` is the maximum eigenvalue - ``D.T @ D``, ``max_lam_m`` is the maximum eigenvalue of ``M``, i.e., the maximum - weight (since diagonal matrix), and ``max_lam_p`` is the maximum eigenvalue of - ``P``. For details on this approximation, please see Notes. + All unused elements in the banded matrix are set to zero. Parameters ---------- - fw_fin_diff_mat : dia_matrix - The convolution matrix for forward finite differences resembling ``D`` from the - description above. It can be computed by ``forward_finite_diff_conv_matrix``. + n_data : int + The number of data points in the series to which the forward finite differences + are applied. differences : int The order of the differences starting from 0 for the original curve, 1 for the first order, 2 for the second order, ..., and ``m`` for the ``m``-th order differences. - Values below 0 are not allowed. - dia_mod_matrix : dia_matrix or None - The sparse diagonal matrix of the modification weights resembling ``M`` from the - description above. If ``None``, this multiplication is skipped. - max_eigval_mult : float - The multiple of the maximum eigenvalue of the modified squared forward finite - differences matrix that is added to the main diagonal of the output matrix to - make it positive definite according to the description above. - dtype : type - The data type of the output matrix. + Values below 1 are not allowed. + orig_first : bool + If ``True``, the squared forward finite differences matrix ``D @ D.T`` is + computed. Otherwise, the squared forward finite differences matrix ``D.T @ D`` + is computed. 
Returns ------- - posdef_squ_diff_mat : csr_matrix of shape (series_size - differences, series_size) - A positive definite sparse matrix resembling the squared forward finite - difference matrix ``P`` from the description above. It will be of data type - ``dtype``. + squ_fw_fin_diff_mat_cho_banded : ndarray of shape (n_bands, n_data - differences + 1) or (n_bands, n_data) + The squared forward finite differences matrix in the upper banded storage format + used for LAPACK's banded Cholesky decomposition (see Notes for details). + When ``orig_first`` is ``True``, the matrix has at maximum ``differences + 1`` + bands (rows) and ``n_data - differences + 1`` columns. + Otherwise, the matrix has at maximum ``differences + 1`` bands (rows) and + ``n_data`` columns. Raises ------ ValueError - If the difference order is below 0, or the number of grid points is not - sufficient to support the respective difference order. + If ``n_data`` is below ``differences + 1``, i.e., the kernel does not fit into + the data at least once. + ValueError + If the difference order is below 1. Notes ----- - The approximation of ``max_lam_p`` is based on the spectral norm of the - matrix product. Since the spectral norm ``||P||2`` is submultiplicative, the - estimate ``||D.T||2 * ||M||2 * ||D||2`` is an upper bound for ``||P||2``. - As ``||D||2 = ||D.T||2 = sqrt(max_lam_dtd)`` and - ``||M||2 = sqrt(max_lam_m**2) = max_lam_m = M.max()``, the maximum eigenvalue of - ``P`` is estimated as ``max_lam_p = max_lam_dtd * max_lam_m``. For just ensuring - numerical stability, this is perfectly fine and it also won't overestimate the - maximum eigenvalue of ``P`` too much and therefore, the perturbation of the main - diagonal is kept small. + The squared forward finite differences matrix is stored in the upper banded storage + format used for LAPACK's banded Cholesky decomposition. 
+ This upper diagonal ordered form is given by the following ordering - """ + ```python + ab[u + i - j, j] == a[i,j] + ``` - # first, the maximum eigenvalue of the finite difference matrix is computed - squ_diff_mat_eigval_max = calc_limit_max_eigval_fin_diff_mat( - differences=differences - ) + e.g., for a symmetric matrix of shape (6, 6) with in total 3 superdiagonals, + 3 subdiagonals, and the main diagonal, the ordering is as follows: + + ```python + * * * a03 a14 a25 + * * a02 a13 a24 a35 + * a01 a12 a23 a34 a45 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 # main diagonal + ``` - # afterwards, the squared convolution matrix is computed - if dia_mod_matrix is None: - squ_diff_mat = fw_fin_diff_mat.T @ fw_fin_diff_mat - m_eigval_max = 1.0 - else: - squ_diff_mat = fw_fin_diff_mat.T @ dia_mod_matrix @ fw_fin_diff_mat - m_eigval_max = dia_mod_matrix.data.max() + where each `*` denotes a zero element. + + Written out, this would give the following matrix: + + ```python + a00 a01 a02 a03 0 0 + a01 a11 a12 a13 a14 0 + a02 a12 a22 a23 a24 a25 + a03 a13 a23 a33 a34 a35 + 0 a14 a24 a34 a44 a45 + 0 0 a25 a35 a45 a55 + ``` - # the main diagonal is lifted by a multiple of the machine epsilon - lift_mat = speye(m=fw_fin_diff_mat.shape[1], dtype=dtype, format="csr") - lift_mat *= max_eigval_mult * squ_diff_mat_eigval_max * m_eigval_max + """ # noqa: E501 - # the positive definite matrix is returned - return squ_diff_mat + lift_mat + # first, it needs to be ensured that the number of data points is enough to + # support the kernel for the respective difference order at least once + check_scalar( + n_data, + name="n_data", + target_type=Integral, + min_val=differences + 1, + include_boundaries="left", + ) + + # afterwards, the squared forward finite differences matrix is computed + if orig_first: + return _gen_squ_fw_fin_diff_mat_cho_banded_orig_first( + n_data=n_data, + differences=differences, + ) + + return _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( + n_data=n_data, + 
differences=differences, + ) From 097d4f0ce12e7e8db0f17e00c0b934b943066e45 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 22 Apr 2024 08:28:23 +0200 Subject: [PATCH 022/118] refactor: renamed decomposition model to solver model; added PentaPy model --- chemotools/utils/models.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index ea215d76..1855e0f2 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -17,10 +17,10 @@ # banded matrices -class BandedSolveDecompositions(str, Enum): +class BandedSolvers(str, Enum): """ - Defines the types of decompositions that can be used to solve linear systems - involving banded matrices, i.e., + Defines the types of solvers that can be used to solve linear systems involving + banded matrices, i.e., - ``CHOLESKY``: Cholesky decomposition - ``PIVOTED_LU``: LU decomposition with partial pivoting @@ -28,9 +28,20 @@ class BandedSolveDecompositions(str, Enum): """ - CHOLESKY = "cholesky" - PIVOTED_LU = "lu" - PENTAPY = "pentapy" + CHOLESKY = "Cholesky decomposition" + PIVOTED_LU = "pivoted LU decomposition" + PENTAPY = "direct pentadiagonal solver" + + +class BandedPentapyFactorization: + """ + A class that resembles the factorization of a pentadiagonal matrix with ``pentapy``. + It has no attributes since the factorization is not stored, but the class is used to + provide an easy way to check if the factorization is available. 
+ + """ + + pass @dataclass() From bc2198b05500efde8442d615a6be7faf8cee91c6 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 22 Apr 2024 08:33:22 +0200 Subject: [PATCH 023/118] feat: added new banded storage conversion function; added submodule header; added sections; renamed "LU-" to "LU " --- chemotools/utils/banded_linalg.py | 160 ++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 20 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 95feb0f1..d0c0e70d 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -1,3 +1,22 @@ +""" +This utility submodule provides functions for the linear algebra with banded matrices, +namely + +- conversion from the upper banded storage for LAPACK's banded Cholesky decomposition + to the banded storage for LAPACK's banded LU decomposition, +- wrappers for SciPy's LAPACK-routines for the banded Cholesky decomposition and the + corresponding linear solver, +- LU decomposition of a banded matrix and the corresponding linear solver, +- computation of the log-determinant of a banded matrix using its Cholesky or LU + decomposition + +The decomposition functions return dataclasses that facilitate the handling of the +factorizations. + +""" + +### Imports ### + from numbers import Integral import numpy as np @@ -10,6 +29,13 @@ from chemotools.utils.models import BandedCholeskyFactorization, BandedLUFactorization +### Type Aliases ### + +LAndUBandCounts = tuple[int, int] + + +### Auxiliary Functions ### + def _datacopied(arr, original): """ @@ -34,7 +60,7 @@ def _check_full_arr_n_diag_counts_for_lu_banded( l_and_u: tuple[int, int], ) -> None: """Validates the shape of a full array and the number of sub- and superdiagonals - for LU-decomposition of a banded (sparse) matrix. + for LU decomposition of a banded (sparse) matrix. 
""" num_rows, num_cols = a_shape num_low_diags, num_upp_diags = l_and_u @@ -75,12 +101,99 @@ def _check_full_arr_n_diag_counts_for_lu_banded( # else nothing +def conv_upper_chol_banded_to_lu_banded_storage( + ab: np.ndarray, +) -> tuple[LAndUBandCounts, np.ndarray]: + """ + Converts the upper banded storage format used by LAPACK's banded Cholesky + decomposition to the banded storage format used by LAPACK's banded LU + decomposition. + + Parameters + ---------- + ab : np.ndarray of shape (n_upp_bands + 1, n_cols) + The matrix ``A`` stored in the upper banded storage format used by LAPACK's + banded Cholesky decomposition (see Notes for details). + + Returns + ------- + l_and_u : (int, int) + The number of sub- (first) and superdiagonals (second element) aside the main + diagonal which does not need to be considered here. + ab : np.ndarray of shape (l_and_u[0] + 1 + l_and_u[1], n_cols) + The matrix ``A`` stored in the banded storage format used by LAPACK's banded LU + decomposition (see Notes for details). + + Notes + ----- + The upper diagonal ordered form for LAPACK's Cholesky decomposition is given by the + following ordering + + ```python + ab[u + i - j, j] == a[i,j] + ``` + + e.g., for a symmetric matrix ``A`` of shape (7, 7) with in total 3 superdiagonals, + 3 subdiagonals, and the main diagonal, the ordering is as follows: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + ``` + + where each `*` denotes a zero element. 
+ + For LAPACK's LU decomposition, the matrix `A` is stored in `ab` using the matrix + diagonal ordered form: + + ```python + ab[u + i - j, j] == a[i,j] + ``` + + The example from above would then look like this where basicall, all the + superdiagonal rows are just copied to the subdiagonal rows and moved to the left so + that the first non-zero element of each row is in the first column: + + ```python + * * * a03 a14 a25 a36 + * * a02 a13 a24 a35 a46 + * a01 a12 a23 a34 a45 a56 # ^ superdiagonals + a00 a11 a22 a33 a44 a55 a66 # main diagonal + a01 a12 a23 a34 a45 a56 * # v subdiagonals + a02 a13 a24 a35 a46 * * + a03 a14 a25 a36 * * * + ``` + + where all entries marked with `*` are as well zero elements although they will be + set to arbitrary values by this function. + + """ + + # an Array is initialised to store the subdiagonal part + num_low_diags = ab.shape[0] - 1 + main_diag_idx = num_low_diags + n_cols = ab.shape[1] + ab_subdiags = np.zeros(shape=(num_low_diags, n_cols), dtype=ab.dtype) + + for offset in range(1, num_low_diags + 1): + ab_subdiags[offset - 1, 0 : n_cols - offset] = ab[ + main_diag_idx - offset, offset:None + ] + + # the subdiagonal part is then concatenated to the original array and the result is + # returned + l_and_u = (num_low_diags, num_low_diags) + return l_and_u, np.row_stack((ab, ab_subdiags)) + + def conv_to_lu_banded_storage( a: np.ndarray | spmatrix, l_and_u: tuple[int, int], ) -> np.ndarray: """Converts a (sparse) square banded matrix A to its banded storage required for - LU-decomposition in LAPACK-routines like the function ``lu_banded`` or SciPy's + LU decomposition in LAPACK-routines like the function ``lu_banded`` or SciPy's ``solve_banded``. This format is identical for pentapy where it is referred to as "column-wise flattened". Cholesky-decompositions require a different format. 
@@ -116,7 +229,7 @@ def conv_to_lu_banded_storage( Notes ----- - For LAPACK LU-decomposition, the matrix `a` is stored in `ab` using the matrix + For LAPACK's LU decomposition, the matrix `a` is stored in `ab` using the matrix diagonal ordered form: ```python @@ -172,15 +285,18 @@ def conv_to_lu_banded_storage( return ab +### LAPACK-Wrappers for banded LU decomposition ### + + def lu_banded( - l_and_u: tuple[int, int], + l_and_u: LAndUBandCounts, ab: ArrayLike, *, check_finite: bool = True, ) -> BandedLUFactorization: """ - Computes the LU-decomposition of a banded matrix ``A`` using LAPACK-routines. - This function is a wrapper of the LAPACK-routine ``gbtrf`` which computes the LU- + Computes the LU decomposition of a banded matrix ``A`` using LAPACK-routines. + This function is a wrapper of the LAPACK-routine ``gbtrf`` which computes the LU decomposition of a banded matrix ``A`` in-place. It wraps the routine in an analogous way to SciPy's ``scipy.linalg.cholesky_banded``. @@ -206,8 +322,8 @@ def lu_banded( Returns ------- lub_factorization : BandedLUFactorization - A dataclass containing the LU-factorization of the matrix ``A`` as follows: - ``lub``: The LU-decomposition of ``A`` in banded storage format (see Notes). + A dataclass containing the LU factorization of the matrix ``A`` as follows: + ``lub``: The LU decomposition of ``A`` in banded storage format (see Notes). ``ipiv``: The pivoting indices. ``l_and_u``: The number of sub- and superdiagonals of the matrix ``A`` that are non-zero. 
@@ -215,8 +331,8 @@ def lu_banded( Notes ----- - For LAPACK LU-decomposition, the matrix ``a`` is stored in ``ab`` using the matrix - diagonal ordered form: + For LAPACK's banded LU decomposition, the matrix ``a`` is stored in ``ab`` using the + matrix diagonal ordered form: ```python ab[u + i - j, j] == a[i,j] # see below for u @@ -234,12 +350,13 @@ def lu_banded( a20 a31 a42 a53 a64 * * ``` - where all entries marked with ``*`` are arbitrary values when returned by this - function. + where all entries marked with `*` are zero elements although they will be set to + arbitrary values by this function. + Internally LAPACK relies on an expanded version of this format to perform inplace operations that adds another ``l`` superdiagonals to the matrix in order to overwrite them for the purpose of pivoting. The output is thus an expanded version - of the LU-decomposition of ``A`` in the same format where the main diagonal of + of the LU decomposition of ``A`` in the same format where the main diagonal of ``L`` is implicitly taken to be a vector of ones. The output can directly be used for the LAPACK-routine ``gbtrs`` to solve linear systems of equations based on this decomposition. @@ -302,14 +419,14 @@ def lu_solve_banded( ) -> np.ndarray: """ Solves a linear system of equations ``Ax=b`` with a banded matrix ``A`` using its - precomputed LU-decomposition. + precomputed LU decomposition. This function wraps the LAPACK-routine ``gbtrs`` in an analogous way to SciPy's ``scipy.linalg.cho_solve_banded``. Parameters ---------- lub_factorization : BandedLUFactorization - The LU-decomposition of the matrix ``A`` in banded storage format as returned by + The LU decomposition of the matrix ``A`` in banded storage format as returned by the function :func:`lu_banded`. b : ndarray of shape (n,) A 1D-Array containing the right-hand side of the linear system of equations. 
@@ -349,7 +466,7 @@ def lu_solve_banded( overwrite_b = overwrite_b or _datacopied(b_inter, b) - # then, the shapes of the LU-decomposition and ``b`` need to be validated against + # then, the shapes of the LU decomposition and ``b`` need to be validated against # each other if lub_factorization.n_cols != b_inter.shape[0]: raise ValueError( @@ -393,15 +510,15 @@ def slogdet_lu_banded( ) -> tuple[float, float]: """ Computes the logarithm of the absolute value and the sign of the determinant of a - banded matrix A using its LU-decomposition. This is way more efficient than - computing the determinant directly because the LU-decompositions main diagonals + banded matrix A using its LU decomposition. This is way more efficient than + computing the determinant directly because the LU decompositions main diagonals already encode the determinant as the product of the diagonal entries of the factors. Parameters ---------- lub_factorization : BandedLUFactorization - The LU-decomposition of the matrix ``A`` in banded storage format as returned by + The LU decomposition of the matrix ``A`` in banded storage format as returned by the function :func:`lu_banded`. Returns @@ -416,7 +533,7 @@ def slogdet_lu_banded( Raises ------ OverflowError - If any of the diagonal entries of the LU-decomposition leads to an overflow in + If any of the diagonal entries of the LU decomposition leads to an overflow in the natural logarithm. 
""" @@ -463,6 +580,9 @@ def slogdet_lu_banded( return -sign, logabsdet +### SciPy-Wrappers for banded Cholesky-decomposition ### + + def cholesky_banded( ab: np.ndarray, overwrite_ab: bool = False, From 67815f9d902e35cbcd457999f9dd11c3baf55147 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 22 Apr 2024 18:00:59 +0200 Subject: [PATCH 024/118] refactor: removed Cholesky support --- chemotools/utils/banded_linalg.py | 124 +----------------------------- chemotools/utils/models.py | 37 --------- 2 files changed, 3 insertions(+), 158 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index d0c0e70d..4aeb7377 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -4,11 +4,8 @@ - conversion from the upper banded storage for LAPACK's banded Cholesky decomposition to the banded storage for LAPACK's banded LU decomposition, -- wrappers for SciPy's LAPACK-routines for the banded Cholesky decomposition and the - corresponding linear solver, - LU decomposition of a banded matrix and the corresponding linear solver, -- computation of the log-determinant of a banded matrix using its Cholesky or LU - decomposition +- computation of the log-determinant of a banded matrix using its LU decomposition The decomposition functions return dataclasses that facilitate the handling of the factorizations. 
@@ -21,13 +18,11 @@ import numpy as np from numpy.typing import ArrayLike -from scipy.linalg import cho_solve_banded as scipy_cho_solve_banded -from scipy.linalg import cholesky_banded as scipy_cholesky_banded from scipy.linalg import lapack from scipy.sparse import spmatrix from sklearn.utils import check_array, check_scalar -from chemotools.utils.models import BandedCholeskyFactorization, BandedLUFactorization +from chemotools.utils.models import BandedLUFactorization ### Type Aliases ### @@ -152,7 +147,7 @@ def conv_upper_chol_banded_to_lu_banded_storage( ab[u + i - j, j] == a[i,j] ``` - The example from above would then look like this where basicall, all the + The example from above would then look like this where basically, all the superdiagonal rows are just copied to the subdiagonal rows and moved to the left so that the first non-zero element of each row is in the first column: @@ -578,116 +573,3 @@ def slogdet_lu_banded( return sign, logabsdet return -sign, logabsdet - - -### SciPy-Wrappers for banded Cholesky-decomposition ### - - -def cholesky_banded( - ab: np.ndarray, - overwrite_ab: bool = False, - lower: bool = False, - check_finite: bool = True, -) -> BandedCholeskyFactorization: - """ - A drop-in replacement for SciPy's ``cholesky_banded`` that stores the factorization - in a dataclass. - - Please refer to the SciPy documentation for further information that is not - mentioned here. - - Returns - ------- - chob_factorization : BandedCholeskyFactorization - A dataclass containing the Cholesky-factorization of the matrix ``A`` as - follows: - ``lb``: The Cholesky-decomposition of ``A`` in banded storage format. - ``lower``: A boolean indicating whether the Cholesky-decomposition is in - lower triangular form (``True``) or in upper triangular form - (``False``). 
- """ - - return BandedCholeskyFactorization( - lb=scipy_cholesky_banded(**locals()), - lower=lower, - ) - - -def cho_solve_banded( - chob_factorization: BandedCholeskyFactorization, - b: np.ndarray, - overwrite_b: bool = False, - check_finite: bool = True, -) -> np.ndarray: - """ - A drop-in replacement for SciPy's ``cho_solve_banded`` that relies on the - factorization being stored in a dataclass. - - Please refer to the SciPy documentation for further information that is not - mentioned here. - - Parameters - ---------- - chob_factorization : BandedCholeskyFactorization - The Cholesky-factorization of the matrix ``A`` in banded storage format as - returned by the function :func:`cholesky_banded`. - - """ - - return scipy_cho_solve_banded( - cb_and_lower=(chob_factorization.lb, chob_factorization.lower), - b=b, - overwrite_b=overwrite_b, - check_finite=check_finite, - ) - - -def slodget_cho_banded( - chob_factorization: BandedCholeskyFactorization, -) -> tuple[float, float]: - """Computes the logarithm of the absolute value of the determinant of a banded - hermitian matrix `A` using its Cholesky-decomposition. This is way more efficient - than computing the determinant directly because the Cholesky factors' main - diagonals already encode the determinant as the product of the diagonal entries. - - Parameters - ---------- - chob_factorization : BandedCholeskyFactorization - The Cholesky-factorization of the matrix `A` in banded storage format as - returned by the function :func:`cholesky_banded`. - - Returns - ------- - sign : float - A number representing the sign of the determinant. It is always +1 since - the matrix under consideration is positive definite. - logabsdet : float - The natural log of the absolute value of the determinant. It cannot be zero - since the matrix under consideration is positive definite. - - Raises - ------ - OverflowError - If any of the diagonal entries of the Cholesky-decomposition leads to an - overflow in the natural logarithm. 
- - """ - - # the sign-prefactor of the determinant is always +1 since the matrix is positive - # definite, so only the diagonal product of the Cholesky-decomposition is required - main_diag = chob_factorization.lb[chob_factorization.main_diag_row_idx, ::] - with np.errstate(divide="ignore", over="ignore"): - logabsdet = 2.0 * np.sum(np.log(main_diag)) - - # logarithms of zero are already properly handled, so there is not reason to worry - # about, since they are -inf which will result in a zero determinant in exp(); - # overflow however needs to lead to a raise and in this case the log(det) is either - # +inf in case of overflow only or NaN in case of the simultaneous occurrence of - # zero and overflow - if np.isnan(logabsdet) or np.isposinf(logabsdet): - raise OverflowError( - "\nFloating point overflow in natural logarithm. At least 1 main diagonal " - "entry results in overflow, thereby corrupting the determinant." - ) - - return 1.0, logabsdet diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 1855e0f2..1541e57f 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -22,13 +22,11 @@ class BandedSolvers(str, Enum): Defines the types of solvers that can be used to solve linear systems involving banded matrices, i.e., - - ``CHOLESKY``: Cholesky decomposition - ``PIVOTED_LU``: LU decomposition with partial pivoting - ``PENTAPY``: Pentadiagonal "decomposition" (it's actually a direct solve) """ - CHOLESKY = "Cholesky decomposition" PIVOTED_LU = "pivoted LU decomposition" PENTAPY = "direct pentadiagonal solver" @@ -44,41 +42,6 @@ class BandedPentapyFactorization: pass -@dataclass() -class BandedCholeskyFactorization: - """ - A dataclass that holds the Cholesky factorization of a symmetric positive-definite - matrix. - - Attributes - ---------- - lb: ndarray of shape (n_low_bands + 1, n_cols) or (1 + n_upp_bands, n_cols) - The lower or upper Cholesky factor of the matrix ``A`` in banded storage format. 
- lower : bool - If ``True``, the lower Cholesky factor is stored, otherwise the upper one. - shape : (int, int) - The shape of the matrix ``A`` in dense form. - n_rows, n_cols : int - The number of rows and columns of the matrix ``A`` in dense form. - main_diag_row_idx : int - The index of the main diagonal in the banded storage format. - - """ - - lb: np.ndarray - lower: bool - - shape: tuple[int, int] = field(default=(-1, -1), init=False) - n_rows: int = field(default=-1, init=False) - n_cols: int = field(default=-1, init=False) - main_diag_row_idx: int = field(default=-1, init=False) - - def __post_init__(self): - self.shape = self.lb.shape # type: ignore - self.n_rows, self.n_cols = self.shape - self.main_diag_row_idx = 0 if self.lower else self.n_rows - 1 - - @dataclass() class BandedLUFactorization: """ From 8cfa2f5a14f46d98f9cefb18c02ebe8aec6f8663 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 22 Apr 2024 18:57:06 +0200 Subject: [PATCH 025/118] feat: added submodule header; organized submodule; added automated smoother names enum --- chemotools/utils/models.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 1541e57f..0c480e57 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -1,3 +1,11 @@ +""" +This utility submodule implements important models, i.e., constants, Enums, and +dataclasses used throughout the package. 
+ +""" + +### Imports ### + from dataclasses import dataclass, field from enum import Enum @@ -13,8 +21,11 @@ except ImportError: _PENTAPY_AVAILABLE = False -# an Enum class for the decomposition types used for solving linear systems that involve -# banded matrices + +### Enums ### + +# an Enum class for the solve types used for solving linear systems that involve banded +# matrices class BandedSolvers(str, Enum): @@ -23,7 +34,7 @@ class BandedSolvers(str, Enum): banded matrices, i.e., - ``PIVOTED_LU``: LU decomposition with partial pivoting - - ``PENTAPY``: Pentadiagonal "decomposition" (it's actually a direct solve) + - ``PENTAPY``: pentadiagonal "decomposition" (it's actually a direct solve) """ @@ -31,6 +42,26 @@ class BandedSolvers(str, Enum): PENTAPY = "direct pentadiagonal solver" +# an Enum class for the kinds of automated smoothing by the Whittaker-Henderson smoother +# that can be applied to the data + + +class AutoSmoothMethods(str, Enum): + """ + Defines the types of automated smoothing methods that can be applied to the data + using the Whittaker-Henderson smoother, i.e., + + - ``LOG_MARGINAL_LIKELIHOOD``: smoothing based on the maximization of the log + marginal likelihood + + """ + + LOG_MARGINAL_LIKELIHOOD = "log marginal likelihood" + + +### (Data) Classes ### + + class BandedPentapyFactorization: """ A class that resembles the factorization of a pentadiagonal matrix with ``pentapy``. 
From f7f663823199d4c4639055ae9ac5847e4ad58682 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 22 Apr 2024 19:17:16 +0200 Subject: [PATCH 026/118] refactor: major refactoring of Whittaker smoother base class; removed Cholesky; adapted models; used iterators; used more efficient numerical operations; incorporated fast pre-computations; got rid off sparse matrix representations via SciPy --- chemotools/utils/whittaker_base.py | 733 ++++++++++++++++++----------- 1 file changed, 453 insertions(+), 280 deletions(-) diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 2d4164c4..c4a821ff 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -1,69 +1,94 @@ +""" +This utility submodule provides the base class for the Whittaker-like smoothing +algorithm. It is used to solve linear systems of equations that involve banded +matrices as they occur in applications like the Whittaker-Henderson-smoothing or +derived methods like Asymmetric Least Squares (ALS) baseline correction. 
+ +""" + +### Imports ### + +from typing import Generator, Optional, Union, overload + import numpy as np -from scipy.linalg import cho_solve_banded, cholesky_banded -from scipy.sparse import csr_matrix, dia_matrix -from chemotools.utils.banded_linalg import conv_to_lu_banded_storage -from chemotools.utils.finite_differences import ( - forward_finite_diff_conv_matrix, - posdef_mod_squared_fw_fin_diff_conv_matrix, +from chemotools.utils.banded_linalg import ( + LAndUBandCounts, + conv_upper_chol_banded_to_lu_banded_storage, + lu_banded, + lu_solve_banded, + slogdet_lu_banded, +) +from chemotools.utils.finite_differences import gen_squ_fw_fin_diff_mat_cho_banded +from chemotools.utils.models import ( + _PENTAPY_AVAILABLE, + AutoSmoothMethods, + BandedLUFactorization, + BandedPentapyFactorization, + BandedSolvers, ) -from chemotools.utils.models import _PENTAPY_AVAILABLE, BandedSolveDecompositions if _PENTAPY_AVAILABLE: import pentapy as pp -# else nothing -_CholeskyDecomposition = tuple[np.ndarray, bool] -_PentapyDecomposition = None -_Decomposition = _PentapyDecomposition | _CholeskyDecomposition +### Type Aliases ### + +_Decomposition = Union[BandedLUFactorization, BandedPentapyFactorization] + + +### Class Implementation ### class WhittakerLikeSolver: - """This class can be used to solve linear systems of equations that involve banded + """ + This class can be used to solve linear systems of equations that involve banded matrices as they occur in applications like the Whittaker-Henderson-smoothing or derived methods like Asymmetric Least Squares (ALS) baseline correction. It support weights and tries to use the most efficient method available. Attributes ---------- - series_size_ : int - The size of the series to smooth. It is equivalent to `n_features_in_`, but it - was renamed to be allow for definition after the initialisation. - lam_ : int or float - The lambda parameter to use for the Whittaker smooth. 
+ n_data_ : int + The number of data points within the series to smooth. It is equivalent to + ``n_features_in_``, but it was renamed to allow for definition after the + initialisation. + lam_ : int or float or AutoSmoothMethods + The lambda parameter to use for the smoothing, a.k.a. the penalty weight or + smoothing parameter. + If a member of :class:`AutoSmoothMethods` is provided, the lambda parameter is + fitted automatically, but then the pre-computations in :meth:`_setup_for_fit` + and/or :meth:`_whittaker_solve` might take significantly longer because more + pre-computations are required and multiple penalty weights are tested. differences_ : int - The number of differences to use for the Whittaker smooth. If the aim is to - obtain a smooth estimate of the ``m``-th order derivative, this should be set to + The number of differences to use for the smoothing. If the aim is to obtain a + smooth estimate of the ``m``-th order derivative, this should be set to at least ``m + 2``. - l_and_u_ : tuple[int, int] - The number of sub- (first) and superdiagonals (second element). Both will equal - ``differences_``. - fw_fin_diff_mat_ : dia_matrix - The finite difference matrix, which serves as a precursor for the penalty matrix - in its sparse representation as DIA-matrix. - base_squ_fw_fin_diff_mat_ : csr_matrix - The squared finite difference matrix, which serves as the penalty matrix in its - sparse representation as CSR-matrix. It is already made positive definite by - adding a multiple of the identity matrix to the main diagonal, but otherwise it - is its original form. It can be used directly for baseline correction algorithms - that do not require sophisticated modifications of the penalty matrix. - base_squ_fw_fin_diff_mat_lub_ : np.ndarray - The banded storage version of ``base_squ_fw_fin_diff_mat_`` for LAPACK LU - decomposition. It is stored this way because it is efficient for Pentapy while - being slightly inefficient for the Cholesky decomposition.
Since the conversion - for the latter only required row access in a C-order array, this should not be - a major time sink though. + For higher orders, the systems to solve tend to get numerically instable, + especially when ``n_data_`` grows large and high values for ``lam_`` are used. + Values below 1 are not allowed. + _auto_fit_lam_ : bool + Whether the lambda parameter is fitted automatically (``True``) or fixed + (``False``). + _l_and_u_ : (int, int) + The number of sub- (first) and superdiagonals (second element) of the final + matrix to solve for smoothing. Both elements will equal ``differences_``. + _penalty_matb_ : ndarray of shape (n_data - differences + 1, n_data - differences + 1) + The squared forward finite differences matrix ``D.T @ D`` stored in the banded + storage format used for LAPACK's banded LU decomposition. + _penalty_mat_log_pseudo_det_ : float + The natural logarithm of the pseudo-determinant of the squared forward finite + differences matrix ``D.T @ D`` which is used for the automatic fitting of the + lambda parameter by maximizing the log marginal likelihood, i.e., when + ``lam_ == AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD``. + If ``lam_`` is fixed, this is a NaN-value. + _pentapy_enabled_ : bool + Whether the Pentapy solver is enabled for the smoothing (``True``) or not + (``False``). + It can only be used if the number of differences is 2 and the lambda parameter + is fixed (and of course if ``pentapy`` is available). __dtype : type, default=np.float64 The data type to which the series to be smoothed will be converted to. To avoid numerical issues, all series are converted to double precision. - __machine_min_tol_mult : int, default=10 - The multiple of the machine epsilon that is used to make the penalty matrix - positive definite. It is only relevant if it exceeds ``series_size``. 
- Positive definiteness is assured by lifting the main diagonal by a small - multiple of the identity matrix whose scale depends on the machine precision - as well as the maximum eigenvalue of the squared forward finite difference - matrix. Please refer to the documentation of - ``posdef_mod_squared_fw_fin_diff_conv_matrix`` for more details. __allow_pentapy : bool, default=True Whether to enable the Pentapy solver if available. This is only used for debugging and testing purposes. @@ -71,7 +96,6 @@ class WhittakerLikeSolver: """ # noqa: E501 __dtype: type = np.float64 - __machine_min_tol_mult: int = 10 __allow_pentapy: bool = True def __init__( @@ -79,67 +103,155 @@ def __init__( ) -> None: pass + def _calc_penalty_log_pseudo_det(self) -> float: + """ + Computes the natural logarithm of the pseudo-determinant of the squared forward + finite differences matrix ``D.T @ D`` which is necessary for the calculation of + the log marginal likelihood for the automatic fitting of the penalty weight. + + Returns + ------- + log_pseudo_det : float + The natural logarithm of the pseudo-determinant of the penalty matrix. + + Raises + ------ + RuntimeError + If the pseudo-determinant of the penalty matrix is negative, thereby + indicating that the system is extremely ill-conditioned and the automatic + fitting of the penalty weight is not possible. + + Notes + ----- + Basically, this could be solved by evaluation of the eigenvalues of ``D.T @ D`` + with a banded eigensolver, but this is computationally expensive and not + necessary. + The pseudo-determinant of ``D.T @ D`` is the determinant of ``D @ D.T`` because + ``D.T @ D`` is rank-deficient with ``differences`` zero eigenvalues while + ``D @ D.T`` has full rank. + Since both matrices share the same non-zero eigenvalues, the pseudo-determinant + is easily computed as the determinant of ``D @ D.T`` via a partially pivoted + LU decomposition. 
+ + Throughout this method, the matrix ``D.T @ D`` is referred to as the "flipped + penalty matrix" even though it is not actually flipped. + + """ + + # the flipped penalty matrix D @ D.T is computed + # NOTE: the matrix is returned with integer entries because integer computations + # can be carried out at maximum precision; this has to be converted to + # double precision for the LU decomposition + flipped_penalty_matb = gen_squ_fw_fin_diff_mat_cho_banded( + n_data=self.n_data_, + differences=self.differences_, + orig_first=True, + ).astype(np.float64) + + # the pseudo-determinant is computed from the partially pivoted LU decomposition + # of the flipped penalty matrix + flipped_l_and_u, flipped_penalty_matb = ( + conv_upper_chol_banded_to_lu_banded_storage(ab=flipped_penalty_matb) + ) + log_pseudo_det_sign, log_pseudo_det = slogdet_lu_banded( + lub_factorization=lu_banded( + l_and_u=flipped_l_and_u, + ab=flipped_penalty_matb, + check_finite=False, + ), + ) + + # if the sign of the pseudo-determinant is positive, the log pseudo-determinant + # is returned + if log_pseudo_det_sign > 0.0: + return log_pseudo_det + + # otherwise, if is negative, the penalty matrix is extremely ill-conditioned and + # the automatic fitting of the penalty weight is not possible + raise RuntimeError( + f"\nThe pseudo-determinant of the penalty matrix is negative, indicating " + f"that the system is extremely ill-conditioned.\n" + f"Automatic fitting for {self.n_data_} data points and difference order " + f"{self.differences_} is not possible.\n" + f"Please consider reducing the number of data points to smooth by, e.g., " + f"binning or lowering the difference order." 
+ ) + def _setup_for_fit( self, - series_size: int, - lam: int | float, + n_data: int, + lam: Union[int, float, AutoSmoothMethods], differences: int, ) -> None: - """Pre-computes everything that can be computed for the smoothing in general as + """ + Pre-computes everything that can be computed for the smoothing in general as well as for fitting the lambda parameter itself. + + For the parameters, please refer to the documentation of the class. + """ # the input arguments are stored - self.series_size_: int = series_size - self.lam_: int | float = lam + self.n_data_: int = n_data + self.lam_: Union[int, float, AutoSmoothMethods] = lam self.differences_: int = differences - self.max_eigval_mult_: float = ( # type: ignore - np.finfo(self.__dtype).eps * max(self.__machine_min_tol_mult, series_size) - ) - # the forward finite difference matrix is computed ... - self.l_and_u_: tuple[int, int] = (self.differences_, self.differences_) - self.fw_fin_diff_mat_: dia_matrix = forward_finite_diff_conv_matrix( - differences=self.differences_, series_size=self.series_size_ + # the squared forward finite difference matrix D.T @ D is computed ... + # NOTE: the matrix is returned with integer entries because integer computations + # can be carried out at maximum precision; this has to be converted to + # double precision for the LU decomposition + self._l_and_u_: LAndUBandCounts + self._penalty_matb_: np.ndarray = gen_squ_fw_fin_diff_mat_cho_banded( + n_data=self.n_data_, + differences=self.differences_, + orig_first=False, + ).astype(np.float64) + + # ... and cast to the banded storage format for LAPACK's LU decomposition + self._l_and_u_, self._penalty_matb_ = ( + conv_upper_chol_banded_to_lu_banded_storage(ab=self._penalty_matb_) ) - # ... 
followed by the squared forward finite difference matrix - self.base_squ_fw_fin_diff_mat_: csr_matrix = ( - posdef_mod_squared_fw_fin_diff_conv_matrix( - fw_fin_diff_mat=self.fw_fin_diff_mat_, - differences=self.differences_, - dia_mod_matrix=None, - max_eigval_mult=self.max_eigval_mult_, - dtype=self.__dtype, + + # if the penalty weight is fitted automatically by maximization of the + # log marginal likelihood, the natural logarithm of the pseudo-determinant of + # D.T @ D is pre-computed + self._auto_fit_lam_: bool = isinstance(self.lam_, AutoSmoothMethods) + self._penalty_mat_log_pseudo_det_: float = float("nan") + if ( + self._auto_fit_lam_ + and self.lam_ == AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD + ): + self._penalty_mat_log_pseudo_det_: float = ( + self._calc_penalty_log_pseudo_det() ) - ) - self.base_squ_fw_fin_diff_mat_lub_: np.ndarray = conv_to_lu_banded_storage( - a=self.base_squ_fw_fin_diff_mat_, - l_and_u=self.l_and_u_, - ) # finally, Pentapy is enabled if available, the number of differences is 2, # and the lambda parameter is not fitted automatically - self._pentapy_enabled: bool = ( - _PENTAPY_AVAILABLE and self.differences_ == 2 and self.__allow_pentapy + self._pentapy_enabled_: bool = ( + _PENTAPY_AVAILABLE + and self.differences_ == 2 + and self.__allow_pentapy + and not self._auto_fit_lam_ ) - def _pentapy_solve(self, ab: np.ndarray, bw: np.ndarray) -> np.ndarray: - """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` - with the Pentapy package. This is written as the system ``A @ x = b`` where - ``A = W + lam * D^T @ D`` and ``b = W @ b``. + def _solve_pentapy(self, ab: np.ndarray, b_pen_weighted: np.ndarray) -> np.ndarray: + """ + Solves the linear system of equations ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` + with the ``pentapy`` package. This is the same as solving the linear system + ``A @ x = b`` where ``A = (1.0 / lam) * W + D.T @ D`` and ``b = (1.0 / lam) * W @ b``. 
Notes ----- Pentapy does not (maybe yet) allow for 2D right-hand side matrices, so the solution is computed for each column of ``bw`` separately. - """ + """ # noqa: E501 # for 1-dimensional right-hand side vectors, the solution is computed directly - if bw.ndim == 1: + if b_pen_weighted.ndim == 1: return pp.solve( mat=ab, - rhs=bw, + rhs=b_pen_weighted, is_flat=True, index_row_wise=False, solver=1, @@ -148,11 +260,16 @@ def _pentapy_solve(self, ab: np.ndarray, bw: np.ndarray) -> np.ndarray: # for 2-dimensional right-hand side matrices, the solution is computed for each # column separately else: - solution = np.empty(shape=(bw.shape[1], bw.shape[0])) - for iter_j in range(0, bw.shape[1]): + # NOTE: the solutions are first written into the rows of the solution matrix + # because row-access is more efficient for C-contiguous arrays; + # afterwards, the solution matrix is transposed + solution = np.empty( + shape=(b_pen_weighted.shape[1], b_pen_weighted.shape[0]) + ) + for iter_j in range(0, b_pen_weighted.shape[1]): solution[iter_j, ::] = pp.solve( mat=ab, - rhs=bw[::, iter_j], + rhs=b_pen_weighted[::, iter_j], is_flat=True, index_row_wise=False, solver=1, @@ -160,300 +277,356 @@ def _pentapy_solve(self, ab: np.ndarray, bw: np.ndarray) -> np.ndarray: return solution.transpose() - def _cholesky_solve( - self, ab: np.ndarray, bw: np.ndarray - ) -> tuple[np.ndarray, tuple[np.ndarray, bool]]: - """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` - with the Cholesky decomposition. This is written as the system ``A @ x = b`` - where ``A = W + lam * D^T @ D`` and ``b = W @ b``. + def _solve_pivoted_lu( + self, + ab: np.ndarray, + b_pen_weighted: np.ndarray, + ) -> tuple[np.ndarray, BandedLUFactorization]: + """ + Solves the linear system of equations ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` + with the LU decomposition. 
This is the same as solving the linear system + ``A @ x = b`` where ``A = (1.0 / lam) * W + D.T @ D`` and ``b = (1.0 / lam) * W @ b``. - Even though it is mathematically guaranteed that ``A`` is positive definite, - numerical errors can lead to a non-positive definite matrix. In this case, the - Cholesky decomposition fails and a ``LinAlgError`` is raised. + If the LU decomposition fails, a ``LinAlgError`` is raised which is fatal since + the next level of escalation would be using a QR-decomposition which is not + implemented (yet). - """ + """ # noqa: E501 - lower = True - cb = cholesky_banded(ab, lower=lower, check_finite=False) - decomposition = (cb, lower) + lub_factorization = lu_banded( + l_and_u=self._l_and_u_, + ab=ab, + check_finite=False, + ) return ( - cho_solve_banded(cb_and_lower=decomposition, b=bw, check_finite=False), - decomposition, + lu_solve_banded( + lub_factorization=lub_factorization, + b=b_pen_weighted, + check_finite=False, + overwrite_b=True, + ), + lub_factorization, ) def _solve( self, - bw: np.ndarray, - log_lam: float, - w: np.ndarray | None, - mod_squ_fin_diff_mat_lub: np.ndarray, - ) -> tuple[np.ndarray, _Decomposition, BandedSolveDecompositions]: - """Solves the linear system of equations ``(W + lam * D^T @ D) @ x = W @ b`` + b_pen_weighted: np.ndarray, + w_pen: np.ndarray, + ) -> tuple[np.ndarray, BandedSolvers, _Decomposition]: + """ + Solves the linear system of equations ``((1.0 / lam) * W + D^T @ D) @ x = (1.0 / lam) * W @ b`` where ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and ``D`` is the finite difference matrix of order ``differences``. + For details on why the system was formulated like this and not as usually done + in the literature, please refer to the Notes section. Parameters ---------- - bw : np.ndarray of shape (n,) or (n, m) - The weighted right-hand side vector or matrix of the linear system of - equations. 
+ b_pen_weighted : ndarray of shape (m,) or (m, n) + The penalized-weighted right-hand side vector or matrix of the linear system + of equations given by ``(1.0 / lam) * W @ b``. log_lam : float - The logarithm of the lambda parameter to use for the Whittaker-like smooth. - w : np.ndarray of shape (n,) - The weights to use for the linear system of equations. It must be a vector - even if ``bw`` is a matrix because having ``bw`` as a matrix is only - possible if lambda is fixed and the same weights are applied to all series. - mod_squ_fin_diff_mat_lub : np.ndarray of shape (n, n) - The positive definite (modified) squared forward finite difference matrix - stored in the banded storage for LAPACK LU decomposition. + The logarithm of the penalty weight lambda to use for the smoothing. + w_pen : ndarray of shape (m,) + The penalized weights to use for the linear system of equations given by + ``(1.0 / lam) * W``. + It must be a vector even if ``bw`` is a matrix because having ``bw`` as a + matrix is only possible if lambda is fixed and the same weight vector has + to be applied to all series Returns ------- - x : np.ndarray of shape (n,) + x : np.ndarray of shape (m,) The solution vector of the linear system of equations. - decomposition : tuple - The decomposition used to solve the linear system of equations. - For the Cholesky decomposition, this is a tuple ``(cb, lower)`` where ``cb`` - is the banded storage of the Cholesky decomposition and ``lower`` is a - boolean flag indicating whether the lower or upper triangular matrix is - stored. - For the Pentapy solver this is ``None``. decomposition_type : BandedSolveDecompositions The type of decomposition used to solve the linear system of equations. + decomposition : BandedLUFactorization or BandedPentapyFactorization + The decomposition used to solve the linear system of equations which is + stored as a class instance specifying everything required to solve the + system with the ``decomposition_type`` used. 
- """ + Raises + ------ + RuntimeError + If all available solvers failed to solve the linear system of equations + which indicates a highly ill-conditioned system. - # the banded storage for a LAPACK LU decomposition is computed by updating the - # diagonal of the squared forward finite difference matrix D^T @ D with the - # weights - ab = np.exp(log_lam) * mod_squ_fin_diff_mat_lub - if w is not None: - ab[self.differences_, ::] += w - else: - ab[self.differences_, ::] += 1.0 + Notes + ----- + Using the multiplication of the weight matrix ``W`` with the reciprocal of the + penalty weight lambda ``1.0 / lam`` is way more efficient because ``W`` only + possesses a single diagonal of non-zero elements while ``D.T @ D`` is a banded + matrix with at least 3 diagonals for ``differences >= 1``. ``D.T @ D`` is even + symmetric, so roughly 50% of the multiplications with ``D.T @ D`` would be + redundant. + Given a pre-computed ``(1.0) / lam * W``, the weighted right-hand side vector + ``(1.0 / lam) * W @ b`` is computed by element-wise multiplication. + So, instead of at least 3 * ``n_data`` only 2 * ``n_data`` multiplications are + required. If the number of bands in ``D.T @ D`` is 5 (``differences == 2``), the + number of multiplications is reduced by 60% already. 
+ + """ # noqa: E501 + + # the banded storage format for the LAPACK LU decomposition is computed by + # updating the main diagonal of the penalty matrix with the penalized weights + ab = self._penalty_matb_.copy() + ab[self.differences_, ::] += w_pen - # the linear system of equations is solved with the most efficient method with - # Cholesky decomposition as fallback + # the linear system of equations is solved with the most efficient method # Case 1: Pentapy can be used - if self._pentapy_enabled: - x = self._pentapy_solve(ab=ab, bw=bw) - if np.all(np.isfinite(x)): + if self._pentapy_enabled_: + x = self._solve_pentapy(ab=ab, b_pen_weighted=b_pen_weighted) + if np.isfinite(x).all(): return ( x, - None, - BandedSolveDecompositions.PENTAPY, + BandedSolvers.PENTAPY, + BandedPentapyFactorization(), ) - else: - # if Pentapy fails, the Cholesky decomposition is used as fallback - x, decomposition = self._cholesky_solve( - ab=ab[self.differences_ : :, ::], bw=bw - ) - return x, decomposition, BandedSolveDecompositions.CHOLESKY - - # Case 2: Pentapy cannot be used, but the matrix is NUMERICALLY positive - # definite - else: - x, decomposition = self._cholesky_solve( - ab=ab[self.differences_ : :, ::], bw=bw + # Case 2: LU decomposition (final fallback for pentapy) + try: + x, lub_factorization = self._solve_pivoted_lu( + ab=ab, b_pen_weighted=b_pen_weighted + ) + return x, BandedSolvers.PIVOTED_LU, lub_factorization + + except np.linalg.LinAlgError: + available_solvers = f"{BandedSolvers.PIVOTED_LU}" + if self._pentapy_enabled_: + available_solvers = f"{BandedSolvers.PENTAPY}, {available_solvers}" + + raise RuntimeError( + f"\nAll available solvers ({available_solvers}) failed to solve the " + f"linear system of equations which indicates a highly ill-conditioned " + f"system.\n" + f"Please consider reducing the number of data points to smooth by, " + f"e.g., binning or lowering the difference order." 
) - return x, decomposition, BandedSolveDecompositions.CHOLESKY - def _solve_single_x_fixed_lam( + @overload + def _get_penalized_weights(self, w: None) -> float: ... + + @overload + def _get_penalized_weights(self, w: np.ndarray) -> np.ndarray: ... + + def _get_penalized_weights( + self, w: Optional[np.ndarray] + ) -> Union[float, np.ndarray]: + """ + Computes the penalized weights to be used for the linear system of equations, + i.e., ``(1.0 / lam) * W`` where ``W`` is a diagonal matrix with the weights + ``w`` on the main diagonal. + + """ + + # if no weights are provided, the penalized weights are simply the reciprocal of + # the penalty weight lambda + if w is None: + return 1.0 / self.lam_ # type: ignore + + # otherwise, the penalized weights are the product of the reciprocal of the + # penalty weight lambda and the weights + # NOTE: instead of using divisions, the weights are multiplied with the + # reciprocal of the penalty weight lambda which is less numerically + # accurate but way faster + return w * (1.0 / self.lam_) # type: ignore + + def _solve_single_b_fixed_lam( self, - x_weighted: np.ndarray, - w: np.ndarray | None, - mod_squ_fin_diff_mat_lub: np.ndarray, + b: np.ndarray, + w: Optional[np.ndarray], ) -> tuple[np.ndarray, float]: - """Fits the Whittaker-like smooth with a fixed lambda parameter. + """ + Solves for the Whittaker-like smoother solution for a single series with a fixed + penalty weight lambda. For the parameters, please refer to the documentation of ``_solve``. Instead of - a 2D-Array, a 1D-Array is expected for ``x`` and ``w``. Besides, it expects - the product ``x * w`` to be passed as ``x_weighted`` since this is more - efficient than computing it inside the solver. + a 2D-Array, a 1D-Array is expected for ``b`` and ``w``. 
""" - # the solution of the linear system of equations is computed - x_smooth, _, _ = self._solve( - bw=x_weighted, - log_lam=np.log(self.lam_), # type: ignore - w=w, - mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub, - ) + # the penalized weights are computed + w_pen = self._get_penalized_weights(w=w) # finally, the solution is returned together with the lambda parameter - return x_smooth, self.lam_ # type: ignore + return self._solve(b_pen_weighted=b * w_pen, w_pen=w_pen)[0], self.lam_ # type: ignore - def _solve_single_x( + def _solve_single_b( self, - x: np.ndarray, - w: np.ndarray | None, - mod_squ_fin_diff_mat_lub: np.ndarray, + b: np.ndarray, + w: Optional[np.ndarray], ) -> tuple[np.ndarray, float]: - """Fits the Whittaker-like smooth to a single series for a fixed or fitted - lambda parameter. + """ + Solves for the Whittaker-like smoother solution for a single series with a fixed + or fitted lambda parameter. For the parameters, please refer to the documentation of ``solve``. Instead of 2D-Arrays, 1D-Arrays are expected for ``x`` and ``w``. """ - - # first, the weights need to be ensured to be invertible by using the relative - # condition number and then the weighted series is computed - # NOTE: this numerical trick ensures that the smoothing also works in the - # limiting case that ``lam`` is vanishing. 
Since the diagonal matrix W has - # eigenvalues that correspond to the main diagonal entries, this problem - # is readily solved by bounding the minimum weight to ``rcond * w.max()`` - # which works since a maximum weight of zero has already been excluded - if w is not None: - w_lifted = np.maximum(w, self.max_eigval_mult_ * w.max()) - x_wavg = np.average(x, weights=w_lifted) - x_weighted = w_lifted * (x - x_wavg) - else: - w_lifted = None - x_wavg = np.average(x) - x_weighted = x - x_wavg - # then, the solution of the linear system of equations is computed # NOTE: this is a placeholder where an if-else-statement needs to be inserted # for then the lambda parameter needs to be evaluated automatically - x_smooth, lam = self._solve_single_x_fixed_lam( - x_weighted=x_weighted, - w=w_lifted, - mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub, - ) - return x_smooth + x_wavg, lam + if not self._auto_fit_lam_: + return self._solve_single_b_fixed_lam(b=b, w=w) - def _solve_multiple_x( + def _solve_multiple_b( self, X: np.ndarray, - w: np.ndarray | None, - mod_squ_fin_diff_mat_lub: np.ndarray, + w: Optional[np.ndarray], ) -> tuple[np.ndarray, np.ndarray]: - """Fits the Whittaker-like smooth to multiple series when the lambda parameter - is fixed and the same weights are applied to all series. - It leverages the ability of LAPACK (not pentapy) to solve multiple linear - systems of equations at once from the same inversion. + """ + Solves for the Whittaker-like smoother solution for multiple series when the + lambda parameter is fixed and the same weights are applied to all series. + It leverages the ability of LAPACK (not ``pentapy``) to solve multiple linear + systems of equations at once from the same factorization. For the parameters, please refer to the documentation of ``_solve``. 
""" - # in this special case, the solution of the linear system of equations can be - # computed with a single matrix inversion - # first, the weights need to be ensured to be invertible by using the relative - # condition number and then the weighted series is computed - # NOTE: this numerical trick ensures that the smoothing also works in the - # limiting case that ``lam`` is vanishing. Since the diagonal matrix W has - # eigenvalues that correspond to the main diagonal entries, this problem - # is readily solved by bounding the minimum weight to ``rcond * w.max()`` - # which works since a maximum weight of zero has already been excluded - if w is not None: - w_lifted = np.maximum(w, self.max_eigval_mult_ * w.max()).ravel() - x_wavg = np.average(X, weights=w_lifted, axis=1) - x_weighted = np.transpose( - w_lifted[np.newaxis, ::] * (X - x_wavg[::, np.newaxis]) - ) - else: - x_wavg = np.average(X, axis=1) - x_weighted = np.transpose(X) - x_wavg[np.newaxis, ::] - w_lifted = None + # the penalized weights are computed + w_pen = self._get_penalized_weights(w=w) + if isinstance(w_pen, float): + w_pen = np.array([w_pen], dtype=self.__dtype) - # then, the solution of the linear system of equations is computed + # then, the solution of the linear system of equations is computed for the + # transposed series matrix (expected right-hand side format for the solvers) + # FIXME: ``w_pen`` somehow becomes an integer for the type checker X_smooth, _, _ = self._solve( - bw=x_weighted, - log_lam=np.log(self.lam_), # type: ignore - w=w_lifted, - mod_squ_fin_diff_mat_lub=mod_squ_fin_diff_mat_lub, + b_pen_weighted=(X * w_pen[np.newaxis, ::]).transpose(), # type: ignore + w_pen=w_pen, # type: ignore ) return ( - np.transpose(X_smooth + x_wavg[np.newaxis, ::]), + X_smooth.transpose(), np.full(shape=(X.shape[0],), fill_value=self.lam_), # type: ignore ) + def _get_weight_generator( + self, w: Optional[np.ndarray], n_series: int + ) -> Generator[Optional[np.ndarray], None, None]: + """ + 
Generates a generator that yields the weights for each series in a series matrix + ``X``. + + """ + + # Case 1: No weights + if w is None: + for _ in range(n_series): + yield None + + # Case 2: 1D weights + elif w.ndim == 1: + for _ in range(n_series): + yield w + + # Case 3: 2D weights + elif w.ndim == 2: + for w_vect in w: + yield w_vect + def _whittaker_solve( self, X: np.ndarray, *, - w: np.ndarray | None = None, + w_vect: np.ndarray | None = None, use_same_w_for_all: bool = False, ) -> tuple[np.ndarray, np.ndarray]: - """Solves the linear equations for Whittaker-Henderson smoothing for Arrays that - are stored in 2D format, i.e., each series is stored as a row. + """ + Solves for the Whittaker-like smoother solution for Arrays that are stored in + 2D format, i.e., each series is stored as a row. Internally it chooses the most appropriate method and solver depending on the - data dimensionality, the weights, and the system's available packages (pentapy). + data dimensionality, the weights, and the system's available packages + (``pentapy``). Parameters ---------- - X : np.ndarray of shape (n, m) + X : ndarray of shape (m, n) The series to be smoothed stored as individual rows. - w : np.ndarray of shape(1, m), shape(n, m), or None + w : ndarray of shape(1, n) or shape(m, n) or None The weights to be applied for smoothing. If only a single row is provided - and ``use_same_w_for_all``, the same weights can be applied for all series - in `X`, which enhances the smoothing a lot for fixed smoothing parameters - `lam`. + and ``use_same_w_for_all`` is ``True``, the same weights can be applied + for all series in ``X``, which enhances the smoothing a lot for fixed + smoothing parameters ``lam``. If ``None``, no weights are applied and each datapoint is assumed to have equal importance. This allows for ``use_same_w_for_all`` to be ``True`` as well. use_same_w_for_all - Whether to use the same weights for all series in `X`. 
This is only possible - if `w` is a single row or ``None``. + Whether to use the same weights for all series in ``X``. This is only + possible if ``w`` is a single row or ``None``. Returns ------- - X_smooth : np.ndarray of shape(n, m) + X_smooth : ndarray of shape(m, n) The smoothed series stored as individual rows. - lam : np.ndarray of shape(n,) - The lambda parameter used for the smoothing of each series. If `lam` was - fixed, this is a vector of length `n` with the same value for each series. + lam : np.ndarray of shape(m, ) + The lambda parameter used for the smoothing of each series. If ``lam`` was + fixed, this is a vector of length ``m`` with the same value for each series. """ # noqa: E501 - # a nested function is defined for updating the weights - # TODO: add zero-weight protection (eigenvalues are weights themselves) - def update_to_next_weights(iter_i: int) -> None: - nonlocal w_curr - if iter_i > 0: - if w is None: - w_curr = None - return - elif not use_same_w_for_all: - w_curr = w[iter_i, ::].copy() - else: - return - - else: - if w is None: - w_curr = None - else: - w_curr = w[iter_i, ::].copy() - - assert ( - X.dtype == self.__dtype - ), f"Internal error: Promotion to {self.__dtype} failed." 
- # if multiple x with the same weights are to be solved for fixed lambda, this - # can be done more efficiently by leveraging Pentapy's and LAPACK'S ability to + # can be done more efficiently by leveraging LAPACK'S (not pentapy's) ability to # perform multiple solves from the same inversion at once if use_same_w_for_all: - return self._solve_multiple_x( - X=X, w=w, mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_ - ) - # else nothing + return self._solve_multiple_b(X=X, w=w_vect) # otherwise, the solution of the linear system of equations is computed for # each series X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) - w_curr = None - for iter_i, x in enumerate(X): - update_to_next_weights(iter_i=iter_i) - X_smooth[iter_i], lam[iter_i] = self._solve_single_x( - x=x, - w=w_curr, - mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_, - ) + w_gen = self._get_weight_generator(w=w_vect, n_series=X.shape[0]) + for iter_i, (x_vect, w_vect) in enumerate(zip(X, w_gen)): + X_smooth[iter_i], lam[iter_i] = self._solve_single_b(b=x_vect, w=w_vect) return X_smooth, lam + + +if __name__ == "__main__": + + import time + + from matplotlib import pyplot as plt + + NOISE_STDDEV = 0.05 + N_DATA = 1000 + N_NOISE_REALIZATIONS = 10 + + x = np.linspace(0, 2 * np.pi, N_DATA) + np.random.seed(42) + y_singles = np.empty(shape=(N_NOISE_REALIZATIONS, N_DATA)) + noise_level = NOISE_STDDEV * (1 + 2 * np.abs(x - np.pi)) + for iter_i in range(N_NOISE_REALIZATIONS): + y_singles[iter_i, ::] = np.cos(x) + np.random.normal(scale=noise_level) + + y_stddev = y_singles.std(axis=0, ddof=1) + y = np.tile(y_singles.mean(axis=0)[np.newaxis, ::], reps=(2, 1)) + y += np.array([0.0, 1.0])[::, np.newaxis] + + start = time.time() + tt = WhittakerLikeSolver() + tt._setup_for_fit(n_data=x.size, lam=1e3, differences=1) + weights = 1.0 / np.square(y_stddev) + y_smooth, lam = tt._whittaker_solve( + X=y, + w_vect=np.array([weights, np.concatenate((weights[500:], weights[:500]))]), + 
use_same_w_for_all=False, + ) + print(f"Time: {(time.time() - start):.3f} seconds") + + fig, ax = plt.subplots() + + ax.plot(x, y.T, label="Original") + for idx in range(0, y.shape[0]): + ax.fill_between( + x, + y_smooth[idx, ::] - 2 * y_stddev, + y_smooth[idx, ::] + 2 * y_stddev, + alpha=0.5, + label="Confidence Interval", + ) + ax.plot(x, y_smooth.T, label="Smoothed") + + plt.show() From 60988488c6ffefd8ae005ed2befb39fb7152b2b6 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 22 Apr 2024 19:41:18 +0200 Subject: [PATCH 027/118] refactor: went for more secure way of specifying the penalty weight --- chemotools/utils/models.py | 71 +++++++++++++++++++++++++++++- chemotools/utils/whittaker_base.py | 40 ++++++++++------- 2 files changed, 95 insertions(+), 16 deletions(-) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 0c480e57..f1be47ae 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -8,6 +8,7 @@ from dataclasses import dataclass, field from enum import Enum +from typing import Union import numpy as np @@ -38,7 +39,7 @@ class BandedSolvers(str, Enum): """ - PIVOTED_LU = "pivoted LU decomposition" + PIVOTED_LU = "partially pivoted LU decomposition" PENTAPY = "direct pentadiagonal solver" @@ -62,6 +63,70 @@ class AutoSmoothMethods(str, Enum): ### (Data) Classes ### +# a dataclass for specification of the smoothing penalty weight lambda for the +# Whittaker-Henderson smoother + + +@dataclass() +class WhittakerSmoothLambda: + """ + A dataclass that holds the specification of the smoothing penalty weight or + smoothing parameter lambda for the Whittaker-Henderson smoother. + + Attributes + ---------- + low_bound, upp_bound: int or float + The lower and upper bound of the search space for the penalty weight. + Flipped bounds are automatically corrected, but they have to differ by at least + a factor of 10. + method: AutoSmoothMethods + The method to use for the automatic selection of the penalty weight. 
+ + Raises + ------ + ValueError + If ``upp_bound`` is not greater than 10 times ``low_bound`` after eventually + flipping the bounds. + + """ + + low_bound: Union[int, float] + upp_bound: Union[int, float] + method: AutoSmoothMethods + + def __post_init__(self): + # firs, the input types are checked + if not isinstance(self.low_bound, (int, float)) or not isinstance( + self.upp_bound, (int, float) + ): + raise TypeError( + f"\nThe lower bound ({self.low_bound}) and upper bound " + f"({self.upp_bound}) have to be integers or floats." + ) + + if not isinstance(self.method, AutoSmoothMethods): + raise TypeError( + f"\nThe method ({self.method}) has to be a member of the " + f"AutoSmoothMethods." + ) + + # then, the lower and upper bound are sanitized by swapping them if necessary + # and checking if the upper bound is at least 10 times the lower bound + if self.low_bound >= self.upp_bound: + self.low_bound, self.upp_bound = self.upp_bound, self.low_bound + + if self.upp_bound < 10 * self.low_bound: + raise ValueError( + f"\nThe upper bound ({self.upp_bound}) has to be at least 10 times the " + f"lower bound ({self.low_bound})." + ) + + +# a fake class for representing the factorization of a pentadiagonal matrix with +# pentapy which is empty since pentapy does not factorize the matrix but directly solves +# the system of equations + + class BandedPentapyFactorization: """ A class that resembles the factorization of a pentadiagonal matrix with ``pentapy``. 
@@ -73,6 +138,10 @@ class BandedPentapyFactorization: pass +# a dataclass for the factorization of a banded matrix with LU decomposition with p +# partial pivoting + + @dataclass() class BandedLUFactorization: """ diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index c4a821ff..3b62ab99 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -8,7 +8,7 @@ ### Imports ### -from typing import Generator, Optional, Union, overload +from typing import Generator, Optional, Tuple, Union, overload import numpy as np @@ -26,6 +26,7 @@ BandedLUFactorization, BandedPentapyFactorization, BandedSolvers, + WhittakerSmoothLambda, ) if _PENTAPY_AVAILABLE: @@ -34,6 +35,10 @@ ### Type Aliases ### _Decomposition = Union[BandedLUFactorization, BandedPentapyFactorization] +_WhittakerSmoothLambdaPlain = Tuple[ + Union[int, float], Union[int, float], AutoSmoothMethods +] +_LambdaInternal = Union[int, float, WhittakerSmoothLambda] ### Class Implementation ### @@ -52,11 +57,11 @@ class WhittakerLikeSolver: The number of data points within the series to smooth. It is equivalent to ``n_features_in_``, but it was renamed to be allow for definition after the initialisation. - lam_ : int or float or AutoSmoothMethods + lam_ : int or float or WhittakerSmoothLambda The lambda parameter to use for the smoothing, a.k.a. the penalty weight or smoothing parameter. - If a member of :class:`AutoSmoothMethods` is provided, the lambda parameter is - fitted automatically, but then the pre-computations in meth:`_setup_for_fit` + If a member of :class:`WhittakerSmoothLambda` is provided, the lambda parameter + is fitted automatically, but then the pre-computations in meth:`_setup_for_fit` and/or :meth:`_whittaker_solve` might take significantly longer because more pre-computations are required and multiple penalty weights are tested. 
differences_ : int @@ -180,7 +185,7 @@ def _calc_penalty_log_pseudo_det(self) -> float: def _setup_for_fit( self, n_data: int, - lam: Union[int, float, AutoSmoothMethods], + lam: Union[int, float, _WhittakerSmoothLambdaPlain, WhittakerSmoothLambda], differences: int, ) -> None: """ @@ -193,7 +198,11 @@ def _setup_for_fit( # the input arguments are stored self.n_data_: int = n_data - self.lam_: Union[int, float, AutoSmoothMethods] = lam + if isinstance(lam, (int, float, WhittakerSmoothLambda)): + self.lam_: _LambdaInternal = lam + elif isinstance(lam, tuple): + self.lam_: _LambdaInternal = WhittakerSmoothLambda(*lam) + self.differences_: int = differences # the squared forward finite difference matrix D.T @ D is computed ... @@ -215,15 +224,16 @@ def _setup_for_fit( # if the penalty weight is fitted automatically by maximization of the # log marginal likelihood, the natural logarithm of the pseudo-determinant of # D.T @ D is pre-computed - self._auto_fit_lam_: bool = isinstance(self.lam_, AutoSmoothMethods) + self._auto_fit_lam_: bool = isinstance(self.lam_, WhittakerSmoothLambda) self._penalty_mat_log_pseudo_det_: float = float("nan") - if ( - self._auto_fit_lam_ - and self.lam_ == AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD - ): - self._penalty_mat_log_pseudo_det_: float = ( - self._calc_penalty_log_pseudo_det() - ) + try: + if self._auto_fit_lam_ and self.lam_.method in { # type: ignore + AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD, + }: + self._penalty_mat_log_pseudo_det_ = self._calc_penalty_log_pseudo_det() + + except AttributeError: + pass # finally, Pentapy is enabled if available, the number of differences is 2, # and the lambda parameter is not fitted automatically @@ -531,7 +541,7 @@ def _whittaker_solve( self, X: np.ndarray, *, - w_vect: np.ndarray | None = None, + w_vect: Optional[np.ndarray] = None, use_same_w_for_all: bool = False, ) -> tuple[np.ndarray, np.ndarray]: """ From 254f90600bdb9cb6d95aae607171fd64c85039d8 Mon Sep 17 00:00:00 2001 From: MothNik 
Date: Mon, 22 Apr 2024 19:47:58 +0200 Subject: [PATCH 028/118] refactor: replaced intermediate solver distributed method by a dictionary to remove one indirection with one branched logic --- chemotools/utils/whittaker_base.py | 31 +++++++++++------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 3b62ab99..07917ade 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -461,25 +461,6 @@ def _solve_single_b_fixed_lam( # finally, the solution is returned together with the lambda parameter return self._solve(b_pen_weighted=b * w_pen, w_pen=w_pen)[0], self.lam_ # type: ignore - def _solve_single_b( - self, - b: np.ndarray, - w: Optional[np.ndarray], - ) -> tuple[np.ndarray, float]: - """ - Solves for the Whittaker-like smoother solution for a single series with a fixed - or fitted lambda parameter. - - For the parameters, please refer to the documentation of ``solve``. Instead of - 2D-Arrays, 1D-Arrays are expected for ``x`` and ``w``. 
- - """ - # then, the solution of the linear system of equations is computed - # NOTE: this is a placeholder where an if-else-statement needs to be inserted - # for then the lambda parameter needs to be evaluated automatically - if not self._auto_fit_lam_: - return self._solve_single_b_fixed_lam(b=b, w=w) - def _solve_multiple_b( self, X: np.ndarray, @@ -585,11 +566,21 @@ def _whittaker_solve( # otherwise, the solution of the linear system of equations is computed for # each series + # first, the smoothing method is specified depending on whether the penalty + # weight lambda is fitted automatically or not + smooth_method = self._solve_single_b_fixed_lam + if self._auto_fit_lam_: + smooth_method_assignment = { + AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD: self._solve_single_b_fixed_lam, + } + smooth_method = smooth_method_assignment[self.lam_.method] # type: ignore + + # then, the solution is computed for each series by means of a loop X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) w_gen = self._get_weight_generator(w=w_vect, n_series=X.shape[0]) for iter_i, (x_vect, w_vect) in enumerate(zip(X, w_gen)): - X_smooth[iter_i], lam[iter_i] = self._solve_single_b(b=x_vect, w=w_vect) + X_smooth[iter_i], lam[iter_i] = smooth_method(b=x_vect, w=w_vect) return X_smooth, lam From 7b5385f1a4066a855faa9779f3b6c1726ffea9ba Mon Sep 17 00:00:00 2001 From: MothNik Date: Wed, 1 May 2024 19:06:02 +0200 Subject: [PATCH 029/118] style: went for using `ndarray`-methods --- chemotools/utils/banded_linalg.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 4aeb7377..69b461f9 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -536,7 +536,7 @@ def slogdet_lu_banded( # first, the number of actual row exchanges needs to be counted unchanged_row_idxs = np.arange( start=0, - stop=lub_factorization.n_rows, + stop=lub_factorization.n_cols, 
step=1, dtype=lub_factorization.ipiv.dtype, ) @@ -550,9 +550,9 @@ def slogdet_lu_banded( # product of L and the diagonal product of U, the calculation simplifies. As the # main diagonal of L is a vector of ones, only the diagonal product of U is required main_diag = lub_factorization.lub[lub_factorization.main_diag_row_idx, ::] - u_diaprod_sign = np.prod(np.sign(main_diag)) + u_diaprod_sign = np.sign(main_diag).prod() with np.errstate(divide="ignore", over="ignore"): - logabsdet = np.sum(np.log(np.abs(main_diag))) + logabsdet = np.log(np.abs(main_diag)).sum() # logarithms of zero are already properly handled, so there is not reason to worry # about, since they are -inf which will result in a zero determinant in exp(); @@ -569,7 +569,7 @@ def slogdet_lu_banded( # returned together with its sign if np.isneginf(logabsdet): return 0.0, logabsdet - elif float(u_diaprod_sign) > 0.0: + elif u_diaprod_sign > 0.0: return sign, logabsdet return -sign, logabsdet From 51261d3aef83d54650d84add64cfd883a151d0dc Mon Sep 17 00:00:00 2001 From: MothNik Date: Wed, 1 May 2024 19:10:11 +0200 Subject: [PATCH 030/118] feat: added fixed lambda to models --- chemotools/utils/models.py | 198 ++++++++++++++++++++++++++++--------- 1 file changed, 152 insertions(+), 46 deletions(-) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index f1be47ae..21ef85db 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -8,7 +8,8 @@ from dataclasses import dataclass, field from enum import Enum -from typing import Union +from math import log +from typing import Literal, Optional, Tuple, Union import numpy as np @@ -43,22 +44,30 @@ class BandedSolvers(str, Enum): PENTAPY = "direct pentadiagonal solver" -# an Enum class for the kinds of automated smoothing by the Whittaker-Henderson smoother -# that can be applied to the data +# an Enum class for the kinds of smoothing by the Whittaker-Henderson smoother that can +# be applied to the data -class 
AutoSmoothMethods(str, Enum): +class WhittakerSmoothMethods(str, Enum): """ - Defines the types of automated smoothing methods that can be applied to the data - using the Whittaker-Henderson smoother, i.e., + Defines the types of smoothing methods that can be applied to the data using the + Whittaker-Henderson smoother, i.e., - - ``LOG_MARGINAL_LIKELIHOOD``: smoothing based on the maximization of the log - marginal likelihood + - ``FIXED``: fixed penalty weight (shorthand "fixed") + - ``LOGML``: smoothing based on the maximization of the log marginal likelihood + (shorthand "logml") + + Except for ``FIXED``, the penalty weight is automatically determined when using the + other methods. """ - LOG_MARGINAL_LIKELIHOOD = "log marginal likelihood" + FIXED = "fixed" + LOGML = "logml" + +# a type hint is defined for the Whittaker-Henderson smoother specification +_WhittakerSmoothMethodsAll = Union[WhittakerSmoothMethods, Literal["fixed", "logml"]] ### (Data) Classes ### @@ -75,52 +84,149 @@ class WhittakerSmoothLambda: Attributes ---------- - low_bound, upp_bound: int or float - The lower and upper bound of the search space for the penalty weight. - Flipped bounds are automatically corrected, but they have to differ by at least - a factor of 10. - method: AutoSmoothMethods - The method to use for the automatic selection of the penalty weight. + bounds: int or float or (int or float, int or float) + The bounds for the search space of the penalty weight lambda. The specification + can be either + + - a single value for a fixed penalty weight (requires ``method`` to be set to + ``WhittakerSmoothMethods.FIXED``), or + - a tuple of two values for the lower and upper bounds of the search space + (then ``method`` may not be set to ``WhittakerSmoothMethods.FIXED`` unless + the bounds are too close to each other as described below). + + Independently of the specification, the values have to be greater than or equal + to the zero tolerance ``1e-25``. 
+ If a lower and an upper bound are provided, they are flipped if necessary. + After that, the difference ``abs(upp_bound - low_bound)`` has to be at least + ``1e-5 * upp_bound`` for any method other than ``WhittakerSmoothMethods.FIXED``. + Otherwise, the method is set to ``WhittakerSmoothMethods.FIXED`` and the + ``fixed_lambda`` is set to the upper bound. + method: WhittakerSmoothMethods or {"fixed", "logml"} + The method to use for the selection of the penalty weight. If the bounds are too + close to each other, this will be set to ``WhittakerSmoothMethods.FIXED``. Raises ------ ValueError - If ``upp_bound`` is not greater than 10 times ``low_bound`` after eventually - flipping the bounds. + If ``method`` is invalid, i.e., it does not correspond to any of the + ``WhittakerSmoothMethods`` or their shorthands, or if it cannot be used in + combination with ``bounds``. + ValueError + If the bounds are invalid, i.e., they are not greater than or equal to the zero + tolerance ``1e-25``. """ - low_bound: Union[int, float] - upp_bound: Union[int, float] - method: AutoSmoothMethods - - def __post_init__(self): - # firs, the input types are checked - if not isinstance(self.low_bound, (int, float)) or not isinstance( - self.upp_bound, (int, float) - ): - raise TypeError( - f"\nThe lower bound ({self.low_bound}) and upper bound " - f"({self.upp_bound}) have to be integers or floats." - ) - - if not isinstance(self.method, AutoSmoothMethods): - raise TypeError( - f"\nThe method ({self.method}) has to be a member of the " - f"AutoSmoothMethods." 
- ) - - # then, the lower and upper bound are sanitized by swapping them if necessary - # and checking if the upper bound is at least 10 times the lower bound - if self.low_bound >= self.upp_bound: - self.low_bound, self.upp_bound = self.upp_bound, self.low_bound - - if self.upp_bound < 10 * self.low_bound: + bounds: Union[int, float, tuple[Union[int, float], Union[int, float]]] + method: _WhittakerSmoothMethodsAll + + fixed_lambda: float = field(default=float("nan"), init=False) + auto_bounds: tuple[float, float] = field( + default=(float("nan"), float("nan")), init=False + ) + method_used: WhittakerSmoothMethods = field( + default=WhittakerSmoothMethods.FIXED, init=False + ) + fit_auto: bool = field(default=False, init=False) + + __zero_tol: float = field(default=1e-25, init=False, repr=False) + __diff_tol: float = field(default=1e-5, init=False, repr=False) + + def _validate_n_set_method(self) -> None: + try: + self.method_used = WhittakerSmoothMethods(self.method) + except ValueError: raise ValueError( - f"\nThe upper bound ({self.upp_bound}) has to be at least 10 times the " - f"lower bound ({self.low_bound})." + f"\nThe method '{self.method}' is not valid. " + f"Please choose one of the following: " + f"'fixed', 'logml', {WhittakerSmoothMethods.FIXED.name}, " + f"{WhittakerSmoothMethods.LOGML.name}." ) + def __post_init__(self): + # the bounds are checked for validity + # Case 1: a single value is provided + if isinstance(self.bounds, (int, float)): + # first, the method is validated + self._validate_n_set_method() + + # in this case, the method has to be set to FIXED + if self.method_used != WhittakerSmoothMethods.FIXED: + raise ValueError( + f"\nThe method '{self.method_used.name}' was selected for a fixed " + f"penalty weight (i.e., bounds are just a scalar)." 
+ ) + + # the bound has to be greater than or equal to the zero tolerance + if self.bounds < self.__zero_tol: + raise ValueError( + f"\nThe penalty weight lambda has to be greater than or equal to " + f"the zero tolerance {self.__zero_tol}." + ) + + # the fixed lambda is set to the bound + self.fixed_lambda = float(self.bounds) + self.fit_auto = False + + # Case 2: a tuple of two values is provided + elif isinstance(self.bounds, tuple): + + # the bounds are flipped if necessary + low_bound, upp_bound = sorted(self.bounds) + + # the bounds have to be greater than or equal to the zero tolerance + if low_bound < self.__zero_tol or upp_bound < self.__zero_tol: + raise ValueError( + f"\nThe bounds for the penalty weight lambda have to be greater " + f"than or equal to the zero tolerance {self.__zero_tol}, but " + f"they are {low_bound} and {upp_bound}." + ) + + # the difference has to be at least 1e-5 * upp_bound to be considered + # as a search space + if abs(upp_bound - low_bound) >= self.__diff_tol * upp_bound: + # for this, the method is validated + self._validate_n_set_method() + + # if the method is not FIXED, the bounds are set as the search space + if self.method_used != WhittakerSmoothMethods.FIXED: + self.auto_bounds = (float(low_bound), float(upp_bound)) + self.fit_auto = True + return + + # if the bounds are a search space, but the method is set to FIXED, + # an error is raised + raise ValueError( + f"\nThe bounds for the penalty weight lambda are a search space " + f"({low_bound}, {upp_bound}), but the method is set to FIXED." + ) + + # otherwise, if the penalty weights is fixed, the method is set to FIXED as + # well + self.method_used = WhittakerSmoothMethods.FIXED + self.fixed_lambda = float(upp_bound) + self.fit_auto = False + + # Case 3: the bounds are neither a scalar nor a tuple of two values + raise TypeError( + f"\nThe bounds for the penalty weight lambda have to be either a scalar " + f"or a tuple of two values, but they are {self.bounds}." 
+ ) + + @property + def log_auto_bounds(self) -> Tuple[float, float]: + """ + The natural logarithms of the search space bounds for the penalty weight lambda. + + Returns + ------- + log_auto_bounds : (float, float) + The natural logarithms of the lower and upper bounds of the search space. + + """ + + return (log(self.auto_bounds[0]), log(self.auto_bounds[1])) + # a fake class for representing the factorization of a pentadiagonal matrix with # pentapy which is empty since pentapy does not factorize the matrix but directly solves @@ -179,4 +285,4 @@ class BandedLUFactorization: def __post_init__(self): self.shape = self.lub.shape # type: ignore self.n_rows, self.n_cols = self.shape - self.main_diag_row_idx = self.l_and_u[1] + self.main_diag_row_idx = self.n_rows - 1 - self.l_and_u[0] From a857ed20f8ca5e3a6651374e8f114ceb49f8aff8 Mon Sep 17 00:00:00 2001 From: MothNik Date: Wed, 1 May 2024 19:32:20 +0200 Subject: [PATCH 031/118] refactor/feat: added automatic smoothing based on log marginal likelihood; re-formulated linear system for consistency with literature (without performance penalty); re-formulated weight handling --- chemotools/utils/whittaker_base.py | 607 ++++++++++++++++++++--------- 1 file changed, 422 insertions(+), 185 deletions(-) diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 07917ade..c844df76 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -8,9 +8,11 @@ ### Imports ### -from typing import Generator, Optional, Tuple, Union, overload +from math import ceil, exp +from typing import Generator, Optional, Tuple, Union import numpy as np +from scipy.optimize import minimize_scalar from chemotools.utils.banded_linalg import ( LAndUBandCounts, @@ -19,14 +21,17 @@ lu_solve_banded, slogdet_lu_banded, ) -from chemotools.utils.finite_differences import gen_squ_fw_fin_diff_mat_cho_banded +from chemotools.utils.finite_differences import ( + calc_forward_diff_kernel, + 
gen_squ_fw_fin_diff_mat_cho_banded, +) from chemotools.utils.models import ( _PENTAPY_AVAILABLE, - AutoSmoothMethods, BandedLUFactorization, BandedPentapyFactorization, BandedSolvers, WhittakerSmoothLambda, + WhittakerSmoothMethods, ) if _PENTAPY_AVAILABLE: @@ -34,11 +39,11 @@ ### Type Aliases ### -_Decomposition = Union[BandedLUFactorization, BandedPentapyFactorization] +_Factorization = Union[BandedLUFactorization, BandedPentapyFactorization] +_FactorizationForLogMarginalLikelihood = BandedLUFactorization _WhittakerSmoothLambdaPlain = Tuple[ - Union[int, float], Union[int, float], AutoSmoothMethods + Union[int, float], Union[int, float], WhittakerSmoothMethods ] -_LambdaInternal = Union[int, float, WhittakerSmoothLambda] ### Class Implementation ### @@ -57,13 +62,6 @@ class WhittakerLikeSolver: The number of data points within the series to smooth. It is equivalent to ``n_features_in_``, but it was renamed to be allow for definition after the initialisation. - lam_ : int or float or WhittakerSmoothLambda - The lambda parameter to use for the smoothing, a.k.a. the penalty weight or - smoothing parameter. - If a member of :class:`WhittakerSmoothLambda` is provided, the lambda parameter - is fitted automatically, but then the pre-computations in meth:`_setup_for_fit` - and/or :meth:`_whittaker_solve` might take significantly longer because more - pre-computations are required and multiple penalty weights are tested. differences_ : int The number of differences to use for the smoothing. If the aim is to obtain a smooth estimate of the ``m``-th order derivative, this should be set to @@ -71,12 +69,18 @@ class WhittakerLikeSolver: For higher orders, the systems to solve tend to get numerically instable, especially when ``n_data_`` grows large and high values for ``lam_`` are used. Values below 1 are not allowed. - _auto_fit_lam_ : bool - Whether the lambda parameter is fitted automatically (``True``) or fixed - (``False``). 
+ _lam_inter_ : WhittakerSmoothLambda + The internal representation of the lambda parameter to use for the smoothing, + a.k.a. the penalty weight or smoothing parameter. + It is internally stored as an instance of the dataclass :class:`WhittakerSmoothLambda`. _l_and_u_ : (int, int) The number of sub- (first) and superdiagonals (second element) of the final matrix to solve for smoothing. Both elements will equal ``differences_``. + _diff_kernel_flipped_ : ndarray of shape (0, ) or (differences + 1,) + The flipped kernel to use for the forward finite differences. It is only + required for the automatic fitting of the lambda parameter by maximizing the log + marginal likelihood, i.e., when ``lam_ == WhittakerSmoothMethods.LOG_MARGINAL_LIKELIHOOD``. + Flipping is required due to NumPy's definition of convolution. _penalty_matb_ : ndarray of shape (n_data - differences + 1, n_data - differences + 1) The squared forward finite differences matrix ``D.T @ D`` stored in the banded storage format used for LAPACK's banded LU decomposition. @@ -84,7 +88,7 @@ class WhittakerLikeSolver: The natural logarithm of the pseudo-determinant of the squared forward finite differences matrix ``D.T @ D`` which is used for the automatic fitting of the lambda parameter by maximizing the log marginal likelihood, i.e., when - ``lam_ == AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD``. + ``lam_ == WhittakerSmoothMethods.LOG_MARGINAL_LIKELIHOOD``. If ``lam_`` is fixed, this is a NaN-value. _pentapy_enabled_ : bool Whether the Pentapy solver is enabled for the smoothing (``True``) or not @@ -97,17 +101,25 @@ class WhittakerLikeSolver: __allow_pentapy : bool, default=True Whether to enable the Pentapy solver if available. This is only used for debugging and testing purposes. + __zero_weight_tol : float, default=1e-10 + If any of the weights drops below ``weights.max() * __zero_weight_tol``, the + weight is considered zero for the evaluation of the log marginal likelihood. 
""" # noqa: E501 + __LN_TWO_PI: float = 1.8378770664093453 + __LN_TEN: float = 2.302585092994046 __dtype: type = np.float64 __allow_pentapy: bool = True + __zero_weight_tol: float = 1e-10 def __init__( self, ) -> None: pass + ### Initialization and Setup Methods ### + def _calc_penalty_log_pseudo_det(self) -> float: """ Computes the natural logarithm of the pseudo-determinant of the squared forward @@ -151,7 +163,7 @@ def _calc_penalty_log_pseudo_det(self) -> float: n_data=self.n_data_, differences=self.differences_, orig_first=True, - ).astype(np.float64) + ).astype(self.__dtype) # the pseudo-determinant is computed from the partially pivoted LU decomposition # of the flipped penalty matrix @@ -174,8 +186,8 @@ def _calc_penalty_log_pseudo_det(self) -> float: # otherwise, if is negative, the penalty matrix is extremely ill-conditioned and # the automatic fitting of the penalty weight is not possible raise RuntimeError( - f"\nThe pseudo-determinant of the penalty matrix is negative, indicating " - f"that the system is extremely ill-conditioned.\n" + f"\nThe pseudo-determinant of the penalty D.T @ D matrix is negative, " + f"indicating that the system is extremely ill-conditioned.\n" f"Automatic fitting for {self.n_data_} data points and difference order " f"{self.differences_} is not possible.\n" f"Please consider reducing the number of data points to smooth by, e.g., " @@ -185,8 +197,8 @@ def _calc_penalty_log_pseudo_det(self) -> float: def _setup_for_fit( self, n_data: int, - lam: Union[int, float, _WhittakerSmoothLambdaPlain, WhittakerSmoothLambda], differences: int, + lam: Union[int, float, _WhittakerSmoothLambdaPlain, WhittakerSmoothLambda], ) -> None: """ Pre-computes everything that can be computed for the smoothing in general as @@ -196,14 +208,36 @@ def _setup_for_fit( """ - # the input arguments are stored + # the input arguments are stored and validated self.n_data_: int = n_data - if isinstance(lam, (int, float, WhittakerSmoothLambda)): - self.lam_: 
_LambdaInternal = lam + self.differences_: int = differences + + self._lam_inter_: WhittakerSmoothLambda + if isinstance(lam, (int, float)): + self._lam_inter_ = WhittakerSmoothLambda( + bounds=lam, + method=WhittakerSmoothMethods.FIXED, + ) + elif isinstance(lam, WhittakerSmoothLambda): + self._lam_inter_ = lam elif isinstance(lam, tuple): - self.lam_: _LambdaInternal = WhittakerSmoothLambda(*lam) + if len(lam) != 3: + raise ValueError( + f"\nThe lambda parameter must be a tuple of three elements (lower " + f"bound, upper bound, method), but it has {len(lam)} elements " + f"instead." + ) - self.differences_: int = differences + self._lam_inter_ = WhittakerSmoothLambda( + bounds=(lam[0], lam[1]), + method=lam[2], + ) + else: + raise TypeError( + f"\nThe lambda parameter must be an integer, a float, a tuple of " + f"(lower bound, upper bound, method), or an instance of " + f"WhittakerSmoothLambda, but it is {type(lam)} instead." + ) # the squared forward finite difference matrix D.T @ D is computed ... # NOTE: the matrix is returned with integer entries because integer computations @@ -214,7 +248,7 @@ def _setup_for_fit( n_data=self.n_data_, differences=self.differences_, orig_first=False, - ).astype(np.float64) + ).astype(self.__dtype) # ... 
and cast to the banded storage format for LAPACK's LU decomposition self._l_and_u_, self._penalty_matb_ = ( @@ -223,17 +257,18 @@ def _setup_for_fit( # if the penalty weight is fitted automatically by maximization of the # log marginal likelihood, the natural logarithm of the pseudo-determinant of - # D.T @ D is pre-computed - self._auto_fit_lam_: bool = isinstance(self.lam_, WhittakerSmoothLambda) + # D.T @ D is pre-computed together with the forward finite difference kernel + self._diff_kernel_flipped_: np.ndarray = np.ndarray([], dtype=self.__dtype) self._penalty_mat_log_pseudo_det_: float = float("nan") - try: - if self._auto_fit_lam_ and self.lam_.method in { # type: ignore - AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD, - }: - self._penalty_mat_log_pseudo_det_ = self._calc_penalty_log_pseudo_det() - - except AttributeError: - pass + if self._lam_inter_.fit_auto and self._lam_inter_.method_used in { + WhittakerSmoothMethods.LOGML, + }: + # NOTE: the kernel is also returned with integer entries because integer + # computations can be carried out at maximum precision + self._diff_kernel_flipped_ = np.flip( + calc_forward_diff_kernel(differences=self.differences_) + ).astype(self.__dtype) + self._penalty_mat_log_pseudo_det_ = self._calc_penalty_log_pseudo_det() # finally, Pentapy is enabled if available, the number of differences is 2, # and the lambda parameter is not fitted automatically @@ -241,14 +276,16 @@ def _setup_for_fit( _PENTAPY_AVAILABLE and self.differences_ == 2 and self.__allow_pentapy - and not self._auto_fit_lam_ + and not self._lam_inter_.fit_auto ) - def _solve_pentapy(self, ab: np.ndarray, b_pen_weighted: np.ndarray) -> np.ndarray: + ### Solver Methods ### + + def _solve_pentapy(self, ab: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: """ - Solves the linear system of equations ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` + Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with the ``pentapy`` package. 
This is the same as solving the linear system - ``A @ x = b`` where ``A = (1.0 / lam) * W + D.T @ D`` and ``b = (1.0 / lam) * W @ b``. + ``A @ x = b`` where ``A = W + lam * D.T @ D`` and ``b = W @ b``. Notes ----- @@ -258,10 +295,10 @@ def _solve_pentapy(self, ab: np.ndarray, b_pen_weighted: np.ndarray) -> np.ndarr """ # noqa: E501 # for 1-dimensional right-hand side vectors, the solution is computed directly - if b_pen_weighted.ndim == 1: + if b_weighted.ndim == 1: return pp.solve( mat=ab, - rhs=b_pen_weighted, + rhs=b_weighted, is_flat=True, index_row_wise=False, solver=1, @@ -273,13 +310,11 @@ def _solve_pentapy(self, ab: np.ndarray, b_pen_weighted: np.ndarray) -> np.ndarr # NOTE: the solutions are first written into the rows of the solution matrix # because row-access is more efficient for C-contiguous arrays; # afterwards, the solution matrix is transposed - solution = np.empty( - shape=(b_pen_weighted.shape[1], b_pen_weighted.shape[0]) - ) - for iter_j in range(0, b_pen_weighted.shape[1]): + solution = np.empty(shape=(b_weighted.shape[1], b_weighted.shape[0])) + for iter_j in range(0, b_weighted.shape[1]): solution[iter_j, ::] = pp.solve( mat=ab, - rhs=b_pen_weighted[::, iter_j], + rhs=b_weighted[::, iter_j], is_flat=True, index_row_wise=False, solver=1, @@ -290,12 +325,12 @@ def _solve_pentapy(self, ab: np.ndarray, b_pen_weighted: np.ndarray) -> np.ndarr def _solve_pivoted_lu( self, ab: np.ndarray, - b_pen_weighted: np.ndarray, + b_weighted: np.ndarray, ) -> tuple[np.ndarray, BandedLUFactorization]: """ - Solves the linear system of equations ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` + Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with the LU decomposition. This is the same as solving the linear system - ``A @ x = b`` where ``A = (1.0 / lam) * W + D.T @ D`` and ``b = (1.0 / lam) * W @ b``. + ``A @ x = b`` where ``A = W + lam * D.T @ D`` and ``b = W @ b``. 
If the LU decomposition fails, a ``LinAlgError`` is raised which is fatal since the next level of escalation would be using a QR-decomposition which is not @@ -311,7 +346,7 @@ def _solve_pivoted_lu( return ( lu_solve_banded( lub_factorization=lub_factorization, - b=b_pen_weighted, + b=b_weighted, check_finite=False, overwrite_b=True, ), @@ -320,29 +355,30 @@ def _solve_pivoted_lu( def _solve( self, - b_pen_weighted: np.ndarray, - w_pen: np.ndarray, - ) -> tuple[np.ndarray, BandedSolvers, _Decomposition]: + lam: float, + b_weighted: np.ndarray, + w: Union[float, np.ndarray], + ) -> tuple[np.ndarray, BandedSolvers, _Factorization]: """ - Solves the linear system of equations ``((1.0 / lam) * W + D^T @ D) @ x = (1.0 / lam) * W @ b`` + Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` where ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and - ``D`` is the finite difference matrix of order ``differences``. - For details on why the system was formulated like this and not as usually done - in the literature, please refer to the Notes section. + ``D`` is the finite difference matrix of order ``differences``. ``lam`` + represents the penalty weight for the smoothing. + For details on why the system is not formulated in a more efficient way, please + refer to the Notes section. Parameters ---------- - b_pen_weighted : ndarray of shape (m,) or (m, n) - The penalized-weighted right-hand side vector or matrix of the linear system - of equations given by ``(1.0 / lam) * W @ b``. - log_lam : float - The logarithm of the penalty weight lambda to use for the smoothing. - w_pen : ndarray of shape (m,) - The penalized weights to use for the linear system of equations given by - ``(1.0 / lam) * W``. 
- It must be a vector even if ``bw`` is a matrix because having ``bw`` as a - matrix is only possible if lambda is fixed and the same weight vector has - to be applied to all series + lam : float + The penalty weight lambda to use for the smoothing. + b_weighted : ndarray of shape (m,) or (m, n) + The weighted right-hand side vector or matrix of the linear system of + equations given by ``W @ b``. + w : float or ndarray of shape (m,) + The weights to use for the linear system of equations given in terms of the + main diagonal of the weight matrix ``W``. + It can either be a vector of weights for each data point or a single + scalar - namely ``1.0`` - if no weights are provided. Returns ------- @@ -363,29 +399,30 @@ def _solve( Notes ----- - Using the multiplication of the weight matrix ``W`` with the reciprocal of the - penalty weight lambda ``1.0 / lam`` is way more efficient because ``W`` only - possesses a single diagonal of non-zero elements while ``D.T @ D`` is a banded - matrix with at least 3 diagonals for ``differences >= 1``. ``D.T @ D`` is even - symmetric, so roughly 50% of the multiplications with ``D.T @ D`` would be - redundant. - Given a pre-computed ``(1.0) / lam * W``, the weighted right-hand side vector - ``(1.0 / lam) * W @ b`` is computed by element-wise multiplication. - So, instead of at least 3 * ``n_data`` only 2 * ``n_data`` multiplications are - required. If the number of bands in ``D.T @ D`` is 5 (``differences == 2``), the - number of multiplications is reduced by 60% already. + It might seem more efficient to solve the linear system ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` + because this only requires a multiplication of ``m`` weights with the reciprocal + of the penalty weight whereas the multiplication with ``D.T @ D`` requires + roughly ``m * (1 + 2 * differences)`` multiplications with ``m`` as the number + of data points and ``differences`` as the difference order. 
On top of that, + ``m * differences`` multiplications - so roughly 50% - would be redundant given + that the penalty ``D.T @ D`` matrix is symmetric. + However, NumPy's scalar multiplication is so highly optimized that the + multiplication with ``D.T @ D`` without considering symmetry is almost as fast + as the multiplication with the diagonal matrix ``W``, especially when compared + to the computational load of the banded solvers. """ # noqa: E501 # the banded storage format for the LAPACK LU decomposition is computed by - # updating the main diagonal of the penalty matrix with the penalized weights - ab = self._penalty_matb_.copy() - ab[self.differences_, ::] += w_pen + # scaling the penalty matrix with the penalty weight lambda and then adding the + # diagonal matrix with the weights + ab = lam * self._penalty_matb_ + ab[self.differences_, ::] += w # the linear system of equations is solved with the most efficient method # Case 1: Pentapy can be used if self._pentapy_enabled_: - x = self._solve_pentapy(ab=ab, b_pen_weighted=b_pen_weighted) + x = self._solve_pentapy(ab=ab, b_weighted=b_weighted) if np.isfinite(x).all(): return ( x, @@ -395,9 +432,7 @@ def _solve( # Case 2: LU decomposition (final fallback for pentapy) try: - x, lub_factorization = self._solve_pivoted_lu( - ab=ab, b_pen_weighted=b_pen_weighted - ) + x, lub_factorization = self._solve_pivoted_lu(ab=ab, b_weighted=b_weighted) return x, BandedSolvers.PIVOTED_LU, lub_factorization except np.linalg.LinAlgError: @@ -413,53 +448,296 @@ def _solve( f"e.g., binning or lowering the difference order." ) - @overload - def _get_penalized_weights(self, w: None) -> float: ... + ### Auxiliary Methods to prepare the data for the solver ### - @overload - def _get_penalized_weights(self, w: np.ndarray) -> np.ndarray: ... 
+ def calc_wrss( + self, b: np.ndarray, b_smooth: np.ndarray, w: Union[float, np.ndarray] + ) -> float: + """ + Computes the (weighted) Sum of Squared Residuals (w)RSS between the original and + the smoothed series. - def _get_penalized_weights( - self, w: Optional[np.ndarray] - ) -> Union[float, np.ndarray]: """ - Computes the penalized weights to be used for the linear system of equations, - i.e., ``(1.0 / lam) * W`` where ``W`` is a diagonal matrix with the weights - ``w`` on the main diagonal. + # Case 1: no weights are provided + if isinstance(w, float): + return np.square(b - b_smooth).sum() + + # Case 2: weights are provided + return (w * np.square(b - b_smooth)).sum() + + def _calc_log_marginal_likelihood( + self, + factorization: _FactorizationForLogMarginalLikelihood, + log_lam: float, + lam: float, + b: np.ndarray, + b_smooth: np.ndarray, + w: Union[float, np.ndarray], + w_plus_penalty_plus_n_samples_term: float, + ) -> float: """ + Computes the log marginal likelihood for the automatic fitting of the penalty + weight lambda. For the definitions used (and manipulated here), please refer to + the Notes section. - # if no weights are provided, the penalized weights are simply the reciprocal of - # the penalty weight lambda - if w is None: - return 1.0 / self.lam_ # type: ignore + Parameters + ---------- + factorization : BandedLUFactorization + The factorization of the matrix to solve the linear system of equations, + i.e., ``W + lambda * D.T @ D`` from the description above. + Currently, only partially pivoted banded LU decompositions can be used to + compute the log marginal likelihood. + log_lam : float + The natural logarithm of the penalty weight lambda used for the smoothing. + lam : float + The penalty weight lambda used for the smoothing, i.e., ``exp(log_lam)``. + b, b_smooth : ndarray of shape (m,) + The original series and its smoothed counterpart. + w : float or ndarray of shape (m,) + The weights to use for the smoothing. 
+ w_plus_penalty_plus_n_samples_term : float + The last term of the log marginal likelihood that is constant since it + involves the weights, the penalty matrix, and the number of data points + which are all constant themselves (see the Notes for details). + + Notes + ----- + The log marginal likelihood is given by: + + ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - ln(pseudo_det(lambda * D.T @ D)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` + + or better + + ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - (n - d) * ln(lambda) - ln(det(D @ D.T)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` + + where: + + - ``wRSS`` is the weighted Sum of Squared Residuals between the original and the + smoothed series, + - ``PSS`` is the Penalty Sum of Squares which is given by the sum of the squared + elements of the ``d``-th order forward finite differences of the smoothed + series, + - ``d`` is the difference order used for the smoothing. + - ``ln`` as the natural logarithm, + - ``pseudo_det(A)`` is the pseudo-determinant of the matrix ``A``, i.e., the + product of its non-zero eigenvalues, + - ``det(A)`` is the determinant of the matrix ``A``, i.e., the product of its + eigenvalues, + - ``W`` is the diagonal matrix with the weights on the main diagonal, + - ``D.T @ D`` is the squared forward finite differences matrix, and + - ``n`` is the number of data points in the series to smooth, + - ``n^`` is the number of data points with non-zero weights in the series to + smooth. + + It should be noted that ``pseudo_det(D.T @ D)`` is replaced by ``det(D @ D.T)`` + here because the latter is not rank-deficient. + + """ # noqa: E501 + + # first, the weighted Sum of Squared Residuals is computed ... + wrss = self.calc_wrss(b=b, b_smooth=b_smooth, w=w) + # ... 
followed by the Penalty Sum of Squares which requires the squared forward + # finite differences of the smoothed series + # NOTE: ``np.convolve`` is used to compute the forward finite differences and + # since it flips the provided kernel, an already flipped kernel is used + pss = ( + lam + * np.square( + np.convolve(b_smooth, self._diff_kernel_flipped_, mode="valid") + ).sum() + ) + + # besides the determinant of the combined left hand side matrix has to be + # computed from its decomposition + lhs_logdet_sign, lhs_logabsdet = slogdet_lu_banded( + lub_factorization=factorization, + ) + + # if the sign of the determinant is positive, the log marginal likelihood is + # computed and returned + if lhs_logdet_sign > 0.0: + return -0.5 * ( + wrss + + pss + - (b.size - self.differences_) * log_lam + + lhs_logabsdet + + w_plus_penalty_plus_n_samples_term + ) - # otherwise, the penalized weights are the product of the reciprocal of the - # penalty weight lambda and the weights - # NOTE: instead of using divisions, the weights are multiplied with the - # reciprocal of the penalty weight lambda which is less numerically - # accurate but way faster - return w * (1.0 / self.lam_) # type: ignore + # otherwise, if the determinant is negative, the system is extremely + # ill-conditioned and the log marginal likelihood cannot be computed + raise RuntimeError( + "\nThe determinant of the combined left hand side matrix " + "W + lambda * D.T @ D is negative, indicating that the system is extremely " + "ill-conditioned.\n" + "The log marginal likelihood cannot be computed.\n" + "Please consider reducing the number of data points to smooth by, e.g., " + "binning or lowering the difference order." 
+ ) + + def _marginal_likelihood_objective( + self, + log_lam: float, + b: np.ndarray, + w: Union[float, np.ndarray], + w_plus_penalty_plus_n_samples_term: float, + ) -> float: + """ + The objective function to minimize for the automatic fitting of the penalty + weight lambda by maximizing the log marginal likelihood. + For the definition of the log marginal likelihood, please refer to the + description of the method :meth:`_calc_log_marginal_likelihood`. + + """ + + # first, the linear system of equations is solved with the given penalty weight + # lambda + lam = exp(log_lam) + + # Case 1: no weights are provided + if isinstance(w, float): + b_smooth, _, factorization = self._solve( + lam=lam, + b_weighted=b, + w=w, + ) + + # Case 2: weights are provided + else: + b_smooth, _, factorization = self._solve( + lam=lam, + b_weighted=b * w, + w=w, + ) + + # finally, the log marginal likelihood is computed and returned (negative since + # the objective function is minimized, but the log marginal likelihood is + # to be maximized) + return (-1.0) * self._calc_log_marginal_likelihood( + factorization=factorization, # type: ignore + log_lam=log_lam, + lam=lam, + b=b, + b_smooth=b_smooth, + w=w, + w_plus_penalty_plus_n_samples_term=w_plus_penalty_plus_n_samples_term, + ) + + ### Solver management methods ### def _solve_single_b_fixed_lam( self, b: np.ndarray, - w: Optional[np.ndarray], + w: Union[float, np.ndarray], + lam: Optional[float] = None, ) -> tuple[np.ndarray, float]: """ Solves for the Whittaker-like smoother solution for a single series with a fixed penalty weight lambda. - For the parameters, please refer to the documentation of ``_solve``. Instead of - a 2D-Array, a 1D-Array is expected for ``b`` and ``w``. 
+ """ + + # if no value was provided for the penalty weight lambda, the respective class + # attribute is used instead + lam = self._lam_inter_.fixed_lambda if lam is None else lam + + # the weights and the weighted series are computed depending on whether weights + # are provided or not + # Case 1: no weights are provided + if isinstance(w, float): + return ( + self._solve( + lam=lam, + b_weighted=b, + w=w, + )[0], + lam, + ) + + # Case 2: weights are provided + return ( + self._solve( + lam=lam, + b_weighted=b * w, + w=w, + )[0], + lam, + ) + + def _solve_single_b_auto_lam_lml( + self, + b: np.ndarray, + w: Union[float, np.ndarray], + ) -> tuple[np.ndarray, float]: + """ + Solves for the Whittaker-like smoother solution for a single series with an + automatically fitted penalty weight lambda by maximizing the log marginal + likelihood. """ - # the penalized weights are computed - w_pen = self._get_penalized_weights(w=w) + # first, the constant terms of the log marginal likelihood are computed starting + # from the log pseudo-determinant of the weight matrix, i.e., the product of the + # non-zero elements of the weight vector + nnz_w = self.n_data_ + log_pseudo_det_w = 0.0 # ln(1**nnz_w) = 0.0 + if isinstance(w, np.ndarray): + nonzero_w_idxs = np.where(w > w.max() * self.__zero_weight_tol)[0] + nnz_w = nonzero_w_idxs.size + log_pseudo_det_w = np.log(w[nonzero_w_idxs]).sum() + + # the constant term of the log marginal likelihood is computed + w_plus_n_samples_term = ( + (nnz_w - self.differences_) * self.__LN_TWO_PI + - log_pseudo_det_w + - self._penalty_mat_log_pseudo_det_ + ) + + # unless the search space spans less than 1 decade, i.e., ln(10) ~= 2.3, a grid + # search is carried out to shrink the search space for the final optimization; + # the grid is spanned with an integer number of steps of half a decade + log_low_bound, log_upp_bound = self._lam_inter_.log_auto_bounds + bound_log_diff = log_upp_bound - log_low_bound + if bound_log_diff > self.__LN_TEN: + 
half_decade = 0.5 * self.__LN_TEN + target_best = float("inf") + n_steps = 1 + ceil(bound_log_diff / half_decade) # + # NOTE: the following ensures that the upper bound is not exceeded + step_size = bound_log_diff / (n_steps - 1) + + # all the trial values are evaluated and the best one is stored + for trial in range(0, n_steps): + log_lam_curr = log_low_bound + trial * step_size + target_curr = self._marginal_likelihood_objective( + log_lam=log_lam_curr, + b=b, + w=w, + w_plus_penalty_plus_n_samples_term=w_plus_n_samples_term, + ) + + if target_curr < target_best: + log_lam_best = log_lam_curr + target_best = target_curr + + # then, the bounds for the final optimization are shrunk to plus/minus half + # a decade around the best trial value + # NOTE: the following ensures that the bounds are not violated + log_low_bound = max(log_lam_best - half_decade, log_low_bound) + log_upp_bound = min(log_lam_best + half_decade, log_upp_bound) + + # the optimization of the log marginal likelihood is carried out + opt_res = minimize_scalar( + fun=self._marginal_likelihood_objective, + bounds=(log_low_bound, log_upp_bound), + args=(b, w, w_plus_n_samples_term), + method="bounded", + options={"xatol": 0.05}, + ) - # finally, the solution is returned together with the lambda parameter - return self._solve(b_pen_weighted=b * w_pen, w_pen=w_pen)[0], self.lam_ # type: ignore + # the optimal penalty weight lambda is returned together with the smoothed + # series + return self._solve_single_b_fixed_lam(b=b, w=w, lam=exp(opt_res.x)) def _solve_multiple_b( self, @@ -476,27 +754,32 @@ def _solve_multiple_b( """ - # the penalized weights are computed - w_pen = self._get_penalized_weights(w=w) - if isinstance(w_pen, float): - w_pen = np.array([w_pen], dtype=self.__dtype) - # then, the solution of the linear system of equations is computed for the # transposed series matrix (expected right-hand side format for the solvers) - # FIXME: ``w_pen`` somehow becomes an integer for the type checker 
- X_smooth, _, _ = self._solve( - b_pen_weighted=(X * w_pen[np.newaxis, ::]).transpose(), # type: ignore - w_pen=w_pen, # type: ignore - ) + # Case 1: no weights are provided + if w is None: + X_smooth, _, _ = self._solve( + lam=self._lam_inter_.fixed_lambda, + b_weighted=X.transpose(), + w=1.0, + ) + + # Case 2: weights are provided + else: + X_smooth, _, _ = self._solve( + lam=self._lam_inter_.fixed_lambda, + b_weighted=(X * w[np.newaxis, ::]).transpose(), + w=w, + ) return ( X_smooth.transpose(), - np.full(shape=(X.shape[0],), fill_value=self.lam_), # type: ignore + np.full(shape=(X.shape[0],), fill_value=self._lam_inter_.fixed_lambda), ) def _get_weight_generator( self, w: Optional[np.ndarray], n_series: int - ) -> Generator[Optional[np.ndarray], None, None]: + ) -> Generator[Union[float, np.ndarray], None, None]: """ Generates a generator that yields the weights for each series in a series matrix ``X``. @@ -506,7 +789,7 @@ def _get_weight_generator( # Case 1: No weights if w is None: for _ in range(n_series): - yield None + yield 1.0 # Case 2: 1D weights elif w.ndim == 1: @@ -518,11 +801,13 @@ def _get_weight_generator( for w_vect in w: yield w_vect + ### Main Solver Entry Point ### + def _whittaker_solve( self, X: np.ndarray, *, - w_vect: Optional[np.ndarray] = None, + w: Optional[np.ndarray] = None, use_same_w_for_all: bool = False, ) -> tuple[np.ndarray, np.ndarray]: """ @@ -561,73 +846,25 @@ def _whittaker_solve( # if multiple x with the same weights are to be solved for fixed lambda, this # can be done more efficiently by leveraging LAPACK'S (not pentapy's) ability to # perform multiple solves from the same inversion at once - if use_same_w_for_all: - return self._solve_multiple_b(X=X, w=w_vect) + if use_same_w_for_all and not self._lam_inter_.fit_auto: + return self._solve_multiple_b(X=X, w=w) # otherwise, the solution of the linear system of equations is computed for # each series # first, the smoothing method is specified depending on whether the 
penalty # weight lambda is fitted automatically or not - smooth_method = self._solve_single_b_fixed_lam - if self._auto_fit_lam_: - smooth_method_assignment = { - AutoSmoothMethods.LOG_MARGINAL_LIKELIHOOD: self._solve_single_b_fixed_lam, - } - smooth_method = smooth_method_assignment[self.lam_.method] # type: ignore + smooth_method_assignment = { + WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam, + WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_lml, + } + smooth_method = smooth_method_assignment[self._lam_inter_.method_used] # then, the solution is computed for each series by means of a loop X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) - w_gen = self._get_weight_generator(w=w_vect, n_series=X.shape[0]) + w_gen = self._get_weight_generator(w=w, n_series=X.shape[0]) for iter_i, (x_vect, w_vect) in enumerate(zip(X, w_gen)): X_smooth[iter_i], lam[iter_i] = smooth_method(b=x_vect, w=w_vect) return X_smooth, lam - -if __name__ == "__main__": - - import time - - from matplotlib import pyplot as plt - - NOISE_STDDEV = 0.05 - N_DATA = 1000 - N_NOISE_REALIZATIONS = 10 - - x = np.linspace(0, 2 * np.pi, N_DATA) - np.random.seed(42) - y_singles = np.empty(shape=(N_NOISE_REALIZATIONS, N_DATA)) - noise_level = NOISE_STDDEV * (1 + 2 * np.abs(x - np.pi)) - for iter_i in range(N_NOISE_REALIZATIONS): - y_singles[iter_i, ::] = np.cos(x) + np.random.normal(scale=noise_level) - - y_stddev = y_singles.std(axis=0, ddof=1) - y = np.tile(y_singles.mean(axis=0)[np.newaxis, ::], reps=(2, 1)) - y += np.array([0.0, 1.0])[::, np.newaxis] - - start = time.time() - tt = WhittakerLikeSolver() - tt._setup_for_fit(n_data=x.size, lam=1e3, differences=1) - weights = 1.0 / np.square(y_stddev) - y_smooth, lam = tt._whittaker_solve( - X=y, - w_vect=np.array([weights, np.concatenate((weights[500:], weights[:500]))]), - use_same_w_for_all=False, - ) - print(f"Time: {(time.time() - start):.3f} seconds") - - fig, ax = plt.subplots() - - ax.plot(x, y.T, 
label="Original") - for idx in range(0, y.shape[0]): - ax.fill_between( - x, - y_smooth[idx, ::] - 2 * y_stddev, - y_smooth[idx, ::] + 2 * y_stddev, - alpha=0.5, - label="Confidence Interval", - ) - ax.plot(x, y_smooth.T, label="Smoothed") - - plt.show() From 8569b6f7f3088feb125e8c0aaf616f5269445995 Mon Sep 17 00:00:00 2001 From: MothNik Date: Wed, 1 May 2024 19:35:19 +0200 Subject: [PATCH 032/118] fix: fixed missing early returns --- chemotools/utils/models.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 21ef85db..c9b268c1 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -168,6 +168,8 @@ def __post_init__(self): self.fixed_lambda = float(self.bounds) self.fit_auto = False + return + # Case 2: a tuple of two values is provided elif isinstance(self.bounds, tuple): @@ -207,6 +209,8 @@ def __post_init__(self): self.fixed_lambda = float(upp_bound) self.fit_auto = False + return + # Case 3: the bounds are neither a scalar nor a tuple of two values raise TypeError( f"\nThe bounds for the penalty weight lambda have to be either a scalar " From 70145e0f04a085ff0978c66e902e1d34b7c60292 Mon Sep 17 00:00:00 2001 From: MothNik Date: Wed, 1 May 2024 19:35:49 +0200 Subject: [PATCH 033/118] fix: removed unused import --- chemotools/utils/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index c9b268c1..a0e73709 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -9,7 +9,7 @@ from dataclasses import dataclass, field from enum import Enum from math import log -from typing import Literal, Optional, Tuple, Union +from typing import Literal, Tuple, Union import numpy as np From bdc7c34be6e6686e3f2d1388778124e06181f92c Mon Sep 17 00:00:00 2001 From: MothNik Date: Wed, 1 May 2024 20:23:41 +0200 Subject: [PATCH 034/118] fix: simplified weight checker; fixed broken 
multi-signal-single-weight-functionality --- chemotools/utils/check_inputs.py | 9 ++++----- chemotools/utils/whittaker_base.py | 5 ++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index ac146ef9..ed64424e 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -20,14 +20,13 @@ def check_input(X, y=None, dtype: type | Literal["numeric"] | None = "numeric"): def check_weights( weights: np.ndarray | None, n_samples: int, n_features: int ) -> tuple[np.ndarray | None, bool]: - # if the weights are None, return None and a flag that the same weights should be - # applied for all samples + # if the weights are None, None is returned and a flag that the same weights should + # be applied for all samples if weights is None: return None, True - # else nothing - # if the weights are an effectively 1D-array, make them a 2D-array - if weights.ndim == 1 or (weights.ndim == 2 and weights.shape[0] == 1): + # if the weights are a 1D array, they are reshaped to a 2D array with one row + if weights.ndim == 1: weights_checked = weights.reshape((1, -1)) else: weights_checked = weights diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index c844df76..51df29b3 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -768,8 +768,8 @@ def _solve_multiple_b( else: X_smooth, _, _ = self._solve( lam=self._lam_inter_.fixed_lambda, - b_weighted=(X * w[np.newaxis, ::]).transpose(), - w=w, + b_weighted=(X * w).transpose(), + w=w[0, ::], ) return ( @@ -867,4 +867,3 @@ def _whittaker_solve( X_smooth[iter_i], lam[iter_i] = smooth_method(b=x_vect, w=w_vect) return X_smooth, lam - From 2664245cc7213b68eb594bf62d794cb8348ddd57 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 12:02:44 +0200 Subject: [PATCH 035/118] refactor: made log determinant sign computation less floating point dependent --- 
chemotools/utils/banded_linalg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 69b461f9..1cb34cd9 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -550,7 +550,7 @@ def slogdet_lu_banded( # product of L and the diagonal product of U, the calculation simplifies. As the # main diagonal of L is a vector of ones, only the diagonal product of U is required main_diag = lub_factorization.lub[lub_factorization.main_diag_row_idx, ::] - u_diaprod_sign = np.sign(main_diag).prod() + u_diag_sign_is_pos = np.count_nonzero(main_diag < 0.0) % 2 == 0 with np.errstate(divide="ignore", over="ignore"): logabsdet = np.log(np.abs(main_diag)).sum() @@ -569,7 +569,7 @@ def slogdet_lu_banded( # returned together with its sign if np.isneginf(logabsdet): return 0.0, logabsdet - elif u_diaprod_sign > 0.0: + elif u_diag_sign_is_pos: return sign, logabsdet return -sign, logabsdet From c72de74e4a0ad65454042255116f89b7f6dd7566 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 12:07:04 +0200 Subject: [PATCH 036/118] tests: added parallelizable, parametrized tests for utility functions; removed old utility tests --- tests/test_for_utils.py | 400 ------------ tests/test_for_utils/__init__.py | 0 tests/test_for_utils/test_banded_linalg.py | 222 +++++++ .../test_for_utils/test_finite_differences.py | 144 +++++ tests/test_for_utils/utils.py | 598 ++++++++++++++++++ 5 files changed, 964 insertions(+), 400 deletions(-) delete mode 100644 tests/test_for_utils.py create mode 100644 tests/test_for_utils/__init__.py create mode 100644 tests/test_for_utils/test_banded_linalg.py create mode 100644 tests/test_for_utils/test_finite_differences.py create mode 100644 tests/test_for_utils/utils.py diff --git a/tests/test_for_utils.py b/tests/test_for_utils.py deleted file mode 100644 index b01e6bcd..00000000 --- a/tests/test_for_utils.py +++ /dev/null @@ -1,400 
+0,0 @@ -from typing import Literal - -import numpy as np -import pytest -from scipy.linalg import cholesky_banded, eigvals_banded, solve_banded, solveh_banded -from scipy.sparse import eye as speye - -from chemotools.utils.banded_linalg import ( - conv_to_lu_banded_storage, - lu_banded, - lu_solve_banded, - slodget_cho_banded, -) -from chemotools.utils.finite_differences import ( - calc_forward_diff_kernel, - calc_limit_max_eigval_fin_diff_mat, - forward_finite_diff_conv_matrix, - posdef_mod_squared_fw_fin_diff_conv_matrix, -) -from chemotools.utils.whittaker_base import WhittakerLikeSolver -from tests.fixtures import reference_finite_differences # noqa: F401 - - -def test_forward_diff_kernel( - reference_finite_differences: list[tuple[int, int, np.ndarray]] # noqa: F811 -) -> None: - # Arrange - for differences, _, reference in reference_finite_differences: - # Act - kernel = calc_forward_diff_kernel(differences=differences) - - # Assert - assert kernel.size == reference.size, ( - f"Difference order {differences} with accuracy 1 expected kernel size " - f"{reference.size} but got {kernel.size}" - ) - assert np.allclose(kernel, reference, atol=1e-8), ( - f"Difference order {differences} with accuracy 1 expected kernel " - f"{reference.tolist()} but got {kernel.tolist()}" - ) - - -@pytest.mark.parametrize("difference", list(range(0, 21))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) -def test_forward_finite_diff_conv_matrix(size: int, difference: int) -> None: - """Tests the generated convolution matrix for forward finite differences by - comparing it to NumPy's ``convolve``. - """ - - # the test is skipped if the kernel is too large - if difference + 1 > size: - pytest.skip( - f"Test skipped because the kernel size {difference + 1} is larger than the " - f"series size {size}." - ) - # else nothing - - # the random signal is generated - np.random.seed(seed=42) - series = np.random.rand(size) - - # the kernel is computed ... 
- kernel = calc_forward_diff_kernel(differences=difference) - # ... and the random series is convolved with the kernel ... - # NOTE: the kernel is flipped because of the way NumPy's convolve works - numpy_convolved_series = np.convolve(series, np.flip(kernel), mode="valid") - - # the convolution matrix is computed ... - conv_matrix = forward_finite_diff_conv_matrix( - differences=difference, series_size=series.size - ) - # ... and the series is convolved with the convolution matrix - matrix_convolved_series = conv_matrix @ series - - # the actual test is performed - assert conv_matrix.dtype == np.int64, ( - f"Convolution matrix for difference order {difference} with accuracy 1 for " - f"series of size {size} expected data type np.int64 but got " - f"{conv_matrix.dtype}." - ) - - assert np.allclose(matrix_convolved_series, numpy_convolved_series), ( - f"Differences by matrix product for difference order {difference} with " - f"accuracy 1 for series of size {size} failed." - ) - - -@pytest.mark.parametrize("with_finite_check", [True, False]) -@pytest.mark.parametrize("overwrite_b", [True, False]) -@pytest.mark.parametrize("n_rhs", [0, 1, 2]) -@pytest.mark.parametrize("n_upp_bands", [1, 2, 3, 4, 5, 6]) -@pytest.mark.parametrize("n_low_bands", [1, 2, 3, 4, 5, 6]) -@pytest.mark.parametrize( - "n_rows", [1, 2, 10, 11, 50, 51, 100, 101, 500, 501, 1_000, 1001, 5_000, 5001] -) -def test_lu_banded_solve( - n_rows: int, - n_low_bands: int, - n_upp_bands: int, - n_rhs: int, - overwrite_b: bool, - with_finite_check: bool, -) -> None: - """ - Tests the separate LU decomposition followed by solving a system of linear equations - for banded matrices. - - NOTE: A number of 0 right-hand sides are used for making the vector to solve a - NOTE: 1D-Array. 
- - """ - - # if the matrix cannot exist with the given shape, the test is skipped - n_rows_min = n_low_bands + n_upp_bands + 1 - if n_rows < n_rows_min: - pytest.skip( - f"Test skipped because the number of rows {n_rows} is smaller than the " - f"minimum number of rows {n_rows_min} required by the number of sub- " - f"{n_low_bands} and superdiagonals {n_upp_bands}." - ) - - # a random banded matrix and right-hand-side-vector/-matrix are generated - np.random.seed(seed=42) - ab = np.random.rand(n_low_bands + n_upp_bands + 1, n_rows) - b = np.random.rand(n_rows) if n_rhs == 0 else np.random.rand(n_rows, n_rhs) - - # first, the Scipy solution is computed because if this fails due to singularity, - # the test has to not test for equivalent results, but for failure - # NOTE: failure is indicated by the solution being ``None`` - # NOTE: this order of evaluation is also better for testing if the overwrite flag - # NOTE: is working correctly because otherwise SciPy would get the overwritten b - l_and_u = (n_low_bands, n_upp_bands) - x_ref = None - try: - x_ref = solve_banded( - l_and_u=l_and_u, - ab=ab, - b=b, - check_finite=True, - ) - - # NOTE: even if SciPy computes the solution "successfully", there might be NaNs - # NOTE: in the result, so the test has to check for that as well - if np.any(np.isnan(x_ref)): - x_ref = None - - except np.linalg.LinAlgError: - pass - - # the banded matrix is LU decomposed with the respective Chemotools function - lu_fact = lu_banded( - l_and_u=l_and_u, - ab=ab, - check_finite=with_finite_check, - ) - - # the linear system is solved with the respective Chemotools function - # Case 1: Scipy failed - if x_ref is None: - # in this case, the Chemotools function has to raise an exception as well - with pytest.raises(np.linalg.LinAlgError): - x = lu_solve_banded( - lub_factorization=lu_fact, - b=b, - overwrite_b=overwrite_b, - check_finite=with_finite_check, - ) - return - - # Case 2: Scipy succeeded - # in this case, the Chemotools function 
has to return the same result as Scipy - x = lu_solve_banded( - lub_factorization=lu_fact, - b=b, - overwrite_b=overwrite_b, - check_finite=with_finite_check, - ) - - # NOTE: the following check has to be fairly strict when it comes to equivalence - # NOTE: since the SciPy and Chemotools are basically doing the same under the hood - # NOTE: when it comes to the solution process (first LU, then triangular solve) - assert np.allclose(x, x_ref, atol=1e-10, rtol=1e-10) - - -@pytest.mark.parametrize("difference", list(range(0, 21))) -def test_limit_eigval_squ_fin_diff_mat(difference: int) -> None: - """Tests the computation of the limit of the maximum eigenvalue of the squared - forward finite difference matrix. - """ - - # the limit of the maximum eigenvalue is computed empirically - series_size = 10000 - eigval_max_empirical = calc_limit_max_eigval_fin_diff_mat(differences=difference) - # ... and compared to the reference value - squ_diff_mat = forward_finite_diff_conv_matrix( - differences=difference, series_size=series_size - ) - squ_diff_mat = squ_diff_mat.T @ squ_diff_mat - squ_diff_mat_b = conv_to_lu_banded_storage( - a=squ_diff_mat, l_and_u=(difference, difference) - ).astype(np.float64)[difference:, ::] - eigval_max_reference = eigvals_banded( - a_band=squ_diff_mat_b, - lower=True, - select="i", - select_range=(series_size - 1, series_size - 1), - )[0] - - assert np.isclose(eigval_max_empirical, eigval_max_reference), ( - f"Empirical limit of the maximum eigenvalue for difference order {difference} " - f"with accuracy 1 for series of size {series_size} failed." 
- ) - - -@pytest.mark.parametrize("difference", list(range(0, 21))) -@pytest.mark.parametrize( - "size", - np.arange(start=1, stop=1001, step=1).tolist() - + np.arange(start=1000, stop=100000, step=2500).tolist(), -) -def test_posdef_squ_fin_diff_conv_matrix(size: int, difference: int) -> None: - """Tests the generated convolution matrix for forward finite differences by - comparing it against itself after SciPy's ``solveh_banded`` has been applied. - """ - - # the test is skipped if the kernel is too large - if difference + 1 > size: - pytest.skip( - f"Test skipped because the kernel size {difference + 1} is larger than the " - f"series size {size}." - ) - # else nothing - - # the random signal is generated - min_eigval_size = 5000 - np.random.seed(seed=42) - series = np.random.rand(size) - - # this is solved against a finite difference matrix with an updated diagonal to - # ensure positive definiteness - l_and_u = (difference, difference) - max_eigval_mult = max(min_eigval_size, size) * np.finfo(np.float64).eps # type: ignore # noqa: E501 - fw_fin_diff_mat = forward_finite_diff_conv_matrix( - differences=difference, - series_size=size, - ) - squ_diff_mat = posdef_mod_squared_fw_fin_diff_conv_matrix( - fw_fin_diff_mat=fw_fin_diff_mat, - differences=difference, - dia_mod_matrix=None, - max_eigval_mult=max_eigval_mult, - dtype=np.float64, - ) - - assert squ_diff_mat.dtype == np.float64, ( # type: ignore - f"Convolution matrix for difference order {difference} with accuracy 1 for " - f"series of size {size} expected data type np.float64 but got " - f"{squ_diff_mat.dtype}." 
- ) - - # now, the linear system is solved - ab = conv_to_lu_banded_storage(a=squ_diff_mat, l_and_u=l_and_u) - x = solveh_banded( - ab=ab[difference:, ::], - b=series, - lower=True, - ) - - # NOTE: ``atol`` is set relatively high because the matrix is not very well - # conditioned and an approximate result is expected but also fine - assert np.allclose(series, squ_diff_mat @ x, atol=5e-4), ( - f"Positive definite squared convolution matrix for difference order " - f"{difference} with accuracy 1 for series of size {size} failed." - ) - - -@pytest.mark.parametrize("with_finite_check", [True, False]) -@pytest.mark.parametrize("difference", list(range(0, 11))) -@pytest.mark.parametrize("size", [1, 2, 10, 50, 100, 500, 1_000, 5_000]) -def test_cho_banded_slogdet( - size: int, difference: int, with_finite_check: bool -) -> None: - """Tests the computation of the sign and log determinant of a banded matrix from - its Cholesky decomposition by comparing it to NumPy's ``slogdet``. - """ - - # the test is skipped if the kernel is too large - if difference + 1 > size: - pytest.skip( - f"Test skipped because the kernel size {difference + 1} is larger than the " - f"series size {size}." - ) - - # a finite difference matrix is generated with an updated diagonal to - # ensure positive definiteness - l_and_u = (difference, difference) - fw_fin_diff_mat = forward_finite_diff_conv_matrix( - differences=difference, - series_size=size, - ) - a = posdef_mod_squared_fw_fin_diff_conv_matrix( - fw_fin_diff_mat=fw_fin_diff_mat, - differences=difference, - dia_mod_matrix=None, - max_eigval_mult=0.0, - dtype=np.float64, - ) - a += speye(size, dtype=np.int64) # type: ignore - - # it is converted to LU banded storage ... - ab = conv_to_lu_banded_storage(a=a, l_and_u=l_and_u).astype(np.float64) - # ... its Cholesky decomposition is computed ... - lower = False - chob = cholesky_banded( - ab=ab[0 : difference + 1, ::], lower=lower, check_finite=with_finite_check - ) - # ... 
and the sign and log determinant are determined - sign, logabsdet = slodget_cho_banded(decomposition=(chob, lower)) - - # the sign and log determinant are compared to the values obtained by - # NumPy's slogdet - np_sign, np_logabsdet = np.linalg.slogdet(a=a.toarray()) # type: ignore - - assert np.isclose(sign, np_sign), ( - f"Sign of log determinant for matrix of size {size} with {difference} sub- and " - f"superdiagonals failed." - ) - assert np.isclose(logabsdet, np_logabsdet), ( - f"Log determinant for matrix of size {size} with {difference} sub- and " - f"superdiagonals failed." - ) - - -@pytest.mark.parametrize("same_weights_for_all", [True, False]) -@pytest.mark.parametrize("with_weights", [True, False, "bad"]) -@pytest.mark.parametrize("with_pentapy", [True, False]) -@pytest.mark.parametrize("log10_lam", np.arange(-50.0, 170.0, step=20.0).tolist()) -@pytest.mark.parametrize("nrows", [1, 2]) -@pytest.mark.parametrize( - "size", [3, 11, 50, 100, 500, 1_000, 5_000, 10_000, 50_000, 100_000] -) -@pytest.mark.parametrize("difference", [2, 10]) -def test_whittaker_solve( - difference: int, - size: int, - nrows: int, - log10_lam: float, - with_pentapy: bool, - with_weights: bool | Literal["bad"], - same_weights_for_all: bool, -) -> None: - """Tests if the Whittaker smoothing still works for very low and large values of the - smoothing parameter combined with different numerically challenging weights. If it - survives this, arbitrary combinations can be considered safe. - """ - - # the test is skipped if the kernel is too large - if difference + 1 > size: - pytest.skip( - f"Test skipped because the kernel size {difference + 1} is larger than the " - f"series size {size}." - ) - # else nothing - - # a Whittaker solver is instantiated ... - whittaker_solver = WhittakerLikeSolver() - # ... pentapy is enabled if requested ... 
- whittaker_solver._WhittakerLikeSolver__allow_pentapy = with_pentapy # type: ignore - whittaker_solver._setup_for_fit( - series_size=size, - lam=10.0**log10_lam, - differences=difference, - ) - - # ... weights are generated ... - np.random.seed(seed=42) - if with_weights: - weights = np.random.rand(1, size) - if with_weights == "bad": - idxs = np.arange(start=0, stop=size, step=1, dtype=np.int64) - weights[0, np.random.choice(idxs, size=int(size / 2), replace=False)] = 0.0 - # else nothing - else: - weights = None - - if not same_weights_for_all and weights is not None: - weights = np.tile(weights.reshape((1, -1)), reps=(nrows, 1)) - # else nothing - - # ... and the linear system is solved - z = whittaker_solver._whittaker_solve( - X=np.random.rand(nrows, size), - w=weights, - use_same_w_for_all=same_weights_for_all, - )[0] - - assert np.all(np.isfinite(z)), ( - f"Whittaker solver for series of size {size} with smoothing parameter " - f"{10.0 ** log10_lam} and weights {weights} failed." - ) diff --git a/tests/test_for_utils/__init__.py b/tests/test_for_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_for_utils/test_banded_linalg.py b/tests/test_for_utils/test_banded_linalg.py new file mode 100644 index 00000000..f6e692ab --- /dev/null +++ b/tests/test_for_utils/test_banded_linalg.py @@ -0,0 +1,222 @@ +""" +Test suite for the utility functions in the :mod:`chemotools.utils.banded_linalg` +module. 
+ +""" + +### Imports ### + +import numpy as np +import pytest +from scipy.linalg import eigvals_banded +from scipy.linalg import solve_banded as scipy_solve_banded + +from chemotools.utils.banded_linalg import ( + conv_upper_chol_banded_to_lu_banded_storage, + lu_banded, + lu_solve_banded, + slogdet_lu_banded, +) + +### Test Suite ### + + +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("overwrite_b", [True, False]) +@pytest.mark.parametrize("n_rhs", [0, 1, 2]) +@pytest.mark.parametrize("n_upp_bands", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize("n_low_bands", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize( + "n_rows", + [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 50, + 51, + 100, + 101, + 500, + 501, + 1_000, + 1001, + 5_000, + 5001, + ], +) +def test_lu_banded_solve( + n_rows: int, + n_low_bands: int, + n_upp_bands: int, + n_rhs: int, + overwrite_b: bool, + with_finite_check: bool, +) -> None: + """ + Tests the separate LU decomposition followed by solving a system of linear equations + for banded matrices against the SciPy solution. + + NOTE: A number of 0 right-hand sides are used for making the vector to solve a + NOTE: 1D-Array. + + """ + + # if the matrix cannot exist with the given shape, the test is skipped + n_rows_min = n_low_bands + n_upp_bands + 1 + if n_rows < n_rows_min: + pytest.skip( + f"Test skipped because the number of rows {n_rows} is smaller than the " + f"minimum number of rows {n_rows_min} required by the number of sub- " + f"{n_low_bands} and superdiagonals {n_upp_bands}." 
+ ) + + # a random banded matrix and right-hand-side-vector/-matrix are generated + np.random.seed(seed=42) + ab = np.random.rand(n_low_bands + n_upp_bands + 1, n_rows) + b = np.random.rand(n_rows) if n_rhs == 0 else np.random.rand(n_rows, n_rhs) + + # first, the Scipy solution is computed because if this fails due to singularity, + # the test has to not test for equivalent results, but for failure + # NOTE: failure is indicated by the solution being ``None`` + # NOTE: this order of evaluation is also better for testing if the overwrite flag + # is working correctly because otherwise SciPy would get the overwritten b + l_and_u = (n_low_bands, n_upp_bands) + x_ref = None + try: + x_ref = scipy_solve_banded( + l_and_u=l_and_u, + ab=ab, + b=b, + check_finite=True, + ) + + # NOTE: even if SciPy computes the solution "successfully", there might be NaNs + # NOTE: in the result, so the test has to check for that as well + if np.any(np.isnan(x_ref)): + x_ref = None + + except np.linalg.LinAlgError: + pass + + # the banded matrix is LU decomposed with the respective Chemotools function + lu_fact = lu_banded( + l_and_u=l_and_u, + ab=ab, + check_finite=with_finite_check, + ) + + # the linear system is solved with the respective Chemotools function + # Case 1: Scipy failed + if x_ref is None: + # in this case, the Chemotools function has to raise an exception as well + with pytest.raises(np.linalg.LinAlgError): + x = lu_solve_banded( + lub_factorization=lu_fact, + b=b, + overwrite_b=overwrite_b, + check_finite=with_finite_check, + ) + return + + # Case 2: Scipy succeeded + # in this case, the Chemotools function has to return the same result as Scipy + x = lu_solve_banded( + lub_factorization=lu_fact, + b=b, + overwrite_b=overwrite_b, + check_finite=with_finite_check, + ) + + # NOTE: the following check has to be fairly strict when it comes to equivalence + # NOTE: since the SciPy and Chemotools are basically doing the same under the hood + # NOTE: when it comes to the 
solution process (first LU, then triangular solve) + assert np.allclose(x, x_ref, atol=1e-10, rtol=1e-10) + + +@pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("n_upp_low_bands", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize( + "n_rows", + [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 50, + 51, + 100, + 101, + 500, + 501, + 1_000, + 1_001, + 5_000, + 5_001, + ], +) +def test_lu_banded_slogdet( + n_rows: int, + n_upp_low_bands: int, + with_finite_check: bool, +) -> None: + """ + Tests the computation of the sign and log determinant of a banded matrix from + its LU decomposition by comparing it to NumPy's ``slogdet``. + + """ + + # if the matrix cannot exist with the given shape, the test is skipped + n_rows_min = 2 * n_upp_low_bands + 1 + if n_rows < n_rows_min: + pytest.skip( + f"Test skipped because the number of rows {n_rows} is smaller than the " + f"minimum number of rows {n_rows_min} required by the number of sub- " + f"{n_upp_low_bands} and superdiagonals {n_upp_low_bands}." + ) + + # a random banded matrix is generated in the upper banded storage used for Cholesky + # decomposition + np.random.seed(seed=42) + # NOTE: the following ensures that the matrix is diagonally dominant + ab_chol = np.abs(np.random.rand(n_upp_low_bands + 1, n_rows)) + ab_chol[n_upp_low_bands, ::] += 1.0 + 2.0 * float(n_upp_low_bands) + l_and_u, ab_lu = conv_upper_chol_banded_to_lu_banded_storage(ab=ab_chol) + + # first, the log determinant is computed with the literal definition as the sum of + # the logarithms of the eigenvalues of the matrix + eigvals_ref = eigvals_banded(a_band=ab_chol, lower=False, select="a") + if np.count_nonzero(eigvals_ref < 0.0) % 2 == 0: # type: ignore + sign_ref = 1.0 + else: + sign_ref = -1.0 + + with np.errstate(divide="ignore", over="ignore"): + logabsdet_ref = np.log(np.abs(eigvals_ref)).sum() + + # the banded matrix is LU decomposed with the respective Chemotools function ... 
+ lu_fact = lu_banded( + l_and_u=l_and_u, + ab=ab_lu, + check_finite=with_finite_check, + ) + # ... and the sign and log determinant are computed + sign, logabsdet = slogdet_lu_banded(lub_factorization=lu_fact) + + # the results are compared + assert np.isclose(sign, sign_ref, atol=1e-5, rtol=1e-5) + assert np.isclose(logabsdet, logabsdet_ref, atol=1e-5, rtol=1e-5) diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py new file mode 100644 index 00000000..4d74886b --- /dev/null +++ b/tests/test_for_utils/test_finite_differences.py @@ -0,0 +1,144 @@ +""" +Test suite for the utility functions in the :mod:`chemotools.utils.finite_differences` +module. + +""" + +### Imports ### + +from typing import List, Tuple + +import numpy as np +import pytest + +from chemotools.utils.finite_differences import ( + calc_forward_diff_kernel, + gen_squ_fw_fin_diff_mat_cho_banded, +) +from tests.fixtures import reference_finite_differences # noqa: F401 +from tests.test_for_utils.utils import ( + conv_upper_cho_banded_storage_to_sparse, + multiply_vect_with_squ_fw_fin_diff_orig_first, + multiply_vect_with_squ_fw_fin_diff_transpose_first, +) + +### Test Suite ### + + +def test_forward_diff_kernel( + reference_finite_differences: List[Tuple[int, int, np.ndarray]] # noqa: F811 +) -> None: + # each kernel is calculated and compared to the reference + for differences, _, reference in reference_finite_differences: + kernel = calc_forward_diff_kernel(differences=differences) + + assert kernel.size == reference.size, ( + f"Difference order {differences} with accuracy 1 expected kernel size " + f"{reference.size} but got {kernel.size}" + ) + assert np.allclose(kernel, reference, atol=1e-8), ( + f"Difference order {differences} with accuracy 1 expected kernel " + f"{reference.tolist()} but got {kernel.tolist()}" + ) + + +@pytest.mark.parametrize( + "n_add_size", + list(range(0, 11)) + list(range(20, 101, 10)) + list(range(200, 1001, 100)), 
+) +@pytest.mark.parametrize("differences", list(range(1, 11))) +def test_squ_fw_fin_diff_mat_cho_banded_orig_first( + differences: int, n_add_size: int +) -> None: + """ + Tests the generation of the squared forward finite difference matrix ``D @ D.T`` + where ``D`` is the forward finite difference matrix. + Here, the original matrix ``D`` and not its transpose is used first. + + It can be effectively tested by means of a convolution of the matrix with a vector + after it was converted from the banded storage to a sparse matrix. + + """ + + # first, the finite difference kernel is calculated + kernel = calc_forward_diff_kernel(differences=differences) + + # then, the banded matrix D @ D.T is generated ... + n_data = kernel.size + n_add_size + ddt_banded = gen_squ_fw_fin_diff_mat_cho_banded( + n_data=n_data, + differences=differences, + orig_first=True, + ) + # ... and converted to a sparse matrix + ddt_sparse = conv_upper_cho_banded_storage_to_sparse(ab=ddt_banded) + + # a random vector is created + np.random.seed(42) + vector = np.random.rand(n_add_size + 1) + + # this vector is multiplied with the matrix + result = ddt_sparse @ vector + + # afterwards, the result is compared to the result of the convolution + result_conv = multiply_vect_with_squ_fw_fin_diff_orig_first( + differences=differences, + kernel=kernel, + vector=vector, + ) + + # the results are compared + # NOTE: the following check has to be fairly strict when it comes to equivalence + # since the NumPy and Chemotools are basically doing the same under the hood + assert np.allclose(result, result_conv, atol=1e-10, rtol=1e-10) + + +@pytest.mark.parametrize( + "n_add_size", + list(range(0, 11)) + list(range(20, 101, 10)) + list(range(200, 1001, 100)), +) +@pytest.mark.parametrize("differences", list(range(1, 11))) +def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( + differences: int, n_add_size: int +) -> None: + """ + Tests the generation of the squared forward finite difference matrix ``D.T @ 
D`` + where ``D`` is the forward finite difference matrix. + Here, the transpose matrix ``D.T`` and not the original matrix is used first. + + It can be effectively tested by means of a convolution of the matrix with a vector + after it was converted from the banded storage to a sparse matrix. + + """ + + # first, the finite difference kernel is calculated + kernel = calc_forward_diff_kernel(differences=differences) + + # then, the banded matrix D.T @ D is generated ... + n_data = kernel.size + n_add_size + dtd_banded = gen_squ_fw_fin_diff_mat_cho_banded( + n_data=n_data, + differences=differences, + orig_first=False, + ) + # ... and converted to a sparse matrix + dtd_sparse = conv_upper_cho_banded_storage_to_sparse(ab=dtd_banded) + + # a random vector is created + np.random.seed(42) + vector = np.random.rand(n_data) + + # this vector is multiplied with the matrix + result = dtd_sparse @ vector + + # afterwards, the result is compared to the result of the convolution + result_conv = multiply_vect_with_squ_fw_fin_diff_transpose_first( + differences=differences, + kernel=kernel, + vector=vector, + ) + + # the results are compared + # NOTE: the following check has to be fairly strict when it comes to equivalence + # since the NumPy and Chemotools are basically doing the same under the hood + assert np.allclose(result, result_conv, atol=1e-10, rtol=1e-10) diff --git a/tests/test_for_utils/utils.py b/tests/test_for_utils/utils.py new file mode 100644 index 00000000..5a929a60 --- /dev/null +++ b/tests/test_for_utils/utils.py @@ -0,0 +1,598 @@ +""" +This script implements utility functions required for testing the +:mod:`chemotools.utils` module. + +It contains doctests itself that are executed when running the script, but they are +automatically tested when running the whole test suite as well. This ensures that the +test utilities are working as expected as well. 
+ +""" + +### Imports ### + +from typing import Tuple + +import numpy as np +from scipy.sparse import csr_matrix +from scipy.sparse import diags as sp_diags + +### Utility Functions ### + + +def conv_upper_cho_banded_storage_to_sparse( + ab: np.ndarray, +) -> csr_matrix: + """ + Converts a banded matrix stored in the upper banded storage used for LAPACK's banded + Cholesky decomposition to a sparse ``CSR`` matrix. + For more information on the banded storage, please see the documentation of + :func:`chemotools.utils.banded_linalg.conv_upper_chol_banded_to_lu_banded_storage`. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from numpy import nan + >>> from tests.test_for_utils.utils import conv_upper_cho_banded_storage_to_sparse + + >>> # Generating a set of test matrices + >>> # Matrix 1 + >>> ab = np.array( + ... [ + ... [nan, nan, 1., 2., 3.], + ... [nan, 4., 5., 6., 7.], + ... [ 8., 9., 10., 11., 12.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[ 8., 4., 1., 0., 0.], + [ 4., 9., 5., 2., 0.], + [ 1., 5., 10., 6., 3.], + [ 0., 2., 6., 11., 7.], + [ 0., 0., 3., 7., 12.]]) + + >>> # Matrix 2 + >>> ab = np.array( + ... [ + ... [nan, nan, nan, 1.], + ... [nan, nan, 2., 3.], + ... [nan, 4., 5., 6.], + ... [ 7., 8., 9., 10.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[ 7., 4., 2., 1.], + [ 4., 8., 5., 3.], + [ 2., 5., 9., 6.], + [ 1., 3., 6., 10.]]) + + >>> # Matrix 3 + >>> ab = np.array( + ... [ + ... [1., 2., 3., 4., 5.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[1., 0., 0., 0., 0.], + [0., 2., 0., 0., 0.], + [0., 0., 3., 0., 0.], + [0., 0., 0., 4., 0.], + [0., 0., 0., 0., 5.]]) + + >>> # Matrix 4 + >>> ab = np.array( + ... [ + ... [nan, 1.], + ... [ 2., 3.], + ... ] + ... 
) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[2., 1.], + [1., 3.]]) + + >>> # Matrix 5 + >>> ab = np.array( + ... [ + ... [nan, nan, nan, nan, nan, nan, nan, nan, nan, 1.], + ... [nan, nan, nan, nan, nan, nan, nan, nan, 2., 3.], + ... [nan, nan, nan, nan, nan, nan, nan, 4., 5., 6.], + ... [nan, nan, nan, nan, nan, nan, 7., 8., 9., 10.], + ... [nan, nan, nan, nan, nan, 11., 12., 13., 14., 15.], + ... [nan, nan, nan, nan, 16., 17., 18., 19., 20., 21.], + ... [nan, nan, nan, 22., 23., 24., 25., 26., 27., 28.], + ... [nan, nan, 29., 30., 31., 32., 33., 34., 35., 36.], + ... [nan, 37., 38., 39., 40., 41., 42., 43., 44., 45.], + ... [46., 47., 48., 49., 50., 51., 52., 53., 54., 55.], + ... ] + ... ) + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab).toarray() + array([[46., 37., 29., 22., 16., 11., 7., 4., 2., 1.], + [37., 47., 38., 30., 23., 17., 12., 8., 5., 3.], + [29., 38., 48., 39., 31., 24., 18., 13., 9., 6.], + [22., 30., 39., 49., 40., 32., 25., 19., 14., 10.], + [16., 23., 31., 40., 50., 41., 33., 26., 20., 15.], + [11., 17., 24., 32., 41., 51., 42., 34., 27., 21.], + [ 7., 12., 18., 25., 33., 42., 52., 43., 35., 28.], + [ 4., 8., 13., 19., 26., 34., 43., 53., 44., 36.], + [ 2., 5., 9., 14., 20., 27., 35., 44., 54., 45.], + [ 1., 3., 6., 10., 15., 21., 28., 36., 45., 55.]]) + + >>> conv_upper_cho_banded_storage_to_sparse(ab=ab[6::]).toarray() + array([[46., 37., 29., 22., 0., 0., 0., 0., 0., 0.], + [37., 47., 38., 30., 23., 0., 0., 0., 0., 0.], + [29., 38., 48., 39., 31., 24., 0., 0., 0., 0.], + [22., 30., 39., 49., 40., 32., 25., 0., 0., 0.], + [ 0., 23., 31., 40., 50., 41., 33., 26., 0., 0.], + [ 0., 0., 24., 32., 41., 51., 42., 34., 27., 0.], + [ 0., 0., 0., 25., 33., 42., 52., 43., 35., 28.], + [ 0., 0., 0., 0., 26., 34., 43., 53., 44., 36.], + [ 0., 0., 0., 0., 0., 27., 35., 44., 54., 45.], + [ 0., 0., 0., 0., 0., 0., 28., 36., 45., 55.]]) + + """ + + # the offset vector is initialised + n_diags, n_cols = ab.shape + n_diags -= 
1 + main_diag_idx = n_diags + offsets = np.arange(start=-n_diags, stop=n_diags + 1, step=1, dtype=np.int64) + + # then, the list of diagonals is created + diagonals = [] + # the subdiagonals are added first ... + for offset in range(n_diags, 0, -1): + diagonals.append(ab[main_diag_idx - offset, offset:n_cols]) + + # ... followed by the main diagonal ... + diagonals.append(ab[main_diag_idx, ::]) + + # ... and finally the superdiagonals + for offset in range(1, n_diags + 1): + diagonals.append(ab[main_diag_idx - offset, offset:n_cols]) + + # the sparse matrix is created + return sp_diags( # type: ignore + diagonals=diagonals, + offsets=offsets, # type: ignore + shape=(n_cols, n_cols), + format="csr", + ) + + +def conv_lu_banded_storage_to_sparse( + ab: np.ndarray, l_and_u: Tuple[int, int] +) -> csr_matrix: + """ + Converts a banded matrix stored in the banded storage used for LAPACK's banded LU + decomposition into a sparse ``CSR`` matrix. + For more information on the banded storage, please see the documentation of + :func:`chemotools.utils.banded_linalg.conv_upper_chol_banded_to_lu_banded_storage`. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from numpy import nan + >>> from tests.test_for_utils.utils import conv_lu_banded_storage_to_sparse + + >>> # Generating a set of test matrices + >>> # Matrix 1 + >>> l_and_u = (1, 2) + >>> ab = np.array( + ... [ + ... [nan, nan, 1., 2., 3.], + ... [nan, 4., 5., 6., 7.], + ... [ 8., 9., 10., 11., 12.], + ... [13., 14., 15., 16., nan], + ... ] + ... ) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[ 8., 4., 1., 0., 0.], + [13., 9., 5., 2., 0.], + [ 0., 14., 10., 6., 3.], + [ 0., 0., 15., 11., 7.], + [ 0., 0., 0., 16., 12.]]) + + >>> # Matrix 2 + >>> l_and_u = (2, 1) + >>> ab = np.array( + ... [ + ... [nan, 1., 2., 3., 4.], + ... [ 5., 6., 7., 8., 9.], + ... [10., 11., 12., 13., nan], + ... [14., 15., 16., nan, nan], + ... ] + ... 
) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[ 5., 1., 0., 0., 0.], + [10., 6., 2., 0., 0.], + [14., 11., 7., 3., 0.], + [ 0., 15., 12., 8., 4.], + [ 0., 0., 16., 13., 9.]]) + + >>> # Matrix 3 + >>> l_and_u = (0, 0) + >>> ab = np.array( + ... [ + ... [1., 2., 3., 4., 5.], + ... ] + ... ) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[1., 0., 0., 0., 0.], + [0., 2., 0., 0., 0.], + [0., 0., 3., 0., 0.], + [0., 0., 0., 4., 0.], + [0., 0., 0., 0., 5.]]) + + >>> # Matrix 5 + >>> l_and_u = (5, 4) + >>> ab = np.array( + ... [ + ... [nan, nan, nan, nan, 1., 2., 3., 4., 5.], + ... [nan, nan, nan, 6., 7., 8., 9., 10., 11.], + ... [nan, nan, 12., 13., 14., 15., 16., 17., 18.], + ... [nan, 19., 20., 21., 22., 23., 24., 25., 26.], + ... [27., 28., 29., 30., 31., 32., 33., 34., 35.], + ... [36., 37., 38., 39., 40., 41., 42., 43., nan], + ... [44., 45., 46., 47., 48., 49., 50., nan, nan], + ... [51., 52., 53., 54., 55., 56., nan, nan, nan], + ... [57., 58., 59., 60., 61., nan, nan, nan, nan], + ... [62., 63., 64., 65., nan, nan, nan, nan, nan], + ... ] + ... 
) + >>> conv_lu_banded_storage_to_sparse(ab=ab, l_and_u=l_and_u).toarray() + array([[27., 19., 12., 6., 1., 0., 0., 0., 0.], + [36., 28., 20., 13., 7., 2., 0., 0., 0.], + [44., 37., 29., 21., 14., 8., 3., 0., 0.], + [51., 45., 38., 30., 22., 15., 9., 4., 0.], + [57., 52., 46., 39., 31., 23., 16., 10., 5.], + [62., 58., 53., 47., 40., 32., 24., 17., 11.], + [ 0., 63., 59., 54., 48., 41., 33., 25., 18.], + [ 0., 0., 64., 60., 55., 49., 42., 34., 26.], + [ 0., 0., 0., 65., 61., 56., 50., 43., 35.]]) + + >>> l_and_u = (1, 4) + >>> conv_lu_banded_storage_to_sparse(ab=ab[0:6, ::], l_and_u=l_and_u).toarray() + array([[27., 19., 12., 6., 1., 0., 0., 0., 0.], + [36., 28., 20., 13., 7., 2., 0., 0., 0.], + [ 0., 37., 29., 21., 14., 8., 3., 0., 0.], + [ 0., 0., 38., 30., 22., 15., 9., 4., 0.], + [ 0., 0., 0., 39., 31., 23., 16., 10., 5.], + [ 0., 0., 0., 0., 40., 32., 24., 17., 11.], + [ 0., 0., 0., 0., 0., 41., 33., 25., 18.], + [ 0., 0., 0., 0., 0., 0., 42., 34., 26.], + [ 0., 0., 0., 0., 0., 0., 0., 43., 35.]]) + + >>> l_and_u = (2, 1) + >>> conv_lu_banded_storage_to_sparse(ab=ab[3:7, ::], l_and_u=l_and_u).toarray() + array([[27., 19., 0., 0., 0., 0., 0., 0., 0.], + [36., 28., 20., 0., 0., 0., 0., 0., 0.], + [44., 37., 29., 21., 0., 0., 0., 0., 0.], + [ 0., 45., 38., 30., 22., 0., 0., 0., 0.], + [ 0., 0., 46., 39., 31., 23., 0., 0., 0.], + [ 0., 0., 0., 47., 40., 32., 24., 0., 0.], + [ 0., 0., 0., 0., 48., 41., 33., 25., 0.], + [ 0., 0., 0., 0., 0., 49., 42., 34., 26.], + [ 0., 0., 0., 0., 0., 0., 50., 43., 35.]]) + + """ + + # the offset vector is initialised + n_low_bands, n_upp_bands = l_and_u + main_diag_idx = n_upp_bands + n_cols = ab.shape[1] + offsets = np.arange( + start=-n_low_bands, + stop=n_upp_bands + 1, + step=1, + dtype=np.int64, + ) + + # then, the list of diagonals is created + diagonals = [] + # the subdiagonals are added first ... + for offset in range(n_low_bands, 0, -1): + diagonals.append(ab[main_diag_idx + offset, 0 : n_cols - offset]) + + # ... 
followed by the main diagonal ... + diagonals.append(ab[main_diag_idx, ::]) + + # ... and finally the superdiagonals + for offset in range(1, n_upp_bands + 1): + diagonals.append(ab[main_diag_idx - offset, offset:n_cols]) + + # the matrix is created from the diagonals + return sp_diags( # type: ignore + diagonals=diagonals, + offsets=offsets, # type: ignore + shape=(n_cols, n_cols), + format="csr", + ) + + +def multiply_vect_with_squ_fw_fin_diff_orig_first( + differences: int, + kernel: np.ndarray, + vector: np.ndarray, +) -> np.ndarray: + """ + Multiplies a vector with the squared forward finite difference matrix ``D @ D.T`` + where ``D`` is the forward finite difference matrix. + Here, the original matrix ``D`` and not its transpose is used first. + + This is the same operation as a convolution with the flipped kernel after zero- + padding the vector. Then, the result is again convolved with the kernel, but this + time there is neither zero-padding nor flipping involved. + ``y = D.T @ x`` is the zero-padding and flipping operation, and ``D @ y`` is the + convolution without zero-padding and flipping. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from tests.test_for_utils.utils import ( + ... multiply_vect_with_squ_fw_fin_diff_orig_first, + ... ) + + >>> # Test 1 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([1, 2]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([0, 3]) + + >>> # Test 2 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([-10, 3, 11]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-23, 5, 19]) + + >>> # Test 3 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([ 25, 17, -13, -12]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... 
differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 33, 22, -31, -11]) + + >>> # Test 4 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([1, 2, 3]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 1, -4, 11]) + + >>> # Test 5 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([-10, 3, 11, 27]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-61, 41, -64, 121]) + + >>> # Test 6 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 69, 42, -35, -155, 263]) + + >>> # Test 7 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([1, 2, 3, 4]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 4, 4, -24, 46]) + + >>> # Test 8 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([-10, 3, 11, 27, -5]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-206, 212, -320, 478, -442]) + + >>> # Test 9 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38, -8]) + >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... 
) + array([ 179, 50, 51, -586, 965, -789]) + + + """ + + # first, the zero-padded vector is convolved with the flipped kernel + vector_padded = np.pad( + vector, + pad_width=(differences, differences), + mode="constant", + constant_values=0, + ) + # NOTE: since NumPy already flips the kernel internally, there is no need to flip it + vector_conv = np.convolve(vector_padded, kernel, mode="valid") + + # then, the result is convolved with the kernel + # NOTE: here, the kernel has to be flipped to counteract NumPy's internal flipping + return np.convolve(vector_conv, np.flip(kernel), mode="valid") + + +def multiply_vect_with_squ_fw_fin_diff_transpose_first( + differences: int, + kernel: np.ndarray, + vector: np.ndarray, +) -> np.ndarray: + """ + Multiplies a vector with the squared forward finite difference matrix ``D.T @ D`` + where ``D`` is the forward finite difference matrix. + Here, the transpose matrix ``D.T`` and not the original matrix is used first. + + This is the same operation as a convolution with the kernel followed by another + convolution with the flipped kernel with an intermediate zero-padding. + ``y = D @ x`` is the convolution with the kernel, and ``D.T @ y`` is the convolution + with the flipped kernel and zero-padding. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from tests.test_for_utils.utils import ( + ... multiply_vect_with_squ_fw_fin_diff_transpose_first, + ... ) + + >>> # Test 1 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([1, 2]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-1, 1]) + + >>> # Test 2 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([-10, 3, 11]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... 
) + array([-13, 5, 8]) + + >>> # Test 3 + >>> differences = 1 + >>> kernel = np.array([-1, 1]) + >>> vector = np.array([ 25, 17, -13, -12]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ 8, 22, -31, 1]) + + >>> # Test 4 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([1, 2, 3]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([0, 0, 0]) + + >>> # Test 5 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([-10, 3, 11, 27]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ -5, 18, -21, 8]) + + >>> # Test 6 + >>> differences = 2 + >>> kernel = np.array([1, -2, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([-22, 75, -35, -67, 49]) + + >>> # Test 7 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([1, 2, 3, 4]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([0, 0, 0, 0]) + + >>> # Test 8 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([-10, 3, 11, 27, -5]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... ) + array([ -13, 95, -207, 181, -56]) + + >>> # Test 9 + >>> differences = 3 + >>> kernel = np.array([-1, 3, -3, 1]) + >>> vector = np.array([ 25, 17, -13, -12, 38, -8]) + >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + ... differences=differences, + ... kernel=kernel, + ... vector=vector, + ... 
) + array([ -53, 141, 40, -436, 453, -145]) + + """ + + # first, the vector is convolved with the kernel + # NOTE: here, the kernel has to be flipped to counteract NumPy's internal flipping + vector_conv = np.convolve(vector, np.flip(kernel), mode="valid") + + # then, the result is convolved with the flipped kernel and zero-padded + vector_padded = np.pad( + vector_conv, + pad_width=(differences, differences), + mode="constant", + constant_values=0, + ) + # NOTE: since NumPy already flips the kernel internally, there is no need to flip it + return np.convolve(vector_padded, kernel, mode="valid") + + +### Doctests ### + +if __name__ == "__main__": # pragma: no cover + import doctest + + doctest.testmod() From 50ec3979f58d61dcdfd5bf7ce2408d577383d0f2 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 12:31:23 +0200 Subject: [PATCH 037/118] feat: included doctests into `pytest` tests --- pytest.ini | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..2bed0f3a --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = --doctest-modules \ No newline at end of file From 22476c3fbca5d563a3275e43ceec7d8e364b41d2 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 12:48:31 +0200 Subject: [PATCH 038/118] tests: wrote separate doctested utility function for banded log-determinant; covered more complete range for banded linalg tests --- tests/test_for_utils/test_banded_linalg.py | 33 +++--- tests/test_for_utils/utils.py | 123 +++++++++++++++++++++ 2 files changed, 141 insertions(+), 15 deletions(-) diff --git a/tests/test_for_utils/test_banded_linalg.py b/tests/test_for_utils/test_banded_linalg.py index f6e692ab..7fe84ed2 100644 --- a/tests/test_for_utils/test_banded_linalg.py +++ b/tests/test_for_utils/test_banded_linalg.py @@ -8,7 +8,6 @@ import numpy as np import pytest -from scipy.linalg import eigvals_banded from scipy.linalg import solve_banded as 
scipy_solve_banded from chemotools.utils.banded_linalg import ( @@ -17,6 +16,7 @@ lu_solve_banded, slogdet_lu_banded, ) +from tests.test_for_utils.utils import get_banded_slogdet ### Test Suite ### @@ -80,7 +80,7 @@ def test_lu_banded_solve( # a random banded matrix and right-hand-side-vector/-matrix are generated np.random.seed(seed=42) - ab = np.random.rand(n_low_bands + n_upp_bands + 1, n_rows) + ab = -1.0 + 2.0 * np.random.rand(n_low_bands + n_upp_bands + 1, n_rows) b = np.random.rand(n_rows) if n_rhs == 0 else np.random.rand(n_rows, n_rhs) # first, the Scipy solution is computed because if this fails due to singularity, @@ -142,6 +142,7 @@ def test_lu_banded_solve( @pytest.mark.parametrize("with_finite_check", [True, False]) +@pytest.mark.parametrize("ensure_posdef", [True, False]) @pytest.mark.parametrize("n_upp_low_bands", [1, 2, 3, 4, 5, 6]) @pytest.mark.parametrize( "n_rows", @@ -172,6 +173,7 @@ def test_lu_banded_solve( def test_lu_banded_slogdet( n_rows: int, n_upp_low_bands: int, + ensure_posdef: bool, with_finite_check: bool, ) -> None: """ @@ -192,26 +194,27 @@ def test_lu_banded_slogdet( # a random banded matrix is generated in the upper banded storage used for Cholesky # decomposition np.random.seed(seed=42) - # NOTE: the following ensures that the matrix is diagonally dominant - ab_chol = np.abs(np.random.rand(n_upp_low_bands + 1, n_rows)) - ab_chol[n_upp_low_bands, ::] += 1.0 + 2.0 * float(n_upp_low_bands) - l_and_u, ab_lu = conv_upper_chol_banded_to_lu_banded_storage(ab=ab_chol) + # NOTE: the diagonal lifting ensures that the matrix is positive and diagonally + # dominant, which makes it positive definite, but this is only done if the + # flag is set + # NOTE: for an indefinite matrix, the matrix is shifted and scaled to be in the + # interval [-1, 1] + ab_for_chol = np.random.rand(n_upp_low_bands + 1, n_rows) + if ensure_posdef: + ab_for_chol[n_upp_low_bands, ::] += 1.0 + 2.0 * float(n_upp_low_bands) + else: + ab_for_chol = -1.0 + 2.0 * 
ab_for_chol + + l_and_u, ab_for_lu = conv_upper_chol_banded_to_lu_banded_storage(ab=ab_for_chol) # first, the log determinant is computed with the literal definition as the sum of # the logarithms of the eigenvalues of the matrix - eigvals_ref = eigvals_banded(a_band=ab_chol, lower=False, select="a") - if np.count_nonzero(eigvals_ref < 0.0) % 2 == 0: # type: ignore - sign_ref = 1.0 - else: - sign_ref = -1.0 - - with np.errstate(divide="ignore", over="ignore"): - logabsdet_ref = np.log(np.abs(eigvals_ref)).sum() + sign_ref, logabsdet_ref = get_banded_slogdet(ab=ab_for_chol) # the banded matrix is LU decomposed with the respective Chemotools function ... lu_fact = lu_banded( l_and_u=l_and_u, - ab=ab_lu, + ab=ab_for_lu, check_finite=with_finite_check, ) # ... and the sign and log determinant are computed diff --git a/tests/test_for_utils/utils.py b/tests/test_for_utils/utils.py index 5a929a60..8019f37b 100644 --- a/tests/test_for_utils/utils.py +++ b/tests/test_for_utils/utils.py @@ -13,6 +13,7 @@ from typing import Tuple import numpy as np +from scipy.linalg import eigvals_banded from scipy.sparse import csr_matrix from scipy.sparse import diags as sp_diags @@ -590,6 +591,128 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( return np.convolve(vector_padded, kernel, mode="valid") +def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: + """ + Computes the sign and the logarithm of the determinant of a banded matrix stored + in the upper banded storage used for LAPACK's banded Cholesky decomposition. + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> from tests.test_for_utils.utils import ( + ... conv_upper_cho_banded_storage_to_sparse, + ... get_banded_slogdet, + ... 
) + + >>> # Generating a set of test matrices + >>> np.random.seed(42) + + >>> # Matrix 1 (positive definite) + >>> semi_bw_plus_one = 3 + >>> # NOTE: the diagonal lifting makes the matrix positive definite + >>> ab_for_chol = np.random.rand(semi_bw_plus_one, 100) + >>> ab_for_chol[semi_bw_plus_one - 1, ::] += 1.0 + 2.0 * float(semi_bw_plus_one) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, 200.55218150013826) + >>> # ... and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, 200.55218150013826) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 2 (positive definite) + >>> semi_bw_plus_one = 5 + >>> ab_for_chol = np.random.rand(semi_bw_plus_one, 1000) + >>> ab_for_chol[semi_bw_plus_one - 1, ::] += 1.0 + 2.0 * float(semi_bw_plus_one) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, 2432.2672133727287) + >>> # ... and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, 2432.267213372733) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 3 (positive definite) + >>> semi_bw_plus_one = 1 + >>> ab_for_chol = np.random.rand(semi_bw_plus_one, 5000) + >>> ab_for_chol[semi_bw_plus_one - 1, ::] += 1.0 + 2.0 * float(semi_bw_plus_one) + >>> # the sign and the log determinant are computed by the utility function ... 
+ >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, 6234.8131295042585) + >>> # ... and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, 6234.8131295042585) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 4 (indefinite) + >>> semi_bw_plus_one = 2 + >>> ab_for_chol = -1.0 + 2.0 * np.random.rand(semi_bw_plus_one, 1000) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (-1.0, -437.7731132082764) + >>> # ... and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (-1.0, -437.7731132082757) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + >>> # Matrix 5 (indefinite) + >>> semi_bw_plus_one = 1 + >>> ab_for_chol = -1.0 + 2.0 * np.random.rand(semi_bw_plus_one, 5000) + >>> # the sign and the log determinant are computed by the utility function ... + >>> sign, logabsdet = get_banded_slogdet(ab=ab_for_chol) + >>> sign, logabsdet + (1.0, -5001.0078551404185) + >>> # ... 
and by NumPy's dense log determinant function for comparison + >>> ab_dense = conv_upper_cho_banded_storage_to_sparse(ab=ab_for_chol).toarray() + >>> sign_ref, logabsdet_ref = np.linalg.slogdet(ab_dense) + >>> sign_ref, logabsdet_ref + (1.0, -5001.007855140422) + >>> np.isclose(sign, sign_ref) + True + >>> np.isclose(logabsdet, logabsdet_ref) + True + + """ + # since the log determinant can be expressed as the sum of the logarithms of the + # absolute eigenvalues, an eigenvalue evaluation is sufficient to determine the + # sign and the log determinant + eigvals = eigvals_banded(a_band=ab, lower=False, select="a") + if np.count_nonzero(eigvals < 0.0) % 2 == 0: # type: ignore + sign = 1.0 + else: + sign = -1.0 + + with np.errstate(divide="ignore", over="ignore"): + logabsdet = np.log(np.abs(eigvals)).sum() # type: ignore + + return sign, logabsdet + + ### Doctests ### if __name__ == "__main__": # pragma: no cover From d208e0d0262b7952ad8af8f98e8ed1b6873a4979 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 12:49:52 +0200 Subject: [PATCH 039/118] fix: removed now wrong Whittaker implementation details --- .../utils/WHITTAKER_IMPLEMENTATION_DETAILS.md | 200 ------------------ 1 file changed, 200 deletions(-) delete mode 100644 chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md diff --git a/chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md b/chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md deleted file mode 100644 index a5b3eea9..00000000 --- a/chemotools/utils/WHITTAKER_IMPLEMENTATION_DETAILS.md +++ /dev/null @@ -1,200 +0,0 @@ -# 🧮 Implementation of the Whittaker Henderson smoothing algorithm - -The internal linear system to be solved for the Whittaker Henderson smoothing algorithm -is given by the following set of equations:
- -$\mathbf{z}=(\mathbf{W}+\tau\cdot\mathbf{D}_{m}^{T}\mathbf{D}_{m})^{-1}\mathbf{W}\mathbf{y}$ - -where $\mathbf{z}$ is the vector of smoothed values, $\mathbf{W}$ is the diagonal matrix -of weights, $\tau$ is the smoothing parameter, $\mathbf{D}_{m}$ is the ${m}$-th order -forward finite difference matrix, and $\mathbf{y}$ is the vector of observed values. -The matrix $P=\mathbf{D}_{m}^{T}\mathbf{D}_{m}$ is often referred to as the -*penalty matrix*.

-The matrix to invert is symmetric, sparse, banded with ${2\cdot m + 1}$ non-zero -diagonals, and positive definite, i.e., all its eigenvalues are strictly positive -($>0$). From an algorithmic point of view, this means that the inversion can be performed -in ${\mathcal{O}\left(n\right)}$ time, where ${n}$ is the number of observations, by -using a banded Cholesky decomposition:
- -$\mathbf{W}+\tau\cdot\mathbf{P}=\mathbf{L}\mathbf{L}^T$ - -where $\mathbf{L}$ is a lower triangular matrix which shares the same sparsity pattern -as $\mathbf{W}+\tau\cdot\mathbf{P}$. Inversion of a lower triangular matrix is trivial -when backward and forward substitution are used. - -## ⚠️ Problem - -However, all this is only true from a mathematical point of view. In practice, -floating point arithmetics introduce numerical errors which can lead to an indefinite -matrix. In this case, the Cholesky decomposition fails and the algorithm cannot be used. -This happens for relatively small $n$ already when $m$ exceeds 4, but in spectroscopy -$n > 1000$ is not uncommon and $m = 6$ has been shown helpful in deriving an additional -set of weights for $\tau$ to make the smooth spatially adaptive.
-Besides, there is another problem. The penalty matrix -$\mathbf{P}$ alone is only positive semi-definite since it possesses $m$ zero -eigenvalues by design. From a mathematical perspective, this would not pose a problem -since $\mathbf{W}+\tau\cdot\mathbf{P}$ would still be positive definite as $\tau$ tends -to $+\infty$. Numerically, this is by far not the case because $\tau$-values that are an -order of $10^{16}$ greater than the order of the weights are already sufficient to make -the matrix positive semi-definite or even indefinite if some of the small eigenvalues go -negative in the calculations (for 64-bit float precision).
-On the other hand, as $\tau$ tends to $0$, the matrix can become ill-conditioned -as well if some weights are numerically zero due to zero division.

-All in all, the banded Cholesky decomposition is not a robust algorithm for solving the -system of linear equations and even pivoted LU-decomposition suffers from the same -problems - even though it can withstand a few more orders of magnitude in $\tau$. - -## 💡 Solution - -One way out of this dilemma is to make the matrix positive definite by adding a small -positive constant to the main diagonal: - -$\mathbf{W}+\tau\cdot\mathbf{P}+\epsilon\cdot\mathbf{I}$ - -where $\epsilon$ is a small positive constant and $\mathbf{I}$ is the identity matrix. -Despite its simplicity, this approach requires that $\epsilon$ is determined at runtime -which can be costly because it depends on the eigenvalues of $\mathbf{W}$, the -eigenvalues of $\mathbf{P}$, as well as $\tau$.
-Therefore, both $\mathbf{W}$ and $\mathbf{P}$ are made positive -definite by adding a small positive constant to their main diagonal before the -decomposition is performed: - -$\mathbf{W}+\epsilon_{w}\cdot\mathbf{I}+\tau\cdot\left(\mathbf{P}+\epsilon_{p}\cdot\mathbf{I}\right)=\mathbf{L}\mathbf{L}^T$ - -Now, the only thing that remains to be done is to determine $\epsilon_{w}$ and -$\epsilon_{p}$ at runtime in an efficient manner that does not require the calculation -of any of the eigenvalues because this would be too costly. On top of that, if -approximations are used, they need to be as close as possible to the actual values, -because too large values of $\epsilon_{w}$ and $\epsilon_{p}$ can obscure the smoothing -effect while too small values can lead to numerical instabilities. A typically applied -way of scaling looks like - -$\epsilon_{a}=\varepsilon\cdot n\cdot\lambda_{max}\left(\mathbf{A}\right)$ - -where $\varepsilon$ is the floating point machine imprecision, $n$ is the number of -observations, and $\lambda_{max}\left(\mathbf{A}\right)$ is the largest eigenvalue of -the matrix $\mathbf{A}$ in question.
-This scaling is used, e.g., in ``numpy.linalg.lstsq`` where singular values that are -numerically zero need to be removed (it is used as a threshold there). - -### 🏋️ Determination of the weight $\epsilon_{w}$ - -The largest eigenvalue of $\mathbf{W}$ is given by the largest weight since it is a -diagonal matrix. Therefore, $\epsilon_{w}$ can be determined by - -$\epsilon_{w}=\varepsilon\cdot n\cdot\max\left(diag\left(\mathbf{W}\right)\right)$ - -where $diag\left(\mathbf{W}\right)$ is the vector of diagonal elements of $\mathbf{W}$ -and $max$ extracts the maximum value. This is trivial and efficient to calculate. - -### ☄️ Determination of the penalty $\epsilon_{p}$ - -Finding the largest eigenvalue of $\mathbf{D}_{m}^{T}\mathbf{D}_{m}$ is more -complicated. However, some simulations have shown that the limit value of the largest -eigenvalue is given by - -$\lim_{n \to \infty} \lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{D}_{m}\right)=4^{m}$ - -which appears to be a strict upper limit and thus perfectly suited for the scaling -factor - -$\epsilon_{p}=\varepsilon\cdot n\cdot 4^{m}$ - -Such an approximation is also cheap to compute, thereby making the algorithm both -efficient and robust.
-❗❗❗
-Due to the power of $m$, this approximation scales badly with increasing $m$ and -$n$. It is therefore recommended not to use $m > 6$. Probably also $n$ needs to be -limited in the future by running multiple smooths on subsets of the data and then -combining the results.
-❗❗❗ - -### 🧑‍💻 Final Implementation - -The updated weights matrix is then given by - -$\mathbf{W^{+}}=\mathbf{W}+\epsilon_{1}\cdot\mathbf{I}=\mathbf{W}+\varepsilon\cdot max\left(n, 10\right)\cdot\max\left(diag\left(\mathbf{W}\right)\right)\cdot\mathbf{I}$ - -where the $max$-operator was included to prevent $\epsilon_{1}$ from becoming too small. -Lifting the weights is not a problem because weights that need to be lifted will still -be negligible compared to the other weights afterwards.
-Analogously, the updated penalty matrix is given by - -$\mathbf{P}^{+}=\mathbf{P}+\epsilon_{2}\cdot\mathbf{I}=\mathbf{P}+\varepsilon\cdot max\left(n, 10\right)\cdot 4^{m}\cdot\mathbf{I}$ - -From a mathematical point of view, this approach introduces a second penalty term which -is the classical Tikhonov regularization term. Yet, this term is very small and -therefore virtually negligible compared to the actual derivative penalty term.
-Nevertheless, the Tikhonov regularization term will penalize large absolute values of -the smoothed values $\mathbf{z}$ which is not desirable since this will pull -$\mathbf{z}$ towards zero. To resolve this, the weighted average of the original values -$\mathbf{y}$ is subtracted before the smoothing is performed and added again afterwards: - -$\bar{y}=\frac{\sum_{i=1}^{n}w_{i}\cdot y_{i}}{\sum_{i=1}^{n}w_{i}}$
-$\mathbf{z}=\bar{y}+\left(\mathbf{W^{+}}+\tau\cdot\mathbf{P^{+}}\right)^{-1}\mathbf{W^{+}}\left(\mathbf{y}-\bar{y}\right)$ - -Consequently, $\mathbf{z}$ is pulled towards the weighted average of the original values -$\mathbf{y}$ instead of zero as $\tau$ tends to $+\infty$ which is way more desirable -(note that as $\tau$ tends to $+\infty$ $\mathbf{z}$ becomes a flat line anyway and -making it become the weighted average of $\mathbf{y}$ is mathematically sound). - -## 🏄 Extensions - -To make the smoothing spatially adaptive, the smoothing parameter $\tau$ is replaced by -individual smoothing parameters $\tau_{i}$ for each observation $y_{i}$. -Mathematically, this is equivalent to - -$\mathbf{z}=\left(\mathbf{W}+\tau\cdot\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right)^{-1}\mathbf{W}\mathbf{y}$ - -where $\mathbf{M}$ is a diagonal matrix of smoothing parameter weights.
-Now, the determination of $\epsilon_{p}$ becomes more complicated because the -eigenvalues of $\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}$ are not known. However, they -can be estimated via the spectral norm which is defined as - -$\left\Vert\mathbf{A}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{A}^{T}\mathbf{A}\right)}$ - -where $\mathbf{A}$ is a matrix and $\lambda_{max}\left(\mathbf{A}^{T}\mathbf{A}\right)$ -is the largest eigenvalue of the matrix product $\mathbf{A}^{T}\mathbf{A}$.
-This norm is sub-multiplicative, i.e., - -$\left\Vert \mathbf{A}\mathbf{B}\right\Vert _{2}\leq\left\Vert\mathbf{A}\right\Vert _{2}\cdot\left\Vert\mathbf{B}\right\Vert _{2}$ - -which means that an upper bound for the maximum eigenvalue of $\mathbf{A}\mathbf{B}$ can -be estimated when the spectral norms of $\mathbf{A}$ and $\mathbf{B}$ are known. This is -the case for $\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}$ since $\mathbf{M}$ is again a -diagonal matrix and the spectral norm of $\mathbf{D}_{m}$ has almost been calculated -above as the maximum eigenvalue of $\mathbf{D}_{m}^{T}\mathbf{D}_{m}$.
-It follows that - -$\left\Vert\mathbf{M}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{M}^{T}\mathbf{M}\right)}=\sqrt{max\left(\mathbf{M}^{T}\mathbf{M}\right)}=\sqrt{max\left(diag\left(\mathbf{M}\right)\right)^{2}}=max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)$ - -where $abs\left(diag\left(\mathbf{M}\right)\right)$ is the vector of absolute values of -the diagonal elements of $\mathbf{M}$.
-For $\mathbf{D}_{m}^{T}$ the spectral norm is given by - -$\left\Vert\mathbf{D}_{m}^{T}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{D}_{m}\right)}=\sqrt{4^{m}}=2^{m}$ - -Finally, the upper bound for the maximum eigenvalue of -$\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}$ is given by - -$\left\Vert\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right\Vert _{2}=\sqrt{\lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{M}^T\mathbf{D}_{m}\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right)}=\lambda_{max}\left(\mathbf{D}_{m}^{T}\mathbf{M}\mathbf{D}_{m}\right)\leq\left\Vert\mathbf{D}_{m}^{T}\right\Vert _{2}\cdot\left\Vert\mathbf{M}\right\Vert _{2}\cdot\left\Vert\mathbf{D}_{m}\right\Vert _{2}=2^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)\cdot 2^{m}=4^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)$ - -Combining all this $\mathbf{P}^{+}$ can be determined by - -$\epsilon_{p}=\varepsilon\cdot max\left(n, 10\right)\cdot 4^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)$
-$\mathbf{P}^{+}=\mathbf{P}+\epsilon_{p}\cdot\mathbf{I}=\mathbf{P}+\varepsilon\cdot max\left(n, 10\right)\cdot 4^{m}\cdot max\left(abs\left(diag\left(\mathbf{M}\right)\right)\right)\cdot\mathbf{I}$ - -This is again a cheap and robust approximation that does not require the calculation of -any eigenvalues.
-❗❗❗
-Due to the power of $m$, this approximation scales badly with increasing $m$ and -$n$. It is therefore recommended not to use $m > 6$. Probably also $n$ needs to be -limited in the future by running multiple smooths on subsets of the data and then -combining the results.
-❗❗❗
-Such an approach will be useful for a spatially adaptive smoothing algorithm like the -one provided in - -*A. Corbas, S.J. Choquette: "Automated Spectral Smoothing with Spatially Adaptive -Penalized Least Squares", Applied Spectroscopy Volume 65, Issue 6, pp.665-677, 2011 -[DOI](https://doi.org/10.1366/10-05971)* From 428948c4229c69a7ebdbd823f384189dd294de8b Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 20:59:37 +0200 Subject: [PATCH 040/118] fix: made error message compatible to the questionable Sklearn standard error message --- chemotools/utils/finite_differences.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/chemotools/utils/finite_differences.py b/chemotools/utils/finite_differences.py index f60de614..0ba05a03 100644 --- a/chemotools/utils/finite_differences.py +++ b/chemotools/utils/finite_differences.py @@ -238,13 +238,18 @@ def gen_squ_fw_fin_diff_mat_cho_banded( # first, it needs to be ensured that the number of data points is enough to # support the kernel for the respective difference order at least once - check_scalar( - n_data, - name="n_data", - target_type=Integral, - min_val=differences + 1, - include_boundaries="left", - ) + try: + check_scalar( + n_data, + name="n_data", + target_type=Integral, + min_val=differences + 1, + include_boundaries="left", + ) + + # NOTE: this is only for Sklearn compatibility + except ValueError: + raise ValueError(f"Got n_features = {n_data}, must be >= {differences + 1}.") # afterwards, the squared forward finite differences matrix is computed if orig_first: From e3dbf2ffabb1b827f074e4021bec27df3deb2233 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:00:07 +0200 Subject: [PATCH 041/118] fix: fixed original signal overwrite --- chemotools/utils/whittaker_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 51df29b3..34253f11 100644 --- 
a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -649,7 +649,7 @@ def _solve_single_b_fixed_lam( return ( self._solve( lam=lam, - b_weighted=b, + b_weighted=b.copy(), w=w, )[0], lam, From a72900af48753e59895e07360a14e31f6ff568c7 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:01:38 +0200 Subject: [PATCH 042/118] refactor: updated Whittaker-like baseline corrections with new Whittaker logic --- chemotools/baseline/_air_pls.py | 11 ++++------- chemotools/baseline/_ar_pls.py | 15 +++++++-------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 70d8c077..54d5e0cb 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -16,7 +16,6 @@ """ - import logging import numpy as np @@ -123,9 +122,9 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": # the internal solver is set up self._setup_for_fit( - series_size=X.shape[1], - lam=self.lam, + n_data=X.shape[1], differences=self.polynomial_order, + lam=self.lam, ) return self @@ -185,14 +184,12 @@ def _calculate_air_pls(self, x): # FIXME: this initialisation will will fail for many signals and produce a # zero-baseline z = np.zeros_like(x) - dssn_thresh = max(1e-3 * np.abs(x).sum(), 1e-308) # to avoid 0 equalities + dssn_thresh = max(1e-3 * np.abs(x).sum(), 1e-308) # to avoid 0 equalities # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` for i in range(0, self.nr_iterations - 1): # the baseline is fitted using the Whittaker smoother framework - z, _ = self._solve_single_x( - x=x, w=w, mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_ - ) + z, _ = self._solve_single_b_fixed_lam(b=x, w=w) d = x - z dssn = np.abs(d[d < 0].sum()) diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index ba3d5932..938f57c1 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -128,9 +128,9 @@ def 
fit(self, X: np.ndarray, y=None) -> "ArPls": # the internal solver is setup self._setup_for_fit( - series_size=X.shape[1], - lam=self.lam, + n_data=X.shape[1], differences=self.differences, + lam=self.lam, ) return self @@ -194,21 +194,20 @@ def _calculate_ar_pls(self, x): # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` for _ in range(self.nr_iterations): # the baseline is fitted using the Whittaker smoother framework - z, _ = self._solve_single_x( - x=x, w=w, mod_squ_fin_diff_mat_lub=self.base_squ_fw_fin_diff_mat_lub_ - ) + z, _ = self._solve_single_b_fixed_lam(b=x, w=w) d = x - z # if there is no data point below the baseline, the baseline is considered # to be fitted - d_negative = d[d < 0] + d_negative = d[np.where(d < 0)[0]] if len(d_negative) == 0: break - m = np.mean(d_negative) - s = np.std(d_negative) + m = d_negative.mean() + s = d_negative.std() exponent = np.clip(2.0 * (d - (2.0 * s - m)) / s, -709, 709) # type: ignore wt = 1.0 / (1.0 + np.exp(exponent)) if np.linalg.norm(w - wt) / np.linalg.norm(w) < self.ratio: # type: ignore break w = wt + return z From 5e41a0325f04811ae0a52e1e7f8a3722d2afb141 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:17:43 +0200 Subject: [PATCH 043/118] refactor: adapted to new internal whittaker implementation --- chemotools/smooth/_whittaker_smooth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index bd625fbf..bae814b4 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -111,7 +111,7 @@ def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": self.n_features_in_ = X.shape[1] # ... 
and all the required attributes for fitting self._setup_for_fit( - series_size=self.n_features_in_, + n_data=self.n_features_in_, lam=self.lam, differences=self.differences, ) From d4a2f1d3214823a0eef641e699edd326442b5c3d Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:21:14 +0200 Subject: [PATCH 044/118] refactor: updated Whittaker functionality tests; reverted internal copying --- chemotools/utils/whittaker_base.py | 2 +- tests/test_functionality.py | 68 ++++++++++++++++-------------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 34253f11..51df29b3 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -649,7 +649,7 @@ def _solve_single_b_fixed_lam( return ( self._solve( lam=lam, - b_weighted=b.copy(), + b_weighted=b, w=w, )[0], lam, diff --git a/tests/test_functionality.py b/tests/test_functionality.py index 21a615e4..a114ae7b 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -30,7 +30,7 @@ StandardNormalVariate, ) from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth -from chemotools.utils.models import _PENTAPY_AVAILABLE, BandedSolveDecompositions +from chemotools.utils.models import _PENTAPY_AVAILABLE, BandedSolvers from tests.fixtures import reference_airpls # noqa: F401 from tests.fixtures import reference_arpls # noqa: F401 from tests.fixtures import reference_msc_mean # noqa: F401 @@ -817,12 +817,17 @@ def test_whittaker_smooth( else: weights = None + spectrum_to_fit_original = np.tile(spectrum, reps=reps) + spectrum_to_fit = spectrum_to_fit_original.copy() + # Act spectrum_corrected = whittaker_smooth.fit_transform( - X=np.tile(spectrum, reps=reps), sample_weight=weights + X=spectrum_to_fit, sample_weight=weights ) # Assert + # NOTE: the following test makes sure nothing was overwritten + assert np.array_equal(spectrum_to_fit, spectrum_to_fit_original) assert 
np.allclose( spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 ) @@ -836,65 +841,64 @@ def test_whittaker_with_pentapy( ): # this test is skipped with a warning if pentapy is not installed if not _PENTAPY_AVAILABLE: - logging.warning("pentapy is not installed") - pytest.skip("pentapy is not installed, test cannot be performed") + pytest.skip("Pentapy is not installed, test cannot be performed") # else nothing # Arrange np.random.seed(42) spectrum = np.random.rand(n_samples, 1000) - whittaker_smooth = WhittakerSmooth(differences=2) + whittaker_smooth = WhittakerSmooth(lam=100.0, differences=2) + + weights = None if with_weights and not same_weights_for_all: weights = np.ones(shape=(n_samples, len(spectrum[0]))) elif with_weights and same_weights_for_all: weights = np.ones(shape=(len(spectrum[0]),)) - else: - weights = None # Act with pentapy spectrum_corr_pentapy = whittaker_smooth.fit_transform( - spectrum, sample_weight=weights + X=spectrum, sample_weight=weights ) # Assert with pentapy - assert ( - whittaker_smooth._solve( - bw=spectrum.transpose(), - log_lam=np.log(whittaker_smooth.lam), - w=None, - mod_squ_fin_diff_mat_lub=whittaker_smooth.base_squ_fw_fin_diff_mat_lub_, - )[2] - == BandedSolveDecompositions.PENTAPY - ) + # NOTE: the weight is not correct since the test only checks the method + solve_method = whittaker_smooth._solve( + lam=whittaker_smooth._lam_inter_.fixed_lambda, + b_weighted=spectrum.transpose(), + w=1.0, + )[1] + assert solve_method == BandedSolvers.PENTAPY # Act without pentapy whittaker_smooth._WhittakerLikeSolver__allow_pentapy = False # type: ignore - spectrum_corr_scipy = whittaker_smooth.fit_transform( + spectrum_corr_factorized_solve = whittaker_smooth.fit_transform( spectrum, sample_weight=weights ) # Assert without pentapy - assert ( - whittaker_smooth._solve( - bw=spectrum.transpose(), - log_lam=np.log(whittaker_smooth.lam), - w=None, - mod_squ_fin_diff_mat_lub=whittaker_smooth.base_squ_fw_fin_diff_mat_lub_, - 
)[2] - == BandedSolveDecompositions.CHOLESKY - ) - assert np.allclose(spectrum_corr_pentapy[0], spectrum_corr_scipy[0]) + # NOTE: the weight is not correct since the test only checks the method + solve_method = whittaker_smooth._solve( + lam=whittaker_smooth._lam_inter_.fixed_lambda, + b_weighted=spectrum.transpose(), + w=1.0, + )[1] + assert solve_method == BandedSolvers.PIVOTED_LU + assert np.allclose(spectrum_corr_pentapy[0], spectrum_corr_factorized_solve[0]) @pytest.mark.parametrize( - "log10_lam", np.arange(start=-50.0, stop=170.0, step=20.0).tolist() + "log10_lam", np.arange(start=-25.0, stop=15.0, step=5.0).tolist() ) -@pytest.mark.parametrize("difference", [1, 2, 10]) +@pytest.mark.parametrize("difference", [1, 2]) @pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) @pytest.mark.parametrize("size", [5_000]) def test_whittaker_constant_signal( - size: int, fill_value: float, difference: int, log10_lam: float + size: int, + fill_value: float, + difference: int, + log10_lam: float, ) -> None: + # Arrange spectrum = np.full(shape=(size,), fill_value=fill_value).reshape((1, -1)) whittaker_smooth = WhittakerSmooth(lam=10.0**log10_lam, differences=difference) @@ -908,5 +912,5 @@ def test_whittaker_constant_signal( spectrum_corrected[0], spectrum[0], atol=size * np.finfo(np.float64).eps, # type: ignore - rtol=0.0, + rtol=1e-6, ) From 664e66e910860de6d9159351e25ade3b494c2bc3 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:27:42 +0200 Subject: [PATCH 045/118] refactor: moved `pentapy` import check into dedicated `_runtime`-module --- chemotools/_runtime/__init__.py | 21 +++++++++++++++++++++ chemotools/utils/models.py | 11 ----------- chemotools/utils/whittaker_base.py | 6 +++--- tests/test_functionality.py | 7 +++---- 4 files changed, 27 insertions(+), 18 deletions(-) create mode 100644 chemotools/_runtime/__init__.py diff --git a/chemotools/_runtime/__init__.py b/chemotools/_runtime/__init__.py new file mode 100644 index 00000000..10dd6a18 
--- /dev/null +++ b/chemotools/_runtime/__init__.py @@ -0,0 +1,21 @@ +""" +This submodule checks for the presence of the required software packages at runtime. + +The following optional packages are checked for: +- `pentapy` for solving pentadiagonal systems of equations for the Whittaker-Henderson + smoothing algorithm. + +""" + +### Imports ### + +# if possible, pentapy is imported since it provides a more efficient implementation +# of solving pentadiagonal systems of equations, but the package is not in the +# dependencies, so ``chemotools`` needs to be made aware of whether it is available +PENTAPY_AVAILABLE: bool = False +try: + import pentapy as pp # noqa: F401 + + PENTAPY_AVAILABLE: bool = True +except ImportError: + pass diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index a0e73709..409153cb 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -13,17 +13,6 @@ import numpy as np -# if possible, pentapy is imported since it provides a more efficient implementation -# of solving pentadiagonal systems of equations, but the package is not in the -# dependencies, so ``chemotools`` needs to be made aware of whether it is available -try: - import pentapy as pp # noqa: F401 - - _PENTAPY_AVAILABLE = True -except ImportError: - _PENTAPY_AVAILABLE = False - - ### Enums ### # an Enum class for the solve types used for solving linear systems that involve banded diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index 51df29b3..a1e8474d 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -14,6 +14,7 @@ import numpy as np from scipy.optimize import minimize_scalar +from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.utils.banded_linalg import ( LAndUBandCounts, conv_upper_chol_banded_to_lu_banded_storage, @@ -26,7 +27,6 @@ gen_squ_fw_fin_diff_mat_cho_banded, ) from chemotools.utils.models import ( - _PENTAPY_AVAILABLE, BandedLUFactorization, 
BandedPentapyFactorization, BandedSolvers, @@ -34,7 +34,7 @@ WhittakerSmoothMethods, ) -if _PENTAPY_AVAILABLE: +if PENTAPY_AVAILABLE: import pentapy as pp ### Type Aliases ### @@ -273,7 +273,7 @@ def _setup_for_fit( # finally, Pentapy is enabled if available, the number of differences is 2, # and the lambda parameter is not fitted automatically self._pentapy_enabled_: bool = ( - _PENTAPY_AVAILABLE + PENTAPY_AVAILABLE and self.differences_ == 2 and self.__allow_pentapy and not self._lam_inter_.fit_auto diff --git a/tests/test_functionality.py b/tests/test_functionality.py index a114ae7b..cf789c99 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -1,9 +1,8 @@ -import logging - import numpy as np import pandas as pd import pytest +from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.augmentation import ( BaselineShift, ExponentialNoise, @@ -30,7 +29,7 @@ StandardNormalVariate, ) from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth -from chemotools.utils.models import _PENTAPY_AVAILABLE, BandedSolvers +from chemotools.utils.models import BandedSolvers from tests.fixtures import reference_airpls # noqa: F401 from tests.fixtures import reference_arpls # noqa: F401 from tests.fixtures import reference_msc_mean # noqa: F401 @@ -840,7 +839,7 @@ def test_whittaker_with_pentapy( n_samples: int, with_weights: bool, same_weights_for_all: bool ): # this test is skipped with a warning if pentapy is not installed - if not _PENTAPY_AVAILABLE: + if not PENTAPY_AVAILABLE: pytest.skip("Pentapy is not installed, test cannot be performed") # else nothing From ff17817e302f67dffe725784b69b079c18d4416e Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:31:11 +0200 Subject: [PATCH 046/118] fix: removed `# else nothing` --- chemotools/utils/banded_linalg.py | 2 +- chemotools/utils/check_inputs.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/chemotools/utils/banded_linalg.py 
b/chemotools/utils/banded_linalg.py index 1cb34cd9..6c940449 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -93,7 +93,7 @@ def _check_full_arr_n_diag_counts_for_lu_banded( if num_rows != num_cols: raise ValueError(f"\nThe matrix must be square, but it has shape {a_shape}.") - # else nothing + def conv_upper_chol_banded_to_lu_banded_storage( diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index ed64424e..107ec1ae 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -30,7 +30,6 @@ def check_weights( weights_checked = weights.reshape((1, -1)) else: weights_checked = weights - # else nothing # now, the need to be checked for having the right shape weights_checked = check_array( @@ -48,7 +47,6 @@ def check_weights( f"Weights must have {n_features} columns, but they have " f"{weights_checked.shape[1]} columns." ) - # else nothing # finally, it needs to be checked that the weights are all non-negative ... if np.any(weights < 0.0): @@ -61,7 +59,6 @@ def check_weights( raise ValueError( "At least one weights needs to be > 0, but all weights were 0.0." 
) - # else nothing # the weights are returned together with a flag whether to apply the same weights # for all samples or not From 67694d92a04898b193d8f421011c99f45ae14c04 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:33:57 +0200 Subject: [PATCH 047/118] refactor: raise error when weights are not provided for log marginal likelihood --- chemotools/utils/whittaker_base.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base.py index a1e8474d..8f997172 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base.py @@ -677,15 +677,21 @@ def _solve_single_b_auto_lam_lml( """ + # if the weights are not provided, the log marginal likelihood cannot be + # computed - at least not in a meaningful way + if isinstance(w, (float, int)): + raise ValueError( + "\nAutomatic fitting of the penalty weight lambda by maximizing the " + "log marginal likelihood is only possible if weights are provided.\n" + "Please provide weights for the series to smooth." 
+ ) + # first, the constant terms of the log marginal likelihood are computed starting # from the log pseudo-determinant of the weight matrix, i.e., the product of the # non-zero elements of the weight vector - nnz_w = self.n_data_ - log_pseudo_det_w = 0.0 # ln(1**nnz_w) = 0.0 - if isinstance(w, np.ndarray): - nonzero_w_idxs = np.where(w > w.max() * self.__zero_weight_tol)[0] - nnz_w = nonzero_w_idxs.size - log_pseudo_det_w = np.log(w[nonzero_w_idxs]).sum() + nonzero_w_idxs = np.where(w > w.max() * self.__zero_weight_tol)[0] + nnz_w = nonzero_w_idxs.size + log_pseudo_det_w = np.log(w[nonzero_w_idxs]).sum() # the constant term of the log marginal likelihood is computed w_plus_n_samples_term = ( From 200e45ef25ce91afa8ba68afaa3c20cd4202a42a Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 21:53:06 +0200 Subject: [PATCH 048/118] fix: fixed type hints breaking Python 3.9 compatibility --- chemotools/augmentation/spectrum_scale.py | 8 +++++++- chemotools/baseline/_air_pls.py | 5 +++-- chemotools/baseline/_ar_pls.py | 5 +++-- chemotools/smooth/_whittaker_smooth.py | 15 ++++++++++----- chemotools/utils/banded_linalg.py | 3 ++- chemotools/utils/check_inputs.py | 14 ++++++++++---- tests/test_functionality.py | 1 - 7 files changed, 35 insertions(+), 16 deletions(-) diff --git a/chemotools/augmentation/spectrum_scale.py b/chemotools/augmentation/spectrum_scale.py index a3249dab..7365b8e1 100644 --- a/chemotools/augmentation/spectrum_scale.py +++ b/chemotools/augmentation/spectrum_scale.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted @@ -35,7 +37,11 @@ class SpectrumScale(OneToOneFeatureMixin, BaseEstimator, TransformerMixin): Transform the input data by scaling the spectrum. 
""" - def __init__(self, scale: float = 0.0, random_state: int | None = None): + def __init__( + self, + scale: float = 0.0, + random_state: Optional[int] = None, + ): self.scale = scale self.random_state = random_state diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 54d5e0cb..18de17c0 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -17,6 +17,7 @@ """ import logging +from typing import Union import numpy as np from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin @@ -84,11 +85,11 @@ class AirPls( # TODO: polynomial order is actually differences def __init__( self, - lam: int | float = 100, + lam: Union[float, int] = 100, polynomial_order: int = 1, nr_iterations: int = 15, ): - self.lam: int | float = lam + self.lam: Union[float, int] = lam self.polynomial_order: int = polynomial_order self.nr_iterations: int = nr_iterations diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 938f57c1..00f1ba23 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -20,6 +20,7 @@ import logging from numbers import Integral +from typing import Union import numpy as np from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin @@ -76,12 +77,12 @@ class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLike def __init__( self, - lam: float | int = 1e4, + lam: Union[float, int] = 1e4, differences: int = 2, ratio: float = 0.01, nr_iterations: int = 100, ): - self.lam: float | int = lam + self.lam: Union[float, int] = lam self.differences: int = differences self.ratio: float = ratio self.nr_iterations: int = nr_iterations diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index bae814b4..8565c05b 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -14,6 +14,8 @@ """ +from typing import Optional, Union + 
from numpy import ndarray from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted @@ -76,11 +78,11 @@ class WhittakerSmooth( def __init__( self, - lam: int | float = 1e2, + lam: Union[float, int] = 1e2, differences: int = 1, ): - self.lam = lam - self.differences = differences + self.lam: Union[float, int] = lam + self.differences: int = differences def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": """ @@ -125,7 +127,7 @@ def transform( self, X: ndarray, y: None = None, - sample_weight: ndarray | None = None, + sample_weight: Optional[ndarray] = None, ) -> ndarray: """ Transform the input data by calculating the Whittaker smooth. @@ -178,7 +180,10 @@ def transform( )[0] def fit_transform( - self, X: ndarray, y: None = None, sample_weight: ndarray | None = None + self, + X: ndarray, + y: None = None, + sample_weight: Optional[ndarray] = None, ) -> ndarray: """Fit the transformer to the input data and transform it. 
diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 6c940449..508ee1b6 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -15,6 +15,7 @@ ### Imports ### from numbers import Integral +from typing import Union import numpy as np from numpy.typing import ArrayLike @@ -184,7 +185,7 @@ def conv_upper_chol_banded_to_lu_banded_storage( def conv_to_lu_banded_storage( - a: np.ndarray | spmatrix, + a: Union[np.ndarray, spmatrix], l_and_u: tuple[int, int], ) -> np.ndarray: """Converts a (sparse) square banded matrix A to its banded storage required for diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index 107ec1ae..e57c4c30 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -1,10 +1,14 @@ -from typing import Literal +from typing import Literal, Optional, Tuple, Type, Union import numpy as np from sklearn.utils.validation import check_array -def check_input(X, y=None, dtype: type | Literal["numeric"] | None = "numeric"): +def check_input( + X, + y=None, + dtype: Union[Type, Literal["numeric"], None] = "numeric", +): # Check that X is a 2D array and has only finite values X = check_array(X, ensure_2d=True, force_all_finite=True, dtype=dtype) @@ -18,8 +22,10 @@ def check_input(X, y=None, dtype: type | Literal["numeric"] | None = "numeric"): def check_weights( - weights: np.ndarray | None, n_samples: int, n_features: int -) -> tuple[np.ndarray | None, bool]: + weights: Optional[np.ndarray], + n_samples: int, + n_features: int, +) -> Tuple[Optional[np.ndarray], bool]: # if the weights are None, None is returned and a flag that the same weights should # be applied for all samples if weights is None: diff --git a/tests/test_functionality.py b/tests/test_functionality.py index cf789c99..f5ea8ca0 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -841,7 +841,6 @@ def test_whittaker_with_pentapy( # this test is 
skipped with a warning if pentapy is not installed if not PENTAPY_AVAILABLE: pytest.skip("Pentapy is not installed, test cannot be performed") - # else nothing # Arrange np.random.seed(42) From 45c2c384f2b6db13b64ffe8f145b9a1c05334992 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 23:19:28 +0200 Subject: [PATCH 049/118] fix: removed dead branch --- chemotools/baseline/_air_pls.py | 9 +-------- chemotools/baseline/_ar_pls.py | 9 +-------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 18de17c0..b35d667a 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -170,14 +170,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: for i, x in enumerate(X_): X_[i] = x - self._calculate_air_pls(x) - # FIXME: can this even happen because X is ensured to be 2D? - if X_.ndim == 1: - # FIXME: shouldn't this be a row and not a column vector because - # Scikit-Learn works with shape (n_samples, n_features), i.e., - # (1, n_features) for a single sample? - return X_.reshape((-1, 1)) - else: - return X_ + return X_ def _calculate_air_pls(self, x): # FIXME: this initial weighting strategy might not yield the best results diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 00f1ba23..a1b56a7b 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -177,14 +177,7 @@ def transform(self, X: np.ndarray, y=None) -> np.ndarray: for i, x in enumerate(X_): X_[i] = x - self._calculate_ar_pls(x) - # FIXME: can this even happen because X is ensured to be 2D? - if X_.ndim == 1: - # FIXME: shouldn't this be a row and not a column vector because - # Scikit-Learn works with shape (n_samples, n_features), i.e., - # (1, n_features) for a single sample? 
- return X_.reshape((-1, 1)) - else: - return X_ + return X_ def _calculate_ar_pls(self, x): # FIXME: this initial weighting strategy might not yield the best results From fc206a1eee0aae421417e78babe01df96c542fe3 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 23:20:36 +0200 Subject: [PATCH 050/118] refactor/fix: removed dead functions; excluded hard-to-produce-edge-cases from coverage; removed unused imports --- chemotools/utils/banded_linalg.py | 167 ++---------------------------- 1 file changed, 11 insertions(+), 156 deletions(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index 508ee1b6..f72d6bb8 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -14,14 +14,9 @@ ### Imports ### -from numbers import Integral -from typing import Union - import numpy as np from numpy.typing import ArrayLike from scipy.linalg import lapack -from scipy.sparse import spmatrix -from sklearn.utils import check_array, check_scalar from chemotools.utils.models import BandedLUFactorization @@ -51,52 +46,6 @@ def _datacopied(arr, original): return arr.base is None -def _check_full_arr_n_diag_counts_for_lu_banded( - a_shape: tuple[int, int], - l_and_u: tuple[int, int], -) -> None: - """Validates the shape of a full array and the number of sub- and superdiagonals - for LU decomposition of a banded (sparse) matrix. 
- """ - num_rows, num_cols = a_shape - num_low_diags, num_upp_diags = l_and_u - - check_scalar( - x=num_rows, - name="num_rows", - target_type=Integral, - min_val=1, - include_boundaries="left", - ) - check_scalar( - x=num_cols, - name="num_cols", - target_type=Integral, - min_val=1, - include_boundaries="left", - ) - check_scalar( - x=num_low_diags, - name="num_low_diags", - target_type=Integral, - min_val=0, - max_val=num_rows - 1, - include_boundaries="both", - ) - check_scalar( - x=num_upp_diags, - name="num_upp_diags", - target_type=Integral, - min_val=0, - max_val=num_rows - 1, - include_boundaries="both", - ) - - if num_rows != num_cols: - raise ValueError(f"\nThe matrix must be square, but it has shape {a_shape}.") - - - def conv_upper_chol_banded_to_lu_banded_storage( ab: np.ndarray, ) -> tuple[LAndUBandCounts, np.ndarray]: @@ -184,103 +133,6 @@ def conv_upper_chol_banded_to_lu_banded_storage( return l_and_u, np.row_stack((ab, ab_subdiags)) -def conv_to_lu_banded_storage( - a: Union[np.ndarray, spmatrix], - l_and_u: tuple[int, int], -) -> np.ndarray: - """Converts a (sparse) square banded matrix A to its banded storage required for - LU decomposition in LAPACK-routines like the function ``lu_banded`` or SciPy's - ``solve_banded``. This format is identical for pentapy where it is referred to as - "column-wise flattened". - Cholesky-decompositions require a different format. - - Parameters - ---------- - a : np.ndarray or sparse matrix of shape (n, n) - A square banded NumPy-2D-Array or SciPy sparse matrix. "Square" means that the - row count equals the column count while "banded" implies that only the main - diagonal and a few sub- and/or superdiagonals are non-zero (see `l_and_u`). - l_and_u : tuple[int, int] - The number of "non-zero" sub- (first) and superdiagonals (second element) aside - the main diagonal which does not need to be considered here. "Non-zero" can be - a bit misleading in this context. 
These numbers should count up to the diagonal - after which all following diagonals are zero. Zero-diagonals that come before - still need to be included. - Wrong specification of this can lead to non-zero-diagonals being ignored or - zero-diagonals being included which corrupts the results or reduces the - performance. - - Returns - ------- - ab : np.ndarray of shape (l_and_u[0] + 1 + l_and_u[1], n) - A NumPy-2D-Array resembling `a` in banded storage format (see Notes). - - Raises - ------ - ValueError - If `a` is not square. - ValueError - If the number of rows of `a` does not match the number of rows given by - the diagonal number. - - Notes - ----- - For LAPACK's LU decomposition, the matrix `a` is stored in `ab` using the matrix - diagonal ordered form: - - ```python - ab[u + i - j, j] == a[i,j] # see below for u - ``` - - An example of `ab` (shape of a is ``(7,7)``, `u`=3 superdiagonals, `l`=2 - subdiagonals) looks like: - - ```python - * * * a03 a14 a25 a36 - * * a02 a13 a24 a35 a46 - * a01 a12 a23 a34 a45 a56 # ^ superdiagonals - a00 a11 a22 a33 a44 a55 a66 # main diagonal - a10 a21 a32 a43 a54 a65 * # v subdiagonals - a20 a31 a42 a53 a64 * * - ``` - - where all entries marked with ``*`` are ``0`` when returned by this function. - Internally LAPACK relies on an expanded version of this format to perform inplace - operations, but the respective functions handle the conversion themselves. 
- - """ - - # the matrix is checked for being square and for having the correct number of rows - num_low_diags, num_upp_diags = l_and_u - a = check_array(array=a, accept_sparse=True, ensure_2d=True) - _check_full_arr_n_diag_counts_for_lu_banded( - a_shape=a.shape, l_and_u=l_and_u # type: ignore - ) - - # first, the number of lower and upper diagonals is extracted and turned into two - # offset vectors - main_diag_idx = num_upp_diags - num_cols = a.shape[-1] - - # now, the diagonal extraction method is specified based and the banded storage is - # filled by it - diag_method = a.diagonal # type: ignore - ab = np.zeros( - shape=(num_low_diags + 1 + num_upp_diags, num_cols), - dtype=a.dtype, # type: ignore - ) - - # the superdiagonals and the main diagonal - for offset in range(num_upp_diags, -1, -1): - ab[main_diag_idx - offset, offset::] = diag_method(offset) - - # the subdiagonals - for offset in range(-1, -num_low_diags - 1, -1): - ab[main_diag_idx - offset, 0:offset] = diag_method(offset) - - return ab - - ### LAPACK-Wrappers for banded LU decomposition ### @@ -368,7 +220,7 @@ def lu_banded( # then, the number of lower and upper subdiagonals needs to be checked for being # consistent with the shape of ``ab`` num_low_diags, num_upp_diags = l_and_u - if num_low_diags + num_upp_diags + 1 != ab.shape[0]: + if num_low_diags + num_upp_diags + 1 != ab.shape[0]: # pragma: no cover raise ValueError( f"\nInvalid values for the number of lower and upper " f"diagonals: l+u+1 ({num_low_diags + num_upp_diags + 1}) does not equal " @@ -403,7 +255,9 @@ def lu_banded( ) # Case 2: the factorisation was not completed due to invalid input - raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrf.") + raise ValueError( # pragma: no cover # noqa: E501 + f"\nIllegal value in {-info}-th argument of internal gbtrf." 
+ ) def lu_solve_banded( @@ -464,7 +318,7 @@ def lu_solve_banded( # then, the shapes of the LU decomposition and ``b`` need to be validated against # each other - if lub_factorization.n_cols != b_inter.shape[0]: + if lub_factorization.n_cols != b_inter.shape[0]: # pragma: no cover raise ValueError( f"\nShapes of lub ({lub_factorization.n_cols}) and b " f"({b_inter.shape[0]}) are not compatible." @@ -492,11 +346,11 @@ def lu_solve_banded( raise np.linalg.LinAlgError("\nMatrix is singular.") # Case 3: the solution could not be computed due to invalid input - elif info < 0: + elif info < 0: # pragma: no cover raise ValueError(f"\nIllegal value in {-info}-th argument of internal gbtrs.") # Case 4: unexpected error - raise AssertionError( + raise AssertionError( # pragma: no cover f"\nThe internal gbtrs returned info > 0 ({info}) which should not happen." ) @@ -560,7 +414,7 @@ def slogdet_lu_banded( # overflow however needs to lead to a raise and in this case the log(det) is either # +inf in case of overflow only or NaN in case of the simultaneous occurrence of # zero and overflow - if np.isnan(logabsdet) or np.isposinf(logabsdet): + if np.isnan(logabsdet) or np.isposinf(logabsdet): # pragma: no cover raise OverflowError( "\nFloating point overflow in natural logarithm. At least 1 main diagonal " "entry results in overflow, thereby corrupting the determinant." 
@@ -568,9 +422,10 @@ def slogdet_lu_banded( # finally, the absolute value of the natural logarithm of the determinant is # returned together with its sign - if np.isneginf(logabsdet): + if np.isneginf(logabsdet): # pragma: no cover return 0.0, logabsdet - elif u_diag_sign_is_pos: + + if u_diag_sign_is_pos: return sign, logabsdet return -sign, logabsdet From bf9c5c8eac916e0c32349dadfc02ff86cd4c7ea1 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 23:20:51 +0200 Subject: [PATCH 051/118] refactor: lowered method name --- chemotools/utils/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index 409153cb..d5b79dbb 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -123,7 +123,7 @@ class WhittakerSmoothLambda: def _validate_n_set_method(self) -> None: try: - self.method_used = WhittakerSmoothMethods(self.method) + self.method_used = WhittakerSmoothMethods(self.method.lower()) except ValueError: raise ValueError( f"\nThe method '{self.method}' is not valid. " From 5a1f735b8e3697cabc03488b21e87212c62041aa Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 23:21:11 +0200 Subject: [PATCH 052/118] feat: added coverage to tests --- pyproject.toml | 5 +++++ requirements-dev.txt | 1 + 2 files changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 6c046a18..c0c5a5c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,11 @@ requires = [ ] build-backend = "setuptools.build_meta" +[tool.coverage.run] +omit = [ + "chemotools/_runtime/*", + ] + [tool.ruff] # Enable pycodestyle (`E`), Pyflakes (`F`) checks. 
select = ["E", "F"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 3b69fc17..2258cbae 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,4 +3,5 @@ isort matplotlib pentapy pytest +pytest-cov pytest-xdist \ No newline at end of file From a8ecb7d77914ea520ca93a3becee44971e336fa0 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 23:33:12 +0200 Subject: [PATCH 053/118] tests: added parametrized parallelizable tests for the lambda-value of the Whittaker smoother --- tests/test_for_utils/test_models.py | 252 ++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 tests/test_for_utils/test_models.py diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py new file mode 100644 index 00000000..51c21592 --- /dev/null +++ b/tests/test_for_utils/test_models.py @@ -0,0 +1,252 @@ +""" +Test suite for the utility models in the :mod:`chemotools.utils.models` module. + +""" + +### Imports ### + +from typing import List, Tuple, Union + +import numpy as np +import pytest + +from chemotools.utils.models import WhittakerSmoothLambda, WhittakerSmoothMethods +from tests.test_for_utils.utils import ExpectedWhittakerSmoothLambda, float_is_bit_equal + +### Type aliases ### + +_Numeric = Union[float, int] +_LambdaValueNumeric = Union[_Numeric, Tuple[_Numeric, _Numeric]] +_LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] +_WhittakerMethod = Union[str, WhittakerSmoothMethods] +_WhittakerMethodSequence = List[_WhittakerMethod] +_ExpectedLambdaResult = Union[ + ExpectedWhittakerSmoothLambda, + ValueError, + TypeError, +] +_LambdaTestCombination = Tuple[ + _LambdaValueNumericOrFlawed, + _WhittakerMethodSequence, + _ExpectedLambdaResult, +] + +### Global constants ### + +_NAN: float = float("nan") +_FIXED_WHITTAKER_METHODS: _WhittakerMethodSequence = [ + "fixed", + WhittakerSmoothMethods.FIXED, +] +_LOGML_WHITTAKER_METHODS: _WhittakerMethodSequence = [ + "logml", + 
WhittakerSmoothMethods.LOGML, +] +# NOTE: "aauto" is not a typo, but helps to not confuse it with "all" +_aauto_whittaker_methods: _WhittakerMethodSequence = _LOGML_WHITTAKER_METHODS + [] +_all_whittaker_methods: _WhittakerMethodSequence = ( + _FIXED_WHITTAKER_METHODS + _aauto_whittaker_methods +) + + +### Test Suite ### + + +@pytest.mark.parametrize( + "combination", + [ + ( # Number 0 + 100.0, + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 1 + (100.0, 100.0), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 2 + (100.0, 100.000001), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 3 + (100.0, 100.000001), + _aauto_whittaker_methods, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 4 + (100.000001, 100.0), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 5 + (100.000001, 100.0), + _aauto_whittaker_methods, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 6 + (100.000001, 100.0), + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + 
fixed_lambda=100.000001, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 7 + (100.0, 10_000.0), + _LOGML_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=WhittakerSmoothMethods.LOGML, + log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + ), + ), + ( # Number 8 + (10_000.0, 100.0), + _LOGML_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=WhittakerSmoothMethods.LOGML, + log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + ), + ), + ( # Number 9 + 0.0, + _FIXED_WHITTAKER_METHODS, + ValueError, + ), + ( # Number 10 + (100.0, 10_000.0), + _FIXED_WHITTAKER_METHODS, + ValueError, + ), + ( # Number 11 + 100.0, + _aauto_whittaker_methods, + ValueError, + ), + ( # Number 12 + (0.0, 100.0), + _all_whittaker_methods, + ValueError, + ), + ( # Number 13 + (100.0, 0.0), + _all_whittaker_methods, + ValueError, + ), + ( # Number 14 + (0.0, 0.0), + _all_whittaker_methods, + ValueError, + ), + ( # Number 15 + "error", + _all_whittaker_methods, + TypeError, + ), + ( # Number 16 + 100.0, + "error", + ValueError, + ), + ], +) +def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> None: + """ + Tests the class :class:`WhittakerSmoothLambda` for the correct behavior of its + ``__post_init__`` method. 
+ + """ + + # the combination is unpacked + lambda_value, methods, expected_result = combination + + # if the expected result is an exception, it is tested whether the correct exception + # is raised + if not isinstance(expected_result, ExpectedWhittakerSmoothLambda): + for meth in methods: + with pytest.raises(expected_result): # type: ignore + WhittakerSmoothLambda( + bounds=lambda_value, # type: ignore + method=meth, # type: ignore + ) + + return + + # if the expected result is a valid result, the class is instantiated and the + # attributes are tested + for meth in methods: + lambda_model = WhittakerSmoothLambda( + bounds=lambda_value, # type: ignore + method=meth, # type: ignore + ) + + assert lambda_model.fit_auto == expected_result.fit_auto + assert lambda_model.method_used == expected_result.method_used + # NOTE: since NAN-values are used, the comparison is split into two parts for + # the fixed lambda value and each of the bounds + assert float_is_bit_equal( + value=lambda_model.fixed_lambda, + reference=expected_result.fixed_lambda, + ) + assert float_is_bit_equal( + value=lambda_model.auto_bounds[0], + reference=expected_result.auto_bounds[0], + ) + assert float_is_bit_equal( + value=lambda_model.auto_bounds[1], + reference=expected_result.auto_bounds[1], + ) + assert float_is_bit_equal( + value=lambda_model.log_auto_bounds[0], + reference=expected_result.log_auto_bounds[0], + ) + assert float_is_bit_equal( + value=lambda_model.log_auto_bounds[1], + reference=expected_result.log_auto_bounds[1], + ) From 365c5dd5422cedd7c3d8b6559d79fe3af8318e3e Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 10 May 2024 23:36:44 +0200 Subject: [PATCH 054/118] tests: added utility functions for model testing --- tests/test_for_utils/utils.py | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/test_for_utils/utils.py b/tests/test_for_utils/utils.py index 8019f37b..85d20896 100644 --- a/tests/test_for_utils/utils.py +++ 
b/tests/test_for_utils/utils.py @@ -10,6 +10,8 @@ ### Imports ### +from dataclasses import dataclass +from math import isnan from typing import Tuple import numpy as np @@ -17,9 +19,67 @@ from scipy.sparse import csr_matrix from scipy.sparse import diags as sp_diags +from chemotools.utils.models import WhittakerSmoothMethods + +### Dataclasses ### + + +@dataclass +class ExpectedWhittakerSmoothLambda: + """ + Dataclass for checking the expected results for the class :class:`WhittakerSmoothLambda` + from the module :mod:`chemotools.utils.models`. + + """ # noqa: E501 + + fixed_lambda: float + auto_bounds: Tuple[float, float] + fit_auto: bool + method_used: WhittakerSmoothMethods + log_auto_bounds: Tuple[float, float] = (0.0, 0.0) + + ### Utility Functions ### +def float_is_bit_equal(value: float, reference: float) -> bool: + """ + Checks if two floating-point numbers are equal up to the last bit and handles the + case of NaN values as well. + + Doctests + -------- + >>> # Imports + >>> from tests.test_for_utils.utils import float_is_bit_equal + + >>> # Test 1 + >>> float_is_bit_equal(value=1.0, reference=1.0) + True + + >>> # Test 2 + >>> float_is_bit_equal(value=1.0, reference=10.0) + False + + >>> # Test 3 + >>> float_is_bit_equal(value=1.0, reference=float("nan")) + False + + >>> # Test 4 + >>> float_is_bit_equal(value=float("nan"), reference=float("nan")) + True + + >>> # Test 5 + >>> float_is_bit_equal(value=float("nan"), reference=1.0) + False + + """ + + if isnan(reference): + return isnan(value) + + return value == reference + + def conv_upper_cho_banded_storage_to_sparse( ab: np.ndarray, ) -> csr_matrix: From 9140e92181eff722affc64b612e746805bfbd91a Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 10:51:13 +0200 Subject: [PATCH 055/118] style: made class heritage more structured --- chemotools/baseline/_air_pls.py | 5 ++++- chemotools/baseline/_ar_pls.py | 7 ++++++- chemotools/smooth/_whittaker_smooth.py | 10 +++++++--- 3 files changed, 17 
insertions(+), 5 deletions(-) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index b35d667a..717e378a 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -31,7 +31,10 @@ # TODO: is polynomial_order actually differences and if so, is the description correct? class AirPls( - OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver + OneToOneFeatureMixin, + BaseEstimator, + TransformerMixin, + WhittakerLikeSolver, ): """ This class implements the Adaptive Iteratively Reweighted Penalized Least Squares diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index a1b56a7b..0c5368ee 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -32,7 +32,12 @@ logger = logging.getLogger(__name__) -class ArPls(OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver): +class ArPls( + OneToOneFeatureMixin, + BaseEstimator, + TransformerMixin, + WhittakerLikeSolver, +): """ This class implements the Asymmetrically Reweighted Penalized Least Squares a.k.a ArPLS which is a baseline correction method for spectroscopy data. 
It uses an diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 8565c05b..0442f09b 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -21,11 +21,15 @@ from sklearn.utils.validation import check_is_fitted from chemotools.utils.check_inputs import check_input, check_weights +from chemotools.utils.models import WhittakerSmoothLambda from chemotools.utils.whittaker_base import WhittakerLikeSolver class WhittakerSmooth( - OneToOneFeatureMixin, BaseEstimator, TransformerMixin, WhittakerLikeSolver + OneToOneFeatureMixin, + BaseEstimator, + TransformerMixin, + WhittakerLikeSolver, ): """ A transformer that performs smoothing on data according to the Whittaker-Henderson @@ -78,10 +82,10 @@ class WhittakerSmooth( def __init__( self, - lam: Union[float, int] = 1e2, + lam: Union[float, int, WhittakerSmoothLambda] = 1e2, differences: int = 1, ): - self.lam: Union[float, int] = lam + self.lam: Union[float, int, WhittakerSmoothLambda] = lam self.differences: int = differences def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": From 199eac4096aea3b996df1729af383aa89488fdf6 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 15:55:47 +0200 Subject: [PATCH 056/118] refactor: split up whittaker base to make it more modular, flexible, future-suited, and readable - part I basic solvers --- chemotools/utils/whittaker_base/__init__.py | 20 + .../whittaker_base/auto_lambda/__init__.py | 12 + .../auto_lambda/optimization.py | 94 ++++ .../utils/whittaker_base/initialisation.py | 190 ++++++++ .../main.py} | 428 +++--------------- chemotools/utils/whittaker_base/misc.py | 39 ++ chemotools/utils/whittaker_base/solvers.py | 216 +++++++++ 7 files changed, 628 insertions(+), 371 deletions(-) create mode 100644 chemotools/utils/whittaker_base/__init__.py create mode 100644 chemotools/utils/whittaker_base/auto_lambda/__init__.py create mode 100644 
chemotools/utils/whittaker_base/auto_lambda/optimization.py create mode 100644 chemotools/utils/whittaker_base/initialisation.py rename chemotools/utils/{whittaker_base.py => whittaker_base/main.py} (55%) create mode 100644 chemotools/utils/whittaker_base/misc.py create mode 100644 chemotools/utils/whittaker_base/solvers.py diff --git a/chemotools/utils/whittaker_base/__init__.py b/chemotools/utils/whittaker_base/__init__.py new file mode 100644 index 00000000..fd4eefd8 --- /dev/null +++ b/chemotools/utils/whittaker_base/__init__.py @@ -0,0 +1,20 @@ +""" +This submodule contains the base class ``WhittakerLikeSolver`` which is used to +efficiently solve the Penalized Least Squares problems that arise in the +Whittaker-Henderson smoothing algorithm and its variants, e.g., for baseline correction. + +Since the class would be too big if all the methods were implemented in a single file, +the implementation is split into the class itself and a utility module that contains +utility functions used by the class. + +""" + +### Imports ### + +from chemotools.utils.models import ( # noqa: F401 + WhittakerSmoothLambda, + WhittakerSmoothMethods, +) +from chemotools.utils.whittaker_base.main import ( # noqa: F401 + WhittakerLikeSolver, +) diff --git a/chemotools/utils/whittaker_base/auto_lambda/__init__.py b/chemotools/utils/whittaker_base/auto_lambda/__init__.py new file mode 100644 index 00000000..a33c62fe --- /dev/null +++ b/chemotools/utils/whittaker_base/auto_lambda/__init__.py @@ -0,0 +1,12 @@ +""" +This submodule contains the functions used for the automated fitting of the penalty +weight lambda within the ``WhittakerLikeSolver`` class that would have cluttered the +class implementation. 
+ +""" + +### Imports ### + +from chemotools.utils.whittaker_base.auto_lambda.optimization import ( + get_optimized_lambda, +) # noqa: F401 diff --git a/chemotools/utils/whittaker_base/auto_lambda/optimization.py b/chemotools/utils/whittaker_base/auto_lambda/optimization.py new file mode 100644 index 00000000..c488b36f --- /dev/null +++ b/chemotools/utils/whittaker_base/auto_lambda/optimization.py @@ -0,0 +1,94 @@ +""" +This submodule contains the functions used for the optimization in the automated fitting +of the penalty weight lambda within the ``WhittakerLikeSolver`` class that would have +cluttered the class implementation. + +""" + +### Imports ### + +from math import ceil, exp +from typing import Callable, Tuple + +from scipy.optimize import OptimizeResult, brute, minimize_scalar + +from chemotools.utils.models import WhittakerSmoothLambda + +### Constants ### + +_LN_TEN: float = 2.302585092994046 # ln(10) +_half_log_decade: float = 0.5 * _LN_TEN +_X_ABS_LOG_TOL: float = 0.05 + +### Optimization Functions ### + + +def finish_lambda_optimization( + fun: Callable[..., float], + xmin: float, + args: Tuple, +) -> OptimizeResult: + """ + This function is used to finish the optimization of the penalty weight lambda + after the initial optimization has been performed with the ``brute`` method. + + It spans an interval of +- half a decade around the minimum found by the brute force + method and then performs a scalar optimization with the ``minimize_scalar`` method. 
+ + """ + + # first, the bounds for the scalar optimization are set + bounds = (xmin - _half_log_decade, xmin + _half_log_decade) + + # now, the scalar optimization is performed + return minimize_scalar( + fun=fun, + bounds=bounds, + args=args, + method="bounded", + options={"xatol": _X_ABS_LOG_TOL}, + ) + + +def get_optimized_lambda( + fun: Callable[..., float], + lam: WhittakerSmoothLambda, + args: Tuple, +) -> float: + """ + This function optimizes the penalty weight lambda with the brute force method. + + """ + + # first, the number of steps is computed in a way that the step size is roughly + # half a decade + # if the bounds are at max one decade apart, the finish optimization can be run + # directly + log_low_bound, log_upp_bound = lam.log_auto_bounds + bound_log_diff = log_upp_bound - log_low_bound + if bound_log_diff <= _LN_TEN: + return minimize_scalar( + fun=fun, + bounds=(log_low_bound, log_upp_bound), + args=args, + method="bounded", + options={"xatol": _X_ABS_LOG_TOL}, + ).x + + # otherwise, the number of steps is computed ... + n_steps = 1 + ceil(bound_log_diff / _half_log_decade) + + # ...and the brute force optimization with final polish is performed + # NOTE: ``brute`` can work with floats internally and this is exploited here + # NOTE: since the optimization is carried out over the log of lambda, the + # exponential of the result is returned + return exp( + brute( # type: ignore + func=fun, + ranges=(lam.log_auto_bounds,), + Ns=n_steps, + args=args, + finish=finish_lambda_optimization, + full_output=False, + ) + ) diff --git a/chemotools/utils/whittaker_base/initialisation.py b/chemotools/utils/whittaker_base/initialisation.py new file mode 100644 index 00000000..d6eee0ad --- /dev/null +++ b/chemotools/utils/whittaker_base/initialisation.py @@ -0,0 +1,190 @@ +""" +This submodule contains the utility functions used at the initialisation of the +``WhittakerLikeSolver`` class that would have cluttered the class implementation. 
+ +""" + +### Imports ### + +from typing import Any, Tuple, Type, Union + +import numpy as np + +from chemotools.utils import banded_linalg as bla +from chemotools.utils import finite_differences as fdiff +from chemotools.utils import models + +### Type Aliases ### + +_RealNumeric = Union[int, float] +_WhittakerSmoothLambdaPlain = Tuple[ + _RealNumeric, + _RealNumeric, + models.WhittakerSmoothMethods, +] +_LambdaSpecs = Union[ + _RealNumeric, + _WhittakerSmoothLambdaPlain, + models.WhittakerSmoothLambda, +] + +### Constants ### + +_RealNumericTypes = (int, float) + +### Functions ### + + +def get_checked_lambda(lam: Any) -> models.WhittakerSmoothLambda: + """ + Checks the penalty weights lambda and casts it to the respective dataclass used + inside the ``WhittakerLikeSolver`` class. + + """ + + # if lambda is already the correct dataclass, it can be returned directly since all + # the checks have already been performed + if isinstance(lam, models.WhittakerSmoothLambda): + return lam + + # now, there are other cases to check + # Case 1: lambda is a single number + if isinstance(lam, _RealNumericTypes): + return models.WhittakerSmoothLambda( + bounds=lam, method=models.WhittakerSmoothMethods.FIXED + ) + + # Case 2: lambda is a tuple + if isinstance(lam, tuple): + # if the tuple has the wrong length, an error is raised + if len(lam) != 3: + raise ValueError( + f"\nThe lambda parameter must be a tuple of three elements (lower " + f"bound, upper bound, method), but it has {len(lam)} elements " + f"instead." + ) + + # otherwise, the tuple is unpacked and the dataclass is created + return models.WhittakerSmoothLambda( + bounds=(lam[0], lam[1]), + method=lam[2], + ) + + # Case 3: lambda is not a valid type + raise TypeError( + f"\nThe lambda parameter must be an integer, a float, a tuple of (lower bound, " + f"upper bound, method), or an instance of WhittakerSmoothLambda, but it is " + f"{type(lam)} instead." 
+ ) + + +def get_squ_fw_diff_mat_banded( + n_data: int, + differences: int, + orig_first: bool, + dtype: Type, +) -> Tuple[bla.LAndUBandCounts, np.ndarray]: + """ + Returns the squared forward finite difference penalty matrix ``D.T @ D`` or its + "flipped" counterpart ``D @ D.T`` in the banded storage format used for LAPACK's + banded LU decomposition. + + """ + + # the squared forward finite difference matrix D.T @ D or D @ D.T is generated ... + # NOTE: the matrix is returned with integer entries because integer computations + # can be carried out at maximum precision; this has to be converted to + # double precision for the LU decomposition + penalty_mat_banded = fdiff.gen_squ_fw_fin_diff_mat_cho_banded( + n_data=n_data, + differences=differences, + orig_first=orig_first, + ).astype(dtype) + + # ... and cast to the banded storage format for LAPACK's LU decomposition + return bla.conv_upper_chol_banded_to_lu_banded_storage(ab=penalty_mat_banded) + + +def get_flipped_fw_diff_kernel(differences: int, dtype: Type) -> np.ndarray: + """ + Returns the flipped forward finite difference kernel for the specified difference + order. + + """ + + return np.flip(fdiff.calc_forward_diff_kernel(differences=differences)).astype( + dtype + ) + + +def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> float: + """ + Computes the natural logarithm of the pseudo-determinant of the squared forward + finite differences matrix ``D.T @ D`` which is necessary for the calculation of + the log marginal likelihood for the automatic fitting of the penalty weight. + + Returns + ------- + log_pseudo_det : float + The natural logarithm of the pseudo-determinant of the penalty matrix. + + Raises + ------ + RuntimeError + If the pseudo-determinant of the penalty matrix is negative, thereby indicating + that the system is extremely ill-conditioned and the automatic fitting of the + penalty weight is not possible. 
+ + Notes + ----- + Basically, this could be solved by evaluation of the eigenvalues of ``D.T @ D`` with + a banded eigensolver, but this is computationally expensive and not necessary (the + function is tested against this though). + The pseudo-determinant of ``D.T @ D`` is the determinant of ``D @ D.T`` because + ``D.T @ D`` is rank-deficient with ``differences`` zero eigenvalues while + ``D @ D.T`` has full rank. + Since both matrices share the same non-zero eigenvalues, the pseudo-determinant is + easily computed as the determinant of ``D @ D.T`` via a partially pivoted LU + decomposition. + + Throughout this function, the matrix ``D.T @ D`` is referred to as the "flipped + penalty matrix" even though it is not actually flipped. + + """ + + # the flipped penalty matrix D @ D.T is computed + _, flipped_penalty_matb = get_squ_fw_diff_mat_banded( + n_data=n_data, + differences=differences, + orig_first=True, + dtype=dtype, + ) + + # the pseudo-determinant is computed from the partially pivoted LU decomposition + # of the flipped penalty matrix + flipped_l_and_u, flipped_penalty_matb = ( + bla.conv_upper_chol_banded_to_lu_banded_storage(ab=flipped_penalty_matb) + ) + log_pseudo_det_sign, log_pseudo_det = bla.slogdet_lu_banded( + lub_factorization=bla.lu_banded( + l_and_u=flipped_l_and_u, + ab=flipped_penalty_matb, + check_finite=False, + ), + ) + + # if the sign of the pseudo-determinant is positive, the log pseudo-determinant + # is returned + if log_pseudo_det_sign > 0.0: + return log_pseudo_det + + # otherwise, if is negative, the penalty matrix is extremely ill-conditioned and + # the automatic fitting of the penalty weight is not possible + raise RuntimeError( + f"\nThe pseudo-determinant of the penalty D.T @ D matrix is negative, " + f"indicating that the system is extremely ill-conditioned.\n" + f"Automatic fitting for {n_data} data points and difference order " + f"{differences} is not possible.\n" + f"Please consider reducing the number of data points to 
smooth by, e.g., " + f"binning or lowering the difference order." + ) diff --git a/chemotools/utils/whittaker_base.py b/chemotools/utils/whittaker_base/main.py similarity index 55% rename from chemotools/utils/whittaker_base.py rename to chemotools/utils/whittaker_base/main.py index 8f997172..94931b12 100644 --- a/chemotools/utils/whittaker_base.py +++ b/chemotools/utils/whittaker_base/main.py @@ -8,42 +8,23 @@ ### Imports ### -from math import ceil, exp -from typing import Generator, Optional, Tuple, Union +from math import exp +from typing import Optional, Union import numpy as np -from scipy.optimize import minimize_scalar from chemotools._runtime import PENTAPY_AVAILABLE -from chemotools.utils.banded_linalg import ( - LAndUBandCounts, - conv_upper_chol_banded_to_lu_banded_storage, - lu_banded, - lu_solve_banded, - slogdet_lu_banded, -) -from chemotools.utils.finite_differences import ( - calc_forward_diff_kernel, - gen_squ_fw_fin_diff_mat_cho_banded, -) -from chemotools.utils.models import ( - BandedLUFactorization, - BandedPentapyFactorization, - BandedSolvers, - WhittakerSmoothLambda, - WhittakerSmoothMethods, -) - -if PENTAPY_AVAILABLE: - import pentapy as pp +from chemotools.utils import models +from chemotools.utils.banded_linalg import LAndUBandCounts, slogdet_lu_banded +from chemotools.utils.whittaker_base import auto_lambda as auto +from chemotools.utils.whittaker_base import initialisation as init +from chemotools.utils.whittaker_base import solvers +from chemotools.utils.whittaker_base.misc import get_weight_generator ### Type Aliases ### -_Factorization = Union[BandedLUFactorization, BandedPentapyFactorization] -_FactorizationForLogMarginalLikelihood = BandedLUFactorization -_WhittakerSmoothLambdaPlain = Tuple[ - Union[int, float], Union[int, float], WhittakerSmoothMethods -] +_Factorization = Union[models.BandedLUFactorization, models.BandedPentapyFactorization] +_FactorizationForLogMarginalLikelihood = models.BandedLUFactorization ### Class 
Implementation ### @@ -81,7 +62,7 @@ class WhittakerLikeSolver: required for the automatic fitting of the lambda parameter by maximizing the log marginal likelihood, i.e., when ``lam_ == WhittakerSmoothMethods.LOG_MARGINAL_LIKELIHOOD``. Flipping is required due to NumPy's definition of convolution. - _penalty_matb_ : ndarray of shape (n_data - differences + 1, n_data - differences + 1) + _penalty_mat_banded_ : ndarray of shape (n_data - differences + 1, n_data - differences + 1) The squared forward finite differences matrix ``D.T @ D`` stored in the banded storage format used for LAPACK's banded LU decomposition. _penalty_mat_log_pseudo_det_ : float @@ -113,92 +94,16 @@ class WhittakerLikeSolver: __allow_pentapy: bool = True __zero_weight_tol: float = 1e-10 - def __init__( - self, - ) -> None: + def __init__(self) -> None: # pragma: no cover pass ### Initialization and Setup Methods ### - def _calc_penalty_log_pseudo_det(self) -> float: - """ - Computes the natural logarithm of the pseudo-determinant of the squared forward - finite differences matrix ``D.T @ D`` which is necessary for the calculation of - the log marginal likelihood for the automatic fitting of the penalty weight. - - Returns - ------- - log_pseudo_det : float - The natural logarithm of the pseudo-determinant of the penalty matrix. - - Raises - ------ - RuntimeError - If the pseudo-determinant of the penalty matrix is negative, thereby - indicating that the system is extremely ill-conditioned and the automatic - fitting of the penalty weight is not possible. - - Notes - ----- - Basically, this could be solved by evaluation of the eigenvalues of ``D.T @ D`` - with a banded eigensolver, but this is computationally expensive and not - necessary. - The pseudo-determinant of ``D.T @ D`` is the determinant of ``D @ D.T`` because - ``D.T @ D`` is rank-deficient with ``differences`` zero eigenvalues while - ``D @ D.T`` has full rank. 
- Since both matrices share the same non-zero eigenvalues, the pseudo-determinant - is easily computed as the determinant of ``D @ D.T`` via a partially pivoted - LU decomposition. - - Throughout this method, the matrix ``D.T @ D`` is referred to as the "flipped - penalty matrix" even though it is not actually flipped. - - """ - - # the flipped penalty matrix D @ D.T is computed - # NOTE: the matrix is returned with integer entries because integer computations - # can be carried out at maximum precision; this has to be converted to - # double precision for the LU decomposition - flipped_penalty_matb = gen_squ_fw_fin_diff_mat_cho_banded( - n_data=self.n_data_, - differences=self.differences_, - orig_first=True, - ).astype(self.__dtype) - - # the pseudo-determinant is computed from the partially pivoted LU decomposition - # of the flipped penalty matrix - flipped_l_and_u, flipped_penalty_matb = ( - conv_upper_chol_banded_to_lu_banded_storage(ab=flipped_penalty_matb) - ) - log_pseudo_det_sign, log_pseudo_det = slogdet_lu_banded( - lub_factorization=lu_banded( - l_and_u=flipped_l_and_u, - ab=flipped_penalty_matb, - check_finite=False, - ), - ) - - # if the sign of the pseudo-determinant is positive, the log pseudo-determinant - # is returned - if log_pseudo_det_sign > 0.0: - return log_pseudo_det - - # otherwise, if is negative, the penalty matrix is extremely ill-conditioned and - # the automatic fitting of the penalty weight is not possible - raise RuntimeError( - f"\nThe pseudo-determinant of the penalty D.T @ D matrix is negative, " - f"indicating that the system is extremely ill-conditioned.\n" - f"Automatic fitting for {self.n_data_} data points and difference order " - f"{self.differences_} is not possible.\n" - f"Please consider reducing the number of data points to smooth by, e.g., " - f"binning or lowering the difference order." 
- ) - def _setup_for_fit( self, n_data: int, differences: int, - lam: Union[int, float, _WhittakerSmoothLambdaPlain, WhittakerSmoothLambda], + lam: init._LambdaSpecs, ) -> None: """ Pre-computes everything that can be computed for the smoothing in general as @@ -211,48 +116,19 @@ def _setup_for_fit( # the input arguments are stored and validated self.n_data_: int = n_data self.differences_: int = differences + self._lam_inter_: models.WhittakerSmoothLambda = init.get_checked_lambda( + lam=lam + ) - self._lam_inter_: WhittakerSmoothLambda - if isinstance(lam, (int, float)): - self._lam_inter_ = WhittakerSmoothLambda( - bounds=lam, - method=WhittakerSmoothMethods.FIXED, - ) - elif isinstance(lam, WhittakerSmoothLambda): - self._lam_inter_ = lam - elif isinstance(lam, tuple): - if len(lam) != 3: - raise ValueError( - f"\nThe lambda parameter must be a tuple of three elements (lower " - f"bound, upper bound, method), but it has {len(lam)} elements " - f"instead." - ) - - self._lam_inter_ = WhittakerSmoothLambda( - bounds=(lam[0], lam[1]), - method=lam[2], - ) - else: - raise TypeError( - f"\nThe lambda parameter must be an integer, a float, a tuple of " - f"(lower bound, upper bound, method), or an instance of " - f"WhittakerSmoothLambda, but it is {type(lam)} instead." - ) - - # the squared forward finite difference matrix D.T @ D is computed ... 
- # NOTE: the matrix is returned with integer entries because integer computations - # can be carried out at maximum precision; this has to be converted to - # double precision for the LU decomposition + # the squared forward finite difference matrix D.T @ D is computed in band + # storage format for LAPACK's banded LU decomposition self._l_and_u_: LAndUBandCounts - self._penalty_matb_: np.ndarray = gen_squ_fw_fin_diff_mat_cho_banded( + self._penalty_mat_banded_: np.ndarray + self._l_and_u_, self._penalty_mat_banded_ = init.get_squ_fw_diff_mat_banded( n_data=self.n_data_, differences=self.differences_, orig_first=False, - ).astype(self.__dtype) - - # ... and cast to the banded storage format for LAPACK's LU decomposition - self._l_and_u_, self._penalty_matb_ = ( - conv_upper_chol_banded_to_lu_banded_storage(ab=self._penalty_matb_) + dtype=self.__dtype, ) # if the penalty weight is fitted automatically by maximization of the @@ -261,14 +137,19 @@ def _setup_for_fit( self._diff_kernel_flipped_: np.ndarray = np.ndarray([], dtype=self.__dtype) self._penalty_mat_log_pseudo_det_: float = float("nan") if self._lam_inter_.fit_auto and self._lam_inter_.method_used in { - WhittakerSmoothMethods.LOGML, + models.WhittakerSmoothMethods.LOGML, }: # NOTE: the kernel is also returned with integer entries because integer # computations can be carried out at maximum precision - self._diff_kernel_flipped_ = np.flip( - calc_forward_diff_kernel(differences=self.differences_) - ).astype(self.__dtype) - self._penalty_mat_log_pseudo_det_ = self._calc_penalty_log_pseudo_det() + self._diff_kernel_flipped_ = init.get_flipped_fw_diff_kernel( + differences=self.differences_, + dtype=self.__dtype, + ) + self._penalty_mat_log_pseudo_det_ = init.get_penalty_log_pseudo_det( + n_data=self.n_data_, + differences=self.differences_, + dtype=self.__dtype, + ) # finally, Pentapy is enabled if available, the number of differences is 2, # and the lambda parameter is not fitted automatically @@ -281,172 
+162,35 @@ def _setup_for_fit( ### Solver Methods ### - def _solve_pentapy(self, ab: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: - """ - Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` - with the ``pentapy`` package. This is the same as solving the linear system - ``A @ x = b`` where ``A = W + lam * D.T @ D`` and ``b = W @ b``. - - Notes - ----- - Pentapy does not (maybe yet) allow for 2D right-hand side matrices, so the - solution is computed for each column of ``bw`` separately. - - """ # noqa: E501 - - # for 1-dimensional right-hand side vectors, the solution is computed directly - if b_weighted.ndim == 1: - return pp.solve( - mat=ab, - rhs=b_weighted, - is_flat=True, - index_row_wise=False, - solver=1, - ) - - # for 2-dimensional right-hand side matrices, the solution is computed for each - # column separately - else: - # NOTE: the solutions are first written into the rows of the solution matrix - # because row-access is more efficient for C-contiguous arrays; - # afterwards, the solution matrix is transposed - solution = np.empty(shape=(b_weighted.shape[1], b_weighted.shape[0])) - for iter_j in range(0, b_weighted.shape[1]): - solution[iter_j, ::] = pp.solve( - mat=ab, - rhs=b_weighted[::, iter_j], - is_flat=True, - index_row_wise=False, - solver=1, - ) - - return solution.transpose() - - def _solve_pivoted_lu( - self, - ab: np.ndarray, - b_weighted: np.ndarray, - ) -> tuple[np.ndarray, BandedLUFactorization]: - """ - Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` - with the LU decomposition. This is the same as solving the linear system - ``A @ x = b`` where ``A = W + lam * D.T @ D`` and ``b = W @ b``. - - If the LU decomposition fails, a ``LinAlgError`` is raised which is fatal since - the next level of escalation would be using a QR-decomposition which is not - implemented (yet). 
- - """ # noqa: E501 - - lub_factorization = lu_banded( - l_and_u=self._l_and_u_, - ab=ab, - check_finite=False, - ) - return ( - lu_solve_banded( - lub_factorization=lub_factorization, - b=b_weighted, - check_finite=False, - overwrite_b=True, - ), - lub_factorization, - ) - + # TODO: implement solver that does not rely on normal equations def _solve( self, lam: float, b_weighted: np.ndarray, w: Union[float, np.ndarray], - ) -> tuple[np.ndarray, BandedSolvers, _Factorization]: + ) -> tuple[np.ndarray, models.BandedSolvers, _Factorization]: """ - Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` - where ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and - ``D`` is the finite difference matrix of order ``differences``. ``lam`` - represents the penalty weight for the smoothing. - For details on why the system is not formulated in a more efficient way, please - refer to the Notes section. - - Parameters - ---------- - lam : float - The penalty weight lambda to use for the smoothing. - b_weighted : ndarray of shape (m,) or (m, n) - The weighted right-hand side vector or matrix of the linear system of - equations given by ``W @ b``. - w : float or ndarray of shape (m,) - The weights to use for the linear system of equations given in terms of the - main diagonal of the weight matrix ``W``. - It can either be a vector of weights for each data point or a single - scalar - namely ``1.0`` - if no weights are provided. - - Returns - ------- - x : np.ndarray of shape (m,) - The solution vector of the linear system of equations. - decomposition_type : BandedSolveDecompositions - The type of decomposition used to solve the linear system of equations. - decomposition : BandedLUFactorization or BandedPentapyFactorization - The decomposition used to solve the linear system of equations which is - stored as a class instance specifying everything required to solve the - system with the ``decomposition_type`` used. 
- - Raises - ------ - RuntimeError - If all available solvers failed to solve the linear system of equations - which indicates a highly ill-conditioned system. - - Notes - ----- - It might seem more efficient to solve the linear system ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` - because this only requires a multiplication of ``m`` weights with the reciprocal - of the penalty weight whereas the multiplication with ``D.T @ D`` requires - roughly ``m * (1 + 2 * differences)`` multiplications with ``m`` as the number - of data points and ``differences`` as the difference order. On top of that, - ``m * differences`` multiplications - so roughly 50% - would be redundant given - that the penalty ``D.T @ D`` matrix is symmetric. - However, NumPy's scalar multiplication is so highly optimized that the - multiplication with ``D.T @ D`` without considering symmetry is almost as fast - as the multiplication with the diagonal matrix ``W``, especially when compared - to the computational load of the banded solvers. + Internal wrapper for the solver methods to solve the linear system of equations + for the Whittaker-like smoother. + It will first attempt to solve the system via the normal equations via either + a direct pentadiagonal solve or an LU decomposition of the banded normal + equations matrix. This is less numerically stable because the condition number + of the normal equations matrix is the square of the condition number of the + original system, but on the other hand, it can be way faster. + If this fails, it will fall back to the more numerically stable QR + decomposition (to be implemented). 
""" # noqa: E501 - # the banded storage format for the LAPACK LU decomposition is computed by - # scaling the penalty matrix with the penalty weight lambda and then adding the - # diagonal matrix with the weights - ab = lam * self._penalty_matb_ - ab[self.differences_, ::] += w - - # the linear system of equations is solved with the most efficient method - # Case 1: Pentapy can be used - if self._pentapy_enabled_: - x = self._solve_pentapy(ab=ab, b_weighted=b_weighted) - if np.isfinite(x).all(): - return ( - x, - BandedSolvers.PENTAPY, - BandedPentapyFactorization(), - ) - - # Case 2: LU decomposition (final fallback for pentapy) - try: - x, lub_factorization = self._solve_pivoted_lu(ab=ab, b_weighted=b_weighted) - return x, BandedSolvers.PIVOTED_LU, lub_factorization - - except np.linalg.LinAlgError: - available_solvers = f"{BandedSolvers.PIVOTED_LU}" - if self._pentapy_enabled_: - available_solvers = f"{BandedSolvers.PENTAPY}, {available_solvers}" - - raise RuntimeError( - f"\nAll available solvers ({available_solvers}) failed to solve the " - f"linear system of equations which indicates a highly ill-conditioned " - f"system.\n" - f"Please consider reducing the number of data points to smooth by, " - f"e.g., binning or lowering the difference order." 
- ) + return solvers.solve_normal_equations( + lam=lam, + differences=self.differences_, + l_and_u=self._l_and_u_, + penalty_mat_banded=self._penalty_mat_banded_, + b_weighted=b_weighted, + w=w, + pentapy_enabled=self._pentapy_enabled_, + ) ### Auxiliary Methods to prepare the data for the solver ### @@ -700,50 +444,16 @@ def _solve_single_b_auto_lam_lml( - self._penalty_mat_log_pseudo_det_ ) - # unless the search space spans less than 1 decade, i.e., ln(10) ~= 2.3, a grid - # search is carried out to shrink the search space for the final optimization; - # the grid is spanned with an integer number of steps of half a decade - log_low_bound, log_upp_bound = self._lam_inter_.log_auto_bounds - bound_log_diff = log_upp_bound - log_low_bound - if bound_log_diff > self.__LN_TEN: - half_decade = 0.5 * self.__LN_TEN - target_best = float("inf") - n_steps = 1 + ceil(bound_log_diff / half_decade) # - # NOTE: the following ensures that the upper bound is not exceeded - step_size = bound_log_diff / (n_steps - 1) - - # all the trial values are evaluated and the best one is stored - for trial in range(0, n_steps): - log_lam_curr = log_low_bound + trial * step_size - target_curr = self._marginal_likelihood_objective( - log_lam=log_lam_curr, - b=b, - w=w, - w_plus_penalty_plus_n_samples_term=w_plus_n_samples_term, - ) - - if target_curr < target_best: - log_lam_best = log_lam_curr - target_best = target_curr - - # then, the bounds for the final optimization are shrunk to plus/minus half - # a decade around the best trial value - # NOTE: the following ensures that the bounds are not violated - log_low_bound = max(log_lam_best - half_decade, log_low_bound) - log_upp_bound = min(log_lam_best + half_decade, log_upp_bound) - # the optimization of the log marginal likelihood is carried out - opt_res = minimize_scalar( + opt_lambda = auto.get_optimized_lambda( fun=self._marginal_likelihood_objective, - bounds=(log_low_bound, log_upp_bound), + lam=self._lam_inter_, args=(b, w, 
w_plus_n_samples_term), - method="bounded", - options={"xatol": 0.05}, ) # the optimal penalty weight lambda is returned together with the smoothed # series - return self._solve_single_b_fixed_lam(b=b, w=w, lam=exp(opt_res.x)) + return self._solve_single_b_fixed_lam(b=b, w=w, lam=opt_lambda) def _solve_multiple_b( self, @@ -783,30 +493,6 @@ def _solve_multiple_b( np.full(shape=(X.shape[0],), fill_value=self._lam_inter_.fixed_lambda), ) - def _get_weight_generator( - self, w: Optional[np.ndarray], n_series: int - ) -> Generator[Union[float, np.ndarray], None, None]: - """ - Generates a generator that yields the weights for each series in a series matrix - ``X``. - - """ - - # Case 1: No weights - if w is None: - for _ in range(n_series): - yield 1.0 - - # Case 2: 1D weights - elif w.ndim == 1: - for _ in range(n_series): - yield w - - # Case 3: 2D weights - elif w.ndim == 2: - for w_vect in w: - yield w_vect - ### Main Solver Entry Point ### def _whittaker_solve( @@ -860,15 +546,15 @@ def _whittaker_solve( # first, the smoothing method is specified depending on whether the penalty # weight lambda is fitted automatically or not smooth_method_assignment = { - WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam, - WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_lml, + models.WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam, + models.WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_lml, } smooth_method = smooth_method_assignment[self._lam_inter_.method_used] # then, the solution is computed for each series by means of a loop X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) - w_gen = self._get_weight_generator(w=w, n_series=X.shape[0]) + w_gen = get_weight_generator(w=w, n_series=X.shape[0]) for iter_i, (x_vect, w_vect) in enumerate(zip(X, w_gen)): X_smooth[iter_i], lam[iter_i] = smooth_method(b=x_vect, w=w_vect) diff --git a/chemotools/utils/whittaker_base/misc.py b/chemotools/utils/whittaker_base/misc.py new 
file mode 100644 index 00000000..107bc636 --- /dev/null +++ b/chemotools/utils/whittaker_base/misc.py @@ -0,0 +1,39 @@ +""" +This submodule contains miscellaneous functions used by ``WhittakerLikeSolver`` class +that would have cluttered the class implementation. + +""" + +### Imports ### + +from typing import Generator, Optional, Union + +import numpy as np + +### Functions ### + + +def get_weight_generator( + w: Optional[np.ndarray], + n_series: int, +) -> Generator[Union[float, np.ndarray], None, None]: + """ + Generates a generator that yields the weights for each series in a series matrix + ``X``. + + """ + + # Case 1: No weights + if w is None: + for _ in range(n_series): + yield 1.0 + + # Case 2: 1D weights + elif w.ndim == 1: + for _ in range(n_series): + yield w + + # Case 3: 2D weights + elif w.ndim == 2: + for w_vect in w: + yield w_vect diff --git a/chemotools/utils/whittaker_base/solvers.py b/chemotools/utils/whittaker_base/solvers.py new file mode 100644 index 00000000..01987a1d --- /dev/null +++ b/chemotools/utils/whittaker_base/solvers.py @@ -0,0 +1,216 @@ +""" +This submodule contains the solver functions used by the ``WhittakerLikeSolver`` class +that would have cluttered the class implementation. + +""" + +### Imports ### + + +from typing import Union + +import numpy as np + +from chemotools._runtime import PENTAPY_AVAILABLE +from chemotools.utils import banded_linalg as bla +from chemotools.utils import models + +if PENTAPY_AVAILABLE: + import pentapy as pp + +### Type Aliases ### + +_Factorization = Union[models.BandedLUFactorization, models.BandedPentapyFactorization] + +### Functions ### + + +def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: + """ + Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with the + ``pentapy`` package. This is the same as solving the linear system ``A @ x = b`` + where ``A = W + lam * D.T @ D`` and ``b = W @ b``. 
+ + Notes + ----- + Pentapy does not (maybe yet) allow for 2D right-hand side matrices, so the + solution is computed for each column of ``bw`` separately. + + """ + + # for 1-dimensional right-hand side vectors, the solution is computed directly + if b_weighted.ndim == 1: + return pp.solve( + mat=a_banded, + rhs=b_weighted, + is_flat=True, + index_row_wise=False, + solver=1, + ) + + # for 2-dimensional right-hand side matrices, the solution is computed for each + # column separately + else: + # NOTE: the solutions are first written into the rows of the solution matrix + # because row-access is more efficient for C-contiguous arrays; + # afterwards, the solution matrix is transposed + solution = np.empty(shape=(b_weighted.shape[1], b_weighted.shape[0])) + for iter_j in range(0, b_weighted.shape[1]): + solution[iter_j, ::] = pp.solve( + mat=a_banded, + rhs=b_weighted[::, iter_j], + is_flat=True, + index_row_wise=False, + solver=1, + ) + + return solution.transpose() + + +def solve_ppivoted_lu( + l_and_u: bla.LAndUBandCounts, + a_banded: np.ndarray, + b_weighted: np.ndarray, +) -> tuple[np.ndarray, models.BandedLUFactorization]: + """ + Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with a + partially pivoted LU decomposition. This is the same as solving the linear system + ``A @ x = b`` where ``A = W + lam * D.T @ D`` and ``b = W @ b``. + + If the LU decomposition fails, a ``LinAlgError`` is raised which is fatal since + the next level of escalation would be using a QR-decomposition which is not + implemented (yet). 
+ + """ + + lub_factorization = bla.lu_banded( + l_and_u=l_and_u, + ab=a_banded, + check_finite=False, + ) + return ( + bla.lu_solve_banded( + lub_factorization=lub_factorization, + b=b_weighted, + check_finite=False, + overwrite_b=True, + ), + lub_factorization, + ) + + +def solve_normal_equations( + lam: float, + differences: int, + l_and_u: bla.LAndUBandCounts, + penalty_mat_banded: np.ndarray, + b_weighted: np.ndarray, + w: Union[float, np.ndarray], + pentapy_enabled: bool, +) -> tuple[np.ndarray, models.BandedSolvers, _Factorization]: + """ + Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` where + ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and ``D`` is + the finite difference matrix of order ``differences``. ``lam`` represents the + penalty weight for the smoothing. + For details on why the system is not formulated in a more efficient way, please + refer to the Notes section. + + Parameters + ---------- + lam : float + The penalty weight lambda to use for the smoothing. + differences : int + The order of the finite differences to use for the smoothing. + l_and_u : LAndUBandCounts + The number of sub- and super-diagonals of ``penalty_mat_banded``. + penalty_mat_banded : ndarray of shape (2 * differences + 1, m) + The penalty matrix ``D.T @ D`` in the banded storage format used for LAPACK's + banded LU decomposition. + b_weighted : ndarray of shape (m,) or (m, n) + The weighted right-hand side vector or matrix of the linear system of equations + given by ``W @ b``. + w : float or ndarray of shape (m,) + The weights to use for the linear system of equations given in terms of the main + diagonal of the weight matrix ``W``. + It can either be a vector of weights for each data point or a single scalar - + namely ``1.0`` - if no weights are provided. + pentapy_enabled : bool + Determines whether the ``pentapy`` solver is enabled (``True``) or not + (``False``). 
+ + Returns + ------- + x : np.ndarray of shape (m,) + The solution vector of the linear system of equations. + decomposition_type : BandedSolveDecompositions + The type of decomposition used to solve the linear system of equations. + decomposition : BandedLUFactorization or BandedPentapyFactorization + The decomposition used to solve the linear system of equations which is stored + as a class instance specifying everything required to solve the system with + the ``decomposition_type`` used. + + Raises + ------ + RuntimeError + If all available solvers failed to solve the linear system of equations which + indicates a highly ill-conditioned system. + + Notes + ----- + It might seem more efficient to solve the linear system ``((1.0 / lam) * W + D.T @ D) @ x = (1.0 / lam) * W @ b`` + because this only requires a multiplication of ``m`` weights with the reciprocal of + the penalty weight whereas the multiplication with ``D.T @ D`` requires roughly + ``m * (1 + 2 * differences)`` multiplications with ``m`` as the number of data + points and ``differences`` as the difference order. On top of that, ``m * differences`` + multiplications - so roughly 50% - would be redundant given that the penalty + ``D.T @ D`` matrix is symmetric. + However, NumPy's scalar multiplication is so highly optimized that the + multiplication with ``D.T @ D`` without considering symmetry is almost as fast as + the multiplication with the diagonal matrix ``W``, especially when compared to the + computational load of the banded solvers. 
+ + """ # noqa: E501 + + # the banded storage format for the LAPACK LU decomposition is computed by + # scaling the penalty matrix with the penalty weight lambda and then adding the + # diagonal matrix with the weights + a_banded = lam * penalty_mat_banded + a_banded[differences, ::] += w + + # the linear system of equations is solved with the most efficient method + # Case 1: Pentapy can be used + if pentapy_enabled: + x = solve_pentapy(a_banded=a_banded, b_weighted=b_weighted) + if np.isfinite(x).all(): + return ( + x, + models.BandedSolvers.PENTAPY, + models.BandedPentapyFactorization(), + ) + + # Case 2: LU decomposition (final fallback for pentapy) + try: + x, lub_factorization = solve_ppivoted_lu( + l_and_u=l_and_u, + a_banded=a_banded, + b_weighted=b_weighted, + ) + return ( + x, + models.BandedSolvers.PIVOTED_LU, + lub_factorization, + ) + + except np.linalg.LinAlgError: + available_solvers = f"{models.BandedSolvers.PIVOTED_LU}" + if pentapy_enabled: + available_solvers = f"{models.BandedSolvers.PENTAPY}, {available_solvers}" + + raise RuntimeError( + f"\nAll available solvers ({available_solvers}) failed to solve the " + f"linear system of equations which indicates a highly ill-conditioned " + f"system.\n" + f"Please consider reducing the number of data points to smooth by, " + f"e.g., binning or lowering the difference order." 
+ ) From a3b5af65da8b33bf89d7c428d8b245ff9f11073c Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 16:42:27 +0200 Subject: [PATCH 057/118] refactor: split up whittaker base to make it more modular, flexible, future-suited, and readable - part II log marginal likelihood --- .../whittaker_base/auto_lambda/__init__.py | 11 +- .../utils/whittaker_base/auto_lambda/logml.py | 191 ++++++++++++++++++ .../whittaker_base/auto_lambda/shared.py | 39 ++++ chemotools/utils/whittaker_base/main.py | 166 ++------------- 4 files changed, 253 insertions(+), 154 deletions(-) create mode 100644 chemotools/utils/whittaker_base/auto_lambda/logml.py create mode 100644 chemotools/utils/whittaker_base/auto_lambda/shared.py diff --git a/chemotools/utils/whittaker_base/auto_lambda/__init__.py b/chemotools/utils/whittaker_base/auto_lambda/__init__.py index a33c62fe..bba327c5 100644 --- a/chemotools/utils/whittaker_base/auto_lambda/__init__.py +++ b/chemotools/utils/whittaker_base/auto_lambda/__init__.py @@ -7,6 +7,13 @@ class implementation. 
### Imports ### -from chemotools.utils.whittaker_base.auto_lambda.optimization import ( +from chemotools.utils.whittaker_base.auto_lambda.logml import ( # noqa: F401 + get_log_marginal_likelihood, + get_log_marginal_likelihood_constant_term, +) +from chemotools.utils.whittaker_base.auto_lambda.optimization import ( # noqa: F401 get_optimized_lambda, -) # noqa: F401 +) +from chemotools.utils.whittaker_base.auto_lambda.shared import ( # noqa: F401 + _Factorization, +) diff --git a/chemotools/utils/whittaker_base/auto_lambda/logml.py b/chemotools/utils/whittaker_base/auto_lambda/logml.py new file mode 100644 index 00000000..79ffc407 --- /dev/null +++ b/chemotools/utils/whittaker_base/auto_lambda/logml.py @@ -0,0 +1,191 @@ +""" +This submodule contains the functions used for the automated fitting of the penalty +weight lambda within the ``WhittakerLikeSolver`` class based on the log marginal +likelihood that would have cluttered the class implementation. + +""" + +### Imports ### + +from typing import Union + +import numpy as np + +from chemotools.utils import banded_linalg as bla +from chemotools.utils import models +from chemotools.utils.whittaker_base.auto_lambda.shared import get_smooth_wrss + +### Constants ### + +_LN_TWO_PI: float = 1.8378770664093453 # ln(2 * pi) + +### Type Aliases ### + +# TODO: add QR factorization +_FactorizationForLogMarginalLikelihood = models.BandedLUFactorization + +### Functions ### + + +def get_log_marginal_likelihood_constant_term( + differences: int, + penalty_mat_log_pseudo_det: float, + w: np.ndarray, + zero_weight_tol: float, +) -> float: + """ + Computes the constant term of the log marginal likelihood for the automatic fitting + of the penalty weight lambda, i.e., + + ``(n^ - d) * ln(2 * pi) - ln(pseudo_det(W)) - ln(pseudo_det(D.T @ D))`` + + or better + + ``(n^ - d) * ln(2 * pi) - ln(pseudo_det(W)) - ln(det(D @ D.T))`` + + with: + + - ``ln`` as the natural logarithm, + - ``pseudo_det(A)`` as the pseudo-determinant of the matrix 
``A``, i.e., the product + of its non-zero eigenvalues, + - ``det(A)`` as the determinant of the matrix ``A``, i.e., the product of its + eigenvalues, + - ``W`` as the diagonal matrix with the weights on the main diagonal, + - ``D.T @ D`` as the squared forward finite differences matrix, + - ``D @ D.T`` as the flipped squared forward finite differences matrix, + - ``d`` as the difference order used for the smoothing, and + - ``n^`` as the number of data points with non-zero weights in the series to smooth. + + It should be noted that ``pseudo_det(D.T @ D)`` is replaced by ``det(D @ D.T)`` here + because the latter is not rank-deficient. + + """ + + # first, the constant terms of the log marginal likelihood are computed starting + # from the log pseudo-determinant of the weight matrix, i.e., the product of the + # non-zero elements of the weight vector + nonzero_w_idxs = np.where(w > w.max() * zero_weight_tol)[0] + nnz_w = nonzero_w_idxs.size + log_pseudo_det_w = np.log(w[nonzero_w_idxs]).sum() + + # the constant term of the log marginal likelihood is computed + return ( + (nnz_w - differences) * _LN_TWO_PI + - log_pseudo_det_w + - penalty_mat_log_pseudo_det + ) + + +def get_log_marginal_likelihood( + factorization: _FactorizationForLogMarginalLikelihood, + log_lam: float, + lam: float, + differences: int, + diff_kernel_flipped: np.ndarray, + b: np.ndarray, + b_smooth: np.ndarray, + w: Union[float, np.ndarray], + w_plus_penalty_plus_n_samples_term: float, +) -> float: + """ + Computes the log marginal likelihood for the automatic fitting of the penalty + weight lambda. For the definitions used (and manipulated here), please refer to + the Notes section. + + Parameters + ---------- + factorization : BandedLUFactorization + The factorization of the matrix to solve the linear system of equations, + i.e., ``W + lambda * D.T @ D`` from the description above. + Currently, only partially pivoted banded LU decompositions can be used to + compute the log marginal likelihood. 
+ log_lam : float + The natural logarithm of the penalty weight lambda used for the smoothing. + lam : float + The penalty weight lambda used for the smoothing, i.e., ``exp(log_lam)``. + differences : int + The order of the finite differences to use for the smoothing. + diff_kernel_flipped : ndarray of shape (differences + 1,) + The flipped forward finite differences kernel used for the smoothing. + b, b_smooth : ndarray of shape (m,) + The original series and its smoothed counterpart. + w : float or ndarray of shape (m,) + The weights to use for the smoothing. + w_plus_penalty_plus_n_samples_term : float + The last term of the log marginal likelihood that is constant since it + involves the weights, the penalty matrix, and the number of data points + which are all constant themselves (see the Notes for details). + + Notes + ----- + The log marginal likelihood is given by: + + ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - ln(pseudo_det(lambda * D.T @ D)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` + + or better + + ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - (n - d) * ln(lambda) - ln(det(D @ D.T)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` + + with: + + - ``wRSS`` as the weighted Sum of Squared Residuals between the original and the + smoothed series, + - ``PSS`` as the Penalty Sum of Squares which is given by the sum of the squared + elements of the ``d``-th order forward finite differences of the smoothed + series, + - ``lambda`` as the penalty weight used for the smoothing, + - ``d`` as the difference order used for the smoothing, + - ``ln`` as the natural logarithm, + - ``pseudo_det(A)`` as the pseudo-determinant of the matrix ``A``, i.e., the + product of its non-zero eigenvalues, + - ``det(A)`` as the determinant of the matrix ``A``, i.e., the product of its + eigenvalues, + - ``W`` as the diagonal matrix with the weights on the main diagonal, + - ``D.T @ D`` as the squared forward finite differences matrix, + - 
``D @ D.T`` as the flipped squared forward finite differences matrix, + - ``n`` is the number of data points in the series to smooth, and + - ``n^`` is the number of data points with non-zero weights in the series to + smooth. + + It should be noted that ``pseudo_det(D.T @ D)`` is replaced by ``det(D @ D.T)`` + here because the latter is not rank-deficient. + + """ # noqa: E501 + + # first, the weighted Sum of Squared Residuals is computed ... + wrss = get_smooth_wrss(b=b, b_smooth=b_smooth, w=w) + # ... followed by the Penalty Sum of Squares which requires the squared forward + # finite differences of the smoothed series + # NOTE: ``np.convolve`` is used to compute the forward finite differences and + # since it flips the provided kernel, an already flipped kernel is used + pss = ( + lam * np.square(np.convolve(b_smooth, diff_kernel_flipped, mode="valid")).sum() + ) + + # besides the determinant of the combined left hand side matrix has to be + # computed from its decomposition + lhs_logdet_sign, lhs_logabsdet = bla.slogdet_lu_banded( + lub_factorization=factorization, + ) + + # if the sign of the determinant is positive, the log marginal likelihood is + # computed and returned + if lhs_logdet_sign > 0.0: + return -0.5 * ( + wrss + + pss + - (b.size - differences) * log_lam + + lhs_logabsdet + + w_plus_penalty_plus_n_samples_term + ) + + # otherwise, if the determinant is negative, the system is extremely + # ill-conditioned and the log marginal likelihood cannot be computed + raise RuntimeError( + "\nThe determinant of the combined left hand side matrix " + "W + lambda * D.T @ D is negative, indicating that the system is extremely " + "ill-conditioned.\n" + "The log marginal likelihood cannot be computed.\n" + "Please consider reducing the number of data points to smooth by, e.g., " + "binning or lowering the difference order." 
+ ) diff --git a/chemotools/utils/whittaker_base/auto_lambda/shared.py b/chemotools/utils/whittaker_base/auto_lambda/shared.py new file mode 100644 index 00000000..db1cf0ac --- /dev/null +++ b/chemotools/utils/whittaker_base/auto_lambda/shared.py @@ -0,0 +1,39 @@ +""" +This submodule contains the shared logics when it comes to the automated fitting of the +penalty weight lambda within the ``WhittakerLikeSolver`` class that would have cluttered +the class implementation. + +""" + +### Imports ### + +from typing import Union + +import numpy as np + +from chemotools.utils import models + +### Type Aliases ### + +_Factorization = Union[models.BandedLUFactorization, models.BandedPentapyFactorization] + +### Functions ### + + +def get_smooth_wrss( + b: np.ndarray, + b_smooth: np.ndarray, + w: Union[float, np.ndarray], +) -> float: + """ + Computes the (weighted) Sum of Squared Residuals (w)RSS between the original and + the smoothed series. + + """ + + # Case 1: no weights are provided + if isinstance(w, float): + return np.square(b - b_smooth).sum() + + # Case 2: weights are provided + return (w * np.square(b - b_smooth)).sum() diff --git a/chemotools/utils/whittaker_base/main.py b/chemotools/utils/whittaker_base/main.py index 94931b12..58122f54 100644 --- a/chemotools/utils/whittaker_base/main.py +++ b/chemotools/utils/whittaker_base/main.py @@ -15,18 +15,12 @@ from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.utils import models -from chemotools.utils.banded_linalg import LAndUBandCounts, slogdet_lu_banded +from chemotools.utils.banded_linalg import LAndUBandCounts from chemotools.utils.whittaker_base import auto_lambda as auto from chemotools.utils.whittaker_base import initialisation as init from chemotools.utils.whittaker_base import solvers from chemotools.utils.whittaker_base.misc import get_weight_generator -### Type Aliases ### - -_Factorization = Union[models.BandedLUFactorization, models.BandedPentapyFactorization] 
-_FactorizationForLogMarginalLikelihood = models.BandedLUFactorization - - ### Class Implementation ### @@ -88,8 +82,6 @@ class WhittakerLikeSolver: """ # noqa: E501 - __LN_TWO_PI: float = 1.8378770664093453 - __LN_TEN: float = 2.302585092994046 __dtype: type = np.float64 __allow_pentapy: bool = True __zero_weight_tol: float = 1e-10 @@ -168,7 +160,7 @@ def _solve( lam: float, b_weighted: np.ndarray, w: Union[float, np.ndarray], - ) -> tuple[np.ndarray, models.BandedSolvers, _Factorization]: + ) -> tuple[np.ndarray, models.BandedSolvers, auto._Factorization]: """ Internal wrapper for the solver methods to solve the linear system of equations for the Whittaker-like smoother. @@ -192,133 +184,7 @@ def _solve( pentapy_enabled=self._pentapy_enabled_, ) - ### Auxiliary Methods to prepare the data for the solver ### - - def calc_wrss( - self, b: np.ndarray, b_smooth: np.ndarray, w: Union[float, np.ndarray] - ) -> float: - """ - Computes the (weighted) Sum of Squared Residuals (w)RSS between the original and - the smoothed series. - - """ - - # Case 1: no weights are provided - if isinstance(w, float): - return np.square(b - b_smooth).sum() - - # Case 2: weights are provided - return (w * np.square(b - b_smooth)).sum() - - def _calc_log_marginal_likelihood( - self, - factorization: _FactorizationForLogMarginalLikelihood, - log_lam: float, - lam: float, - b: np.ndarray, - b_smooth: np.ndarray, - w: Union[float, np.ndarray], - w_plus_penalty_plus_n_samples_term: float, - ) -> float: - """ - Computes the log marginal likelihood for the automatic fitting of the penalty - weight lambda. For the definitions used (and manipulated here), please refer to - the Notes section. - - Parameters - ---------- - factorization : BandedLUFactorization - The factorization of the matrix to solve the linear system of equations, - i.e., ``W + lambda * D.T @ D`` from the description above. 
- Currently, only partially pivoted banded LU decompositions can be used to - compute the log marginal likelihood. - log_lam : float - The natural logarithm of the penalty weight lambda used for the smoothing. - lam : float - The penalty weight lambda used for the smoothing, i.e., ``exp(log_lam)``. - b, b_smooth : ndarray of shape (m,) - The original series and its smoothed counterpart. - w : float or ndarray of shape (m,) - The weights to use for the smoothing. - w_plus_penalty_plus_n_samples_term : float - The last term of the log marginal likelihood that is constant since it - involves the weights, the penalty matrix, and the number of data points - which are all constant themselves (see the Notes for details). - - Notes - ----- - The log marginal likelihood is given by: - - ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - ln(pseudo_det(lambda * D.T @ D)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` - - or better - - ``-0.5 * [wRSS + lambda * PSS - ln(pseudo_det(W)) - (n - d) * ln(lambda) - ln(det(D @ D.T)) + ln(det(W + lambda * D.T @ D)) + (n^ - d) * ln(2 * pi)]`` - - where: - - - ``wRSS`` is the weighted Sum of Squared Residuals between the original and the - smoothed series, - - ``PSS`` is the Penalty Sum of Squares which is given by the sum of the squared - elements of the ``d``-th order forward finite differences of the smoothed - series, - - ``d`` is the difference order used for the smoothing. - - ``ln`` as the natural logarithm, - - ``pseudo_det(A)`` is the pseudo-determinant of the matrix ``A``, i.e., the - product of its non-zero eigenvalues, - - ``det(A)`` is the determinant of the matrix ``A``, i.e., the product of its - eigenvalues, - - ``W`` is the diagonal matrix with the weights on the main diagonal, - - ``D.T @ D`` is the squared forward finite differences matrix, and - - ``n`` is the number of data points in the series to smooth, - - ``n^`` is the number of data points with non-zero weights in the series to - smooth. 
- - It should be noted that ``pseudo_det(D.T @ D)`` is replaced by ``det(D @ D.T)`` - here because the latter is not rank-deficient. - - """ # noqa: E501 - - # first, the weighted Sum of Squared Residuals is computed ... - wrss = self.calc_wrss(b=b, b_smooth=b_smooth, w=w) - # ... followed by the Penalty Sum of Squares which requires the squared forward - # finite differences of the smoothed series - # NOTE: ``np.convolve`` is used to compute the forward finite differences and - # since it flips the provided kernel, an already flipped kernel is used - pss = ( - lam - * np.square( - np.convolve(b_smooth, self._diff_kernel_flipped_, mode="valid") - ).sum() - ) - - # besides the determinant of the combined left hand side matrix has to be - # computed from its decomposition - lhs_logdet_sign, lhs_logabsdet = slogdet_lu_banded( - lub_factorization=factorization, - ) - - # if the sign of the determinant is positive, the log marginal likelihood is - # computed and returned - if lhs_logdet_sign > 0.0: - return -0.5 * ( - wrss - + pss - - (b.size - self.differences_) * log_lam - + lhs_logabsdet - + w_plus_penalty_plus_n_samples_term - ) - - # otherwise, if the determinant is negative, the system is extremely - # ill-conditioned and the log marginal likelihood cannot be computed - raise RuntimeError( - "\nThe determinant of the combined left hand side matrix " - "W + lambda * D.T @ D is negative, indicating that the system is extremely " - "ill-conditioned.\n" - "The log marginal likelihood cannot be computed.\n" - "Please consider reducing the number of data points to smooth by, e.g., " - "binning or lowering the difference order." 
- ) + ### Auxiliary Methods for automated fitting of the penalty weight ### def _marginal_likelihood_objective( self, @@ -358,10 +224,12 @@ def _marginal_likelihood_objective( # finally, the log marginal likelihood is computed and returned (negative since # the objective function is minimized, but the log marginal likelihood is # to be maximized) - return (-1.0) * self._calc_log_marginal_likelihood( + return (-1.0) * auto.get_log_marginal_likelihood( factorization=factorization, # type: ignore log_lam=log_lam, lam=lam, + differences=self.differences_, + diff_kernel_flipped=self._diff_kernel_flipped_, b=b, b_smooth=b_smooth, w=w, @@ -409,7 +277,7 @@ def _solve_single_b_fixed_lam( lam, ) - def _solve_single_b_auto_lam_lml( + def _solve_single_b_auto_lam_logml( self, b: np.ndarray, w: Union[float, np.ndarray], @@ -430,18 +298,12 @@ def _solve_single_b_auto_lam_lml( "Please provide weights for the series to smooth." ) - # first, the constant terms of the log marginal likelihood are computed starting - # from the log pseudo-determinant of the weight matrix, i.e., the product of the - # non-zero elements of the weight vector - nonzero_w_idxs = np.where(w > w.max() * self.__zero_weight_tol)[0] - nnz_w = nonzero_w_idxs.size - log_pseudo_det_w = np.log(w[nonzero_w_idxs]).sum() - - # the constant term of the log marginal likelihood is computed - w_plus_n_samples_term = ( - (nnz_w - self.differences_) * self.__LN_TWO_PI - - log_pseudo_det_w - - self._penalty_mat_log_pseudo_det_ + # the term that is constant for the log marginal likelihood is computed + w_plus_n_samples_term = auto.get_log_marginal_likelihood_constant_term( + differences=self.differences_, + penalty_mat_log_pseudo_det=self._penalty_mat_log_pseudo_det_, + w=w, + zero_weight_tol=self.__zero_weight_tol, ) # the optimization of the log marginal likelihood is carried out @@ -547,7 +409,7 @@ def _whittaker_solve( # weight lambda is fitted automatically or not smooth_method_assignment = { 
models.WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam, - models.WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_lml, + models.WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_logml, } smooth_method = smooth_method_assignment[self._lam_inter_.method_used] From 1c709d30bcf974bc0d5fb0a4b795471fd01c00e5 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 18:24:41 +0200 Subject: [PATCH 058/118] tests/refactor: simplified model tests --- tests/test_for_utils/test_models.py | 65 +++++++++------------------ tests/test_for_utils/utils.py | 68 +++++++++++++++++++++-------- 2 files changed, 70 insertions(+), 63 deletions(-) diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 51c21592..1f88bc2f 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -5,25 +5,25 @@ ### Imports ### -from typing import List, Tuple, Union +from typing import List, Tuple, Type, Union import numpy as np import pytest -from chemotools.utils.models import WhittakerSmoothLambda, WhittakerSmoothMethods -from tests.test_for_utils.utils import ExpectedWhittakerSmoothLambda, float_is_bit_equal +from chemotools.utils import models +from tests.test_for_utils.utils import ExpectedWhittakerSmoothLambda ### Type aliases ### _Numeric = Union[float, int] _LambdaValueNumeric = Union[_Numeric, Tuple[_Numeric, _Numeric]] _LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] -_WhittakerMethod = Union[str, WhittakerSmoothMethods] +_WhittakerMethod = Union[str, models.WhittakerSmoothMethods] _WhittakerMethodSequence = List[_WhittakerMethod] _ExpectedLambdaResult = Union[ ExpectedWhittakerSmoothLambda, - ValueError, - TypeError, + Type[ValueError], + Type[TypeError], ] _LambdaTestCombination = Tuple[ _LambdaValueNumericOrFlawed, @@ -31,16 +31,16 @@ _ExpectedLambdaResult, ] -### Global constants ### +### Constants ### _NAN: float = float("nan") _FIXED_WHITTAKER_METHODS: 
_WhittakerMethodSequence = [ "fixed", - WhittakerSmoothMethods.FIXED, + models.WhittakerSmoothMethods.FIXED, ] _LOGML_WHITTAKER_METHODS: _WhittakerMethodSequence = [ "logml", - WhittakerSmoothMethods.LOGML, + models.WhittakerSmoothMethods.LOGML, ] # NOTE: "aauto" is not a typo, but helps to not confuse it with "all" _aauto_whittaker_methods: _WhittakerMethodSequence = _LOGML_WHITTAKER_METHODS + [] @@ -62,7 +62,7 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=WhittakerSmoothMethods.FIXED, + method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -73,7 +73,7 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=WhittakerSmoothMethods.FIXED, + method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -84,7 +84,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=WhittakerSmoothMethods.FIXED, + method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -95,7 +95,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=WhittakerSmoothMethods.FIXED, + method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -106,7 +106,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=WhittakerSmoothMethods.FIXED, + method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -117,7 +117,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=WhittakerSmoothMethods.FIXED, + method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -128,7 +128,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=WhittakerSmoothMethods.FIXED, + method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -139,7 +139,7 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - 
method_used=WhittakerSmoothMethods.LOGML, + method_used=models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(np.log(100.0), np.log(10_000.0)), ), ), @@ -150,7 +150,7 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=WhittakerSmoothMethods.LOGML, + method_used=models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(np.log(100.0), np.log(10_000.0)), ), ), @@ -211,7 +211,7 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N if not isinstance(expected_result, ExpectedWhittakerSmoothLambda): for meth in methods: with pytest.raises(expected_result): # type: ignore - WhittakerSmoothLambda( + models.WhittakerSmoothLambda( bounds=lambda_value, # type: ignore method=meth, # type: ignore ) @@ -221,32 +221,9 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N # if the expected result is a valid result, the class is instantiated and the # attributes are tested for meth in methods: - lambda_model = WhittakerSmoothLambda( + lambda_model = models.WhittakerSmoothLambda( bounds=lambda_value, # type: ignore method=meth, # type: ignore ) - assert lambda_model.fit_auto == expected_result.fit_auto - assert lambda_model.method_used == expected_result.method_used - # NOTE: since NAN-values are used, the comparison is split into two parts for - # the fixed lambda value and each of the bounds - assert float_is_bit_equal( - value=lambda_model.fixed_lambda, - reference=expected_result.fixed_lambda, - ) - assert float_is_bit_equal( - value=lambda_model.auto_bounds[0], - reference=expected_result.auto_bounds[0], - ) - assert float_is_bit_equal( - value=lambda_model.auto_bounds[1], - reference=expected_result.auto_bounds[1], - ) - assert float_is_bit_equal( - value=lambda_model.log_auto_bounds[0], - reference=expected_result.log_auto_bounds[0], - ) - assert float_is_bit_equal( - value=lambda_model.log_auto_bounds[1], - reference=expected_result.log_auto_bounds[1], - ) + 
expected_result.assert_is_equal_to(other=lambda_model) diff --git a/tests/test_for_utils/utils.py b/tests/test_for_utils/utils.py index 85d20896..10ea645a 100644 --- a/tests/test_for_utils/utils.py +++ b/tests/test_for_utils/utils.py @@ -19,25 +19,7 @@ from scipy.sparse import csr_matrix from scipy.sparse import diags as sp_diags -from chemotools.utils.models import WhittakerSmoothMethods - -### Dataclasses ### - - -@dataclass -class ExpectedWhittakerSmoothLambda: - """ - Dataclass for checking the expected results for the class :class:`WhittakerSmoothLambda` - from the module :mod:`chemotools.utils.models`. - - """ # noqa: E501 - - fixed_lambda: float - auto_bounds: Tuple[float, float] - fit_auto: bool - method_used: WhittakerSmoothMethods - log_auto_bounds: Tuple[float, float] = (0.0, 0.0) - +from chemotools.utils import models ### Utility Functions ### @@ -773,6 +755,54 @@ def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: return sign, logabsdet +### Dataclasses ### + + +@dataclass +class ExpectedWhittakerSmoothLambda: + """ + Dataclass for checking the expected results for the class :class:`WhittakerSmoothLambda` + from the module :mod:`chemotools.utils.models`. + + """ # noqa: E501 + + fixed_lambda: float + auto_bounds: Tuple[float, float] + fit_auto: bool + method_used: models.WhittakerSmoothMethods + log_auto_bounds: Tuple[float, float] = (0.0, 0.0) + + def assert_is_equal_to(self, other: models.WhittakerSmoothLambda) -> None: + """ + Checks if the current instance is equal to another instance of the same class. 
+ + """ + + assert other.fit_auto is self.fit_auto + assert other.method_used == self.method_used + # NOTE: since NAN-values are used, the comparison is split into two parts for + # the fixed lambda value and each of the bounds + assert float_is_bit_equal( + value=other.fixed_lambda, + reference=self.fixed_lambda, + ) + assert float_is_bit_equal( + value=other.auto_bounds[0], reference=self.auto_bounds[0] + ) + assert float_is_bit_equal( + value=other.auto_bounds[1], + reference=self.auto_bounds[1], + ) + assert float_is_bit_equal( + value=other.log_auto_bounds[0], + reference=self.log_auto_bounds[0], + ) + assert float_is_bit_equal( + value=other.log_auto_bounds[1], + reference=self.log_auto_bounds[1], + ) + + ### Doctests ### if __name__ == "__main__": # pragma: no cover From 63b12172aa7a1430be683677dec2add534923319 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 19:35:52 +0200 Subject: [PATCH 059/118] tests: added clearer comments to model tests --- tests/test_for_utils/test_models.py | 131 ++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 29 deletions(-) diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 1f88bc2f..006393e6 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -20,11 +20,7 @@ _LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] _WhittakerMethod = Union[str, models.WhittakerSmoothMethods] _WhittakerMethodSequence = List[_WhittakerMethod] -_ExpectedLambdaResult = Union[ - ExpectedWhittakerSmoothLambda, - Type[ValueError], - Type[TypeError], -] +_ExpectedLambdaResult = Union[ExpectedWhittakerSmoothLambda, Type[Exception]] _LambdaTestCombination = Tuple[ _LambdaValueNumericOrFlawed, _WhittakerMethodSequence, @@ -55,7 +51,7 @@ @pytest.mark.parametrize( "combination", [ - ( # Number 0 + ( # Number 0 (fixed float; fixed method) 100.0, _FIXED_WHITTAKER_METHODS, ExpectedWhittakerSmoothLambda( @@ -66,7 +62,18 @@ log_auto_bounds=(_NAN, 
_NAN), ), ), - ( # Number 1 + ( # Number 1 (fixed integer; fixed method) + 100, + _FIXED_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 2 (coinciding floats; fixed method) (100.0, 100.0), _FIXED_WHITTAKER_METHODS, ExpectedWhittakerSmoothLambda( @@ -77,20 +84,20 @@ log_auto_bounds=(_NAN, _NAN), ), ), - ( # Number 2 - (100.0, 100.000001), + ( # Number 3 (coinciding integers; fixed method) + (100, 100), _FIXED_WHITTAKER_METHODS, ExpectedWhittakerSmoothLambda( - fixed_lambda=100.000001, + fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, method_used=models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), - ( # Number 3 + ( # Number 4 (virtually coinciding floats; fixed method) (100.0, 100.000001), - _aauto_whittaker_methods, + _FIXED_WHITTAKER_METHODS, ExpectedWhittakerSmoothLambda( fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), @@ -99,9 +106,9 @@ log_auto_bounds=(_NAN, _NAN), ), ), - ( # Number 4 - (100.000001, 100.0), - _FIXED_WHITTAKER_METHODS, + ( # Number 5 (virtually coinciding floats; automated methods) + (100.0, 100.000001), + _aauto_whittaker_methods, ExpectedWhittakerSmoothLambda( fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), @@ -110,9 +117,9 @@ log_auto_bounds=(_NAN, _NAN), ), ), - ( # Number 5 + ( # Number 6 (flipped virtually coinciding floats; fixed method) (100.000001, 100.0), - _aauto_whittaker_methods, + _FIXED_WHITTAKER_METHODS, ExpectedWhittakerSmoothLambda( fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), @@ -121,9 +128,9 @@ log_auto_bounds=(_NAN, _NAN), ), ), - ( # Number 6 + ( # Number 7 (flipped virtually coinciding floats; automated methods) (100.000001, 100.0), - _FIXED_WHITTAKER_METHODS, + _aauto_whittaker_methods, ExpectedWhittakerSmoothLambda( fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), @@ -132,7 +139,7 @@ 
log_auto_bounds=(_NAN, _NAN), ), ), - ( # Number 7 + ( # Number 8 (search space floats; logml method) (100.0, 10_000.0), _LOGML_WHITTAKER_METHODS, ExpectedWhittakerSmoothLambda( @@ -143,7 +150,18 @@ log_auto_bounds=(np.log(100.0), np.log(10_000.0)), ), ), - ( # Number 8 + ( # Number 9 (search space integers; logml method) + (100, 10_000), + _LOGML_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + ), + ), + ( # Number 10 (flipped search space floats; logml method) (10_000.0, 100.0), _LOGML_WHITTAKER_METHODS, ExpectedWhittakerSmoothLambda( @@ -154,46 +172,94 @@ log_auto_bounds=(np.log(100.0), np.log(10_000.0)), ), ), - ( # Number 9 + ( # Number 11 (flipped search space integers; logml method) + (10_000, 100), + _LOGML_WHITTAKER_METHODS, + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + ), + ), + ( # Number 12 (fixed zero float; fixed method) 0.0, _FIXED_WHITTAKER_METHODS, ValueError, ), - ( # Number 10 + ( # Number 13 (fixed zero integer; fixed method) + 0, + _FIXED_WHITTAKER_METHODS, + ValueError, + ), + ( # Number 14 (search space floats; fixed method) (100.0, 10_000.0), _FIXED_WHITTAKER_METHODS, ValueError, ), - ( # Number 11 + ( + # Number 15 (search space integers; fixed method) + (100, 10_000), + _FIXED_WHITTAKER_METHODS, + ValueError, + ), + ( # Number 16 (fixed float; automated method) 100.0, _aauto_whittaker_methods, ValueError, ), - ( # Number 12 + ( + # Number 17 (fixed integer; automated method) + 100, + _aauto_whittaker_methods, + ValueError, + ), + ( # Number 18 (search space floats with zero; all methods) (0.0, 100.0), _all_whittaker_methods, ValueError, ), - ( # Number 13 + ( # Number 19 (search space integers with 
zero; all methods) + (0, 100), + _all_whittaker_methods, + ValueError, + ), + ( # Number 20 (flipped search space floats with zero; all methods) (100.0, 0.0), _all_whittaker_methods, ValueError, ), - ( # Number 14 + ( # Number 21 (flipped search space integer with zero; all methods) + (100, 0), + _all_whittaker_methods, + ValueError, + ), + ( # Number 22 (all float zeros; all methods) (0.0, 0.0), _all_whittaker_methods, ValueError, ), - ( # Number 15 + ( # Number 23 (all float integers; all methods) + (0, 0), + _all_whittaker_methods, + ValueError, + ), + ( # Number 24 (wrong type; all methods) "error", _all_whittaker_methods, TypeError, ), - ( # Number 16 + ( # Number 25 (fixed float; wrong method) 100.0, "error", ValueError, ), + ( # Number 26 (fixed integer; wrong method) + 100, + "error", + ValueError, + ), ], ) def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> None: @@ -201,6 +267,13 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N Tests the class :class:`WhittakerSmoothLambda` for the correct behavior of its ``__post_init__`` method. + The ``combination`` parameter defines + + - the lambda value(s) to be used, + - the method(s) to be used, and + - the expected result(s) of the instantiation (will be an exception if the input + should be considered invalid by the dataclass). 
+ """ # the combination is unpacked From 5ec64879d5dde35611be71d44541bedd31eac6b8 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 19:39:25 +0200 Subject: [PATCH 060/118] tests/refactor: removed `numpy` from model tests --- tests/test_for_utils/test_models.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 006393e6..20766590 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -5,9 +5,9 @@ ### Imports ### +from math import log from typing import List, Tuple, Type, Union -import numpy as np import pytest from chemotools.utils import models @@ -147,7 +147,7 @@ auto_bounds=(100.0, 10_000.0), fit_auto=True, method_used=models.WhittakerSmoothMethods.LOGML, - log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + log_auto_bounds=(log(100.0), log(10_000.0)), ), ), ( # Number 9 (search space integers; logml method) @@ -158,7 +158,7 @@ auto_bounds=(100.0, 10_000.0), fit_auto=True, method_used=models.WhittakerSmoothMethods.LOGML, - log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + log_auto_bounds=(log(100.0), log(10_000.0)), ), ), ( # Number 10 (flipped search space floats; logml method) @@ -169,7 +169,7 @@ auto_bounds=(100.0, 10_000.0), fit_auto=True, method_used=models.WhittakerSmoothMethods.LOGML, - log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + log_auto_bounds=(log(100.0), log(10_000.0)), ), ), ( # Number 11 (flipped search space integers; logml method) @@ -180,7 +180,7 @@ auto_bounds=(100.0, 10_000.0), fit_auto=True, method_used=models.WhittakerSmoothMethods.LOGML, - log_auto_bounds=(np.log(100.0), np.log(10_000.0)), + log_auto_bounds=(log(100.0), log(10_000.0)), ), ), ( # Number 12 (fixed zero float; fixed method) From 70e932257b515eeef5ba33e367968e49ff427c14 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 19:44:27 +0200 Subject: [PATCH 061/118] tests/refactor: renamed `Numeric` to 
`RealNumeric` --- tests/test_for_utils/test_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 20766590..53487f88 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -15,8 +15,8 @@ ### Type aliases ### -_Numeric = Union[float, int] -_LambdaValueNumeric = Union[_Numeric, Tuple[_Numeric, _Numeric]] +_RealNumeric = Union[float, int] +_LambdaValueNumeric = Union[_RealNumeric, Tuple[_RealNumeric, _RealNumeric]] _LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] _WhittakerMethod = Union[str, models.WhittakerSmoothMethods] _WhittakerMethodSequence = List[_WhittakerMethod] From 3fbbd7abdf6da578520d0f79601905311978b874 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 19:51:29 +0200 Subject: [PATCH 062/118] tests/refactor: slightly reduced type aliases --- tests/test_for_utils/test_models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 53487f88..1c526e9c 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -20,11 +20,10 @@ _LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] _WhittakerMethod = Union[str, models.WhittakerSmoothMethods] _WhittakerMethodSequence = List[_WhittakerMethod] -_ExpectedLambdaResult = Union[ExpectedWhittakerSmoothLambda, Type[Exception]] _LambdaTestCombination = Tuple[ _LambdaValueNumericOrFlawed, _WhittakerMethodSequence, - _ExpectedLambdaResult, + Union[ExpectedWhittakerSmoothLambda, Type[Exception]], ] ### Constants ### From cb97bf425414a783cecacab604ebe6a17d0db070 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 19:53:23 +0200 Subject: [PATCH 063/118] tests/fix: fixed wrong description in docstring --- tests/test_for_utils/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 1c526e9c..16bbc452 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -270,7 +270,7 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N - the lambda value(s) to be used, - the method(s) to be used, and - - the expected result(s) of the instantiation (will be an exception if the input + - the expected result of the instantiation (will be an exception if the input should be considered invalid by the dataclass). """ From e57ab4a1bd1c51897c726ed8af6b08f845fbac6e Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 19:54:50 +0200 Subject: [PATCH 064/118] refactor: made weight iterator less error prone and better testable --- chemotools/utils/whittaker_base/misc.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/chemotools/utils/whittaker_base/misc.py b/chemotools/utils/whittaker_base/misc.py index 107bc636..a4ecb587 100644 --- a/chemotools/utils/whittaker_base/misc.py +++ b/chemotools/utils/whittaker_base/misc.py @@ -6,7 +6,7 @@ ### Imports ### -from typing import Generator, Optional, Union +from typing import Any, Generator, Union import numpy as np @@ -14,7 +14,7 @@ def get_weight_generator( - w: Optional[np.ndarray], + w: Any, n_series: int, ) -> Generator[Union[float, np.ndarray], None, None]: """ @@ -23,17 +23,31 @@ def get_weight_generator( """ + # if the weights are neither None, nor a 1D- or a 2D-Array, an error is raised + if not (w is None or isinstance(w, np.ndarray)): + raise TypeError( + f"The weights must either be None, a NumPy-1D-, or a NumPy-2D-Array, but " + f"they are of type '{type(w)}'." 
+ ) + # Case 1: No weights if w is None: for _ in range(n_series): yield 1.0 - # Case 2: 1D weights + # Case 2: 1D or 2D weights elif w.ndim == 1: for _ in range(n_series): yield w # Case 3: 2D weights elif w.ndim == 2: - for w_vect in w: - yield w_vect + for idx in range(0, n_series): + yield w[idx] + + # Case 4: Invalid weights + elif w.ndim > 2: + raise ValueError( + f"The weights must be either a 1D- or a 2D-array, but they are " + f"{w.ndim}-dimensional with shape {w.shape}." + ) From fcb957b32c35946b40ba5b8ef54c1cd91cfb4b93 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 22:22:12 +0200 Subject: [PATCH 065/118] tests/feat/refactor: added fixture for automated lambda estimation of whittaker smoother; formatted fixtures; changed type hints --- tests/fixtures.py | 91 +++- .../spectrum_whittaker_auto_lambda.csv | 502 ++++++++++++++++++ 2 files changed, 565 insertions(+), 28 deletions(-) create mode 100644 tests/resources/spectrum_whittaker_auto_lambda.csv diff --git a/tests/fixtures.py b/tests/fixtures.py index f1ebcba0..98225ac9 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,4 +1,5 @@ import os +from typing import List import numpy as np import pytest @@ -9,88 +10,119 @@ @pytest.fixture -def spectrum() -> list[np.ndarray]: +def spectrum() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "spectrum.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "spectrum.csv"), + delimiter=",", + ) ] @pytest.fixture -def spectrum_arpls() -> list[np.ndarray]: +def spectrum_arpls() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "spectrum_arpls.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "spectrum_arpls.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_airpls() -> list[np.ndarray]: +def reference_airpls() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_airpls.csv"), delimiter="," - ).tolist() + 
os.path.join(path_to_resources, "reference_airpls.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_arpls() -> list[np.ndarray]: +def reference_arpls() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_arpls.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_arpls.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_msc_mean() -> list[np.ndarray]: +def reference_msc_mean() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_msc_mean.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_msc_mean.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_msc_median() -> list[np.ndarray]: +def reference_msc_median() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_msc_median.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_msc_median.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_sg_15_2() -> list[np.ndarray]: +def reference_sg_15_2() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_sg_15_2.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_sg_15_2.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_snv() -> list[np.ndarray]: +def reference_snv() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_snv.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_snv.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_whittaker() -> list[np.ndarray]: +def reference_whittaker() -> List[np.ndarray]: return [ np.loadtxt( - os.path.join(path_to_resources, "reference_whittaker.csv"), delimiter="," - ).tolist() + os.path.join(path_to_resources, "reference_whittaker.csv"), + delimiter=",", + ) ] @pytest.fixture -def reference_finite_differences() -> list[tuple[int, int, np.ndarray]]: +def 
spectrum_whittaker_auto_lambda() -> np.ndarray: + spectral_data = np.loadtxt( + os.path.join(path_to_resources, "spectrum_whittaker_auto_lambda.csv"), + delimiter=",", + skiprows=1, + ) + + return spectral_data[::, 1] + + +@pytest.fixture +def noise_level_whittaker_auto_lambda() -> np.ndarray: + spectral_data = np.loadtxt( + os.path.join(path_to_resources, "spectrum_whittaker_auto_lambda.csv"), + delimiter=",", + skiprows=1, + ) + + return spectral_data[::, 2] + + +@pytest.fixture +def reference_finite_differences() -> List[tuple[int, int, np.ndarray]]: fin_diff_table = np.genfromtxt( os.path.join(path_to_resources, "reference_finite_differences.csv"), skip_header=2, @@ -114,3 +146,6 @@ def reference_finite_differences() -> list[tuple[int, int, np.ndarray]]: ) return fin_diff_ordered_coeffs + + +spectrum_whittaker_auto_lambda() diff --git a/tests/resources/spectrum_whittaker_auto_lambda.csv b/tests/resources/spectrum_whittaker_auto_lambda.csv new file mode 100644 index 00000000..a65def00 --- /dev/null +++ b/tests/resources/spectrum_whittaker_auto_lambda.csv @@ -0,0 +1,502 @@ +# x,y,noise_std +-5.000000000000000000e+00,3.913954426305540135e+00,3.096822336193649078e-02 +-4.980000000000000426e+00,3.913295001715116683e+00,4.157579901324756633e-02 +-4.959999999999999964e+00,3.967828933711835759e+00,2.021882259976331628e-02 +-4.940000000000000391e+00,3.913240339328647899e+00,3.283002638476407303e-02 +-4.919999999999999929e+00,3.906660793153114319e+00,2.629869250880701587e-02 +-4.900000000000000355e+00,3.977753591201872219e+00,2.303151065858748819e-02 +-4.879999999999999893e+00,3.918578498160504520e+00,4.030304002301314525e-02 +-4.860000000000000320e+00,3.909640759183042213e+00,2.308082479438558487e-02 +-4.839999999999999858e+00,3.928403360537521927e+00,2.120712443290193039e-02 +-4.820000000000000284e+00,3.950831318015709748e+00,2.136998567160089402e-02 +-4.799999999999999822e+00,3.931835980130326647e+00,3.665332100171989765e-02 
+-4.780000000000000249e+00,3.913960091908467831e+00,3.219054411264235527e-02 +-4.759999999999999787e+00,3.944102620606957821e+00,4.689772360937648904e-02 +-4.740000000000000213e+00,3.927372355534060500e+00,3.710244685607267529e-02 +-4.719999999999999751e+00,3.935926500645895665e+00,4.757583831925741441e-02 +-4.700000000000000178e+00,3.940090812557987210e+00,2.609806421525888431e-02 +-4.679999999999999716e+00,4.006116813960685086e+00,4.372331764659243697e-02 +-4.660000000000000142e+00,3.981343849480365460e+00,2.459103331165375314e-02 +-4.639999999999999680e+00,3.957171589280387813e+00,3.317473331736146142e-02 +-4.620000000000000107e+00,3.918252069140263139e+00,2.429907714232983768e-02 +-4.599999999999999645e+00,3.989915028726924273e+00,4.662424792287282738e-02 +-4.580000000000000071e+00,3.984671553543385869e+00,3.256017889923769337e-02 +-4.559999999999999609e+00,4.036915022948532439e+00,2.863787788929512274e-02 +-4.540000000000000036e+00,4.018106961074466632e+00,4.080141249468031972e-02 +-4.519999999999999574e+00,3.935916303030416064e+00,4.359886618341140413e-02 +-4.500000000000000000e+00,3.933475657586883667e+00,3.361348387306223318e-02 +-4.480000000000000426e+00,3.943095738054793742e+00,2.516707436675810414e-02 +-4.459999999999999964e+00,4.000466770834566255e+00,3.088943627207259290e-02 +-4.439999999999999503e+00,3.964649483902519478e+00,2.963575571223017041e-02 +-4.419999999999999929e+00,3.984856670488913455e+00,2.251854582721338899e-02 +-4.400000000000000355e+00,3.971856089063352968e+00,2.495601165215273129e-02 +-4.379999999999999893e+00,4.030635978593705815e+00,3.734265590577109939e-02 +-4.360000000000000320e+00,3.981218782347658802e+00,3.320986912666832463e-02 +-4.339999999999999858e+00,3.968899147970850194e+00,3.269095491082592980e-02 +-4.320000000000000284e+00,3.982898269224011578e+00,2.946282537533189208e-02 +-4.299999999999999822e+00,3.939100003418123386e+00,2.865522681337081753e-02 
+-4.280000000000000249e+00,3.945635266010562692e+00,3.472014240355719705e-02 +-4.259999999999999787e+00,4.000947172556999831e+00,3.030321708700593059e-02 +-4.240000000000000213e+00,4.020343398727571405e+00,3.157915029390712114e-02 +-4.219999999999999751e+00,4.063889834011682467e+00,4.463116837108123403e-02 +-4.200000000000000178e+00,4.019044487495743390e+00,2.820994850940350754e-02 +-4.179999999999999716e+00,3.999693957024348379e+00,2.842345038179301675e-02 +-4.160000000000000142e+00,4.014216556726715623e+00,2.068763011433182725e-02 +-4.139999999999999680e+00,3.997568023205597232e+00,3.579063885698285929e-02 +-4.120000000000000107e+00,3.969380819221903778e+00,3.710054561456280209e-02 +-4.099999999999999645e+00,3.973378322910303240e+00,2.674569508640989302e-02 +-4.080000000000000071e+00,4.018743284686774331e+00,4.125625535047831166e-02 +-4.059999999999999609e+00,4.002858211233585983e+00,3.722005134771839280e-02 +-4.040000000000000036e+00,4.021668953186641637e+00,2.647815044301944643e-02 +-4.019999999999999574e+00,3.963600307242701959e+00,2.404215547884083759e-02 +-4.000000000000000000e+00,4.001938886864440548e+00,2.316279516014600401e-02 +-3.979999999999999982e+00,3.961121319087129944e+00,3.165838742547153611e-02 +-3.959999999999999964e+00,3.988298094713540465e+00,2.927789155113174474e-02 +-3.939999999999999947e+00,4.015211499281781116e+00,1.965729088192920068e-02 +-3.919999999999999929e+00,3.996739007072117822e+00,2.217006837373004455e-02 +-3.899999999999999911e+00,4.006967280403993747e+00,3.548421177927273823e-02 +-3.879999999999999893e+00,3.992420505343744708e+00,2.287452531123754587e-02 +-3.859999999999999876e+00,3.989833605239252012e+00,1.982777448634700374e-02 +-3.839999999999999858e+00,3.977133776300772361e+00,2.387953952540879110e-02 +-3.820000000000000284e+00,3.968534106303187148e+00,4.404511893658814153e-02 +-3.799999999999999822e+00,4.046166805023654334e+00,3.313448396509128396e-02 
+-3.780000000000000249e+00,3.981901684829826316e+00,2.802464965497018790e-02 +-3.759999999999999787e+00,3.968265699229948318e+00,3.089639612993963155e-02 +-3.740000000000000213e+00,3.993594708763612111e+00,2.708670552718019792e-02 +-3.719999999999999751e+00,3.978060837482736645e+00,3.452291125902230001e-02 +-3.700000000000000178e+00,3.999624538760653270e+00,3.961449084431505063e-02 +-3.679999999999999716e+00,3.959951862232888420e+00,2.127983196111482220e-02 +-3.660000000000000142e+00,3.992008806185594505e+00,3.730351277723908954e-02 +-3.639999999999999680e+00,3.983111142787472581e+00,2.560441937780641442e-02 +-3.620000000000000107e+00,3.997789721210798675e+00,4.189555115844698086e-02 +-3.599999999999999645e+00,3.978584700131533758e+00,3.025928312227217989e-02 +-3.580000000000000071e+00,3.999259303534389520e+00,2.857730727887747985e-02 +-3.560000000000000053e+00,3.990540510474509084e+00,2.321307742211498049e-02 +-3.540000000000000036e+00,4.006843582528653691e+00,3.607893641315891575e-02 +-3.520000000000000018e+00,3.989502366165125213e+00,5.280686333954774658e-02 +-3.500000000000000000e+00,3.971463221592128612e+00,3.195062668002687095e-02 +-3.479999999999999982e+00,3.984236422056169680e+00,3.653447718907179564e-02 +-3.459999999999999964e+00,3.965390902218237645e+00,2.523632145940367388e-02 +-3.439999999999999947e+00,3.988691848103080417e+00,3.194360253100081426e-02 +-3.419999999999999929e+00,3.915564628228665800e+00,3.335365548095119148e-02 +-3.399999999999999911e+00,3.933238574213607119e+00,1.955557934578588258e-02 +-3.379999999999999893e+00,3.984902392991356290e+00,3.531213606899333973e-02 +-3.359999999999999876e+00,3.966998602981461008e+00,3.858838304594502944e-02 +-3.339999999999999858e+00,3.980905662625323860e+00,2.116723883099221049e-02 +-3.320000000000000284e+00,3.914229362381833166e+00,3.160429335949033886e-02 +-3.299999999999999822e+00,3.994917048087540667e+00,3.132077094697865299e-02 
+-3.280000000000000249e+00,3.943874923203797955e+00,2.512446779038992120e-02 +-3.259999999999999787e+00,3.950489243789083726e+00,2.346503295241367942e-02 +-3.240000000000000213e+00,3.927196810974445729e+00,3.915093278749896127e-02 +-3.219999999999999751e+00,3.947917455856836622e+00,2.476543934279373110e-02 +-3.200000000000000178e+00,3.969343297431183970e+00,3.412387431505126134e-02 +-3.179999999999999716e+00,3.925906099200734189e+00,2.901476964540048928e-02 +-3.160000000000000142e+00,3.923668551812799166e+00,2.540509909929494770e-02 +-3.139999999999999680e+00,3.947226670336628995e+00,2.993884543265358869e-02 +-3.120000000000000107e+00,3.928346744753318465e+00,3.630472353955733339e-02 +-3.099999999999999645e+00,3.900075964511259929e+00,4.332776229755198882e-02 +-3.080000000000000071e+00,3.935772299731806978e+00,2.449157550065346814e-02 +-3.060000000000000053e+00,3.915896784887003879e+00,1.992532293741383978e-02 +-3.040000000000000036e+00,3.904143184923001275e+00,2.719088101344899547e-02 +-3.020000000000000018e+00,3.897907823509206615e+00,3.955967022312247006e-02 +-3.000000000000000000e+00,3.866438996497719049e+00,3.584595008712544023e-02 +-2.979999999999999982e+00,3.903357248827355352e+00,2.678761408580850739e-02 +-2.959999999999999964e+00,3.867880849871169868e+00,4.056478219242153765e-02 +-2.939999999999999947e+00,3.882397409790404375e+00,3.205584351100022877e-02 +-2.919999999999999929e+00,3.936961090451624568e+00,3.332833371997186156e-02 +-2.899999999999999911e+00,3.866043917754864445e+00,2.340248663650760716e-02 +-2.879999999999999893e+00,3.874818538198867124e+00,3.363872375973592233e-02 +-2.859999999999999876e+00,3.874819534869030413e+00,3.412069939766745885e-02 +-2.839999999999999858e+00,3.852214585211989117e+00,3.848297388549758169e-02 +-2.819999999999999840e+00,3.853548185955963934e+00,2.811664579389831775e-02 +-2.799999999999999822e+00,3.854754553304260423e+00,2.534333019107942730e-02 
+-2.779999999999999805e+00,3.841071553729384025e+00,2.160748256322369762e-02 +-2.759999999999999787e+00,3.906409499587252476e+00,3.989927560475871327e-02 +-2.739999999999999769e+00,3.892863359620862163e+00,4.394563528675898834e-02 +-2.719999999999999751e+00,3.865947691256906626e+00,4.331492627779613691e-02 +-2.699999999999999734e+00,3.824190739531758254e+00,2.394046463981249276e-02 +-2.680000000000000160e+00,3.910131192202466188e+00,2.358546713714732707e-02 +-2.660000000000000142e+00,3.842141175582946655e+00,3.143049748680051825e-02 +-2.640000000000000124e+00,3.823047218261981239e+00,3.201948188508568077e-02 +-2.620000000000000107e+00,3.872275415949148147e+00,2.823873364965024529e-02 +-2.600000000000000089e+00,3.805037068723069016e+00,2.923310143617065260e-02 +-2.580000000000000071e+00,3.847414491372637801e+00,4.797550196894023039e-02 +-2.560000000000000053e+00,3.843464708155962750e+00,3.266237082284365739e-02 +-2.540000000000000036e+00,3.798922067708248473e+00,3.133758414632323297e-02 +-2.520000000000000018e+00,3.827628701786526033e+00,2.317422859332170931e-02 +-2.500000000000000000e+00,3.809526646402432881e+00,3.295109809401969292e-02 +-2.479999999999999982e+00,3.747985523101889260e+00,4.050258952135226548e-02 +-2.459999999999999964e+00,3.806809286093004729e+00,1.923621539387884369e-02 +-2.439999999999999947e+00,3.781100587500339838e+00,2.316177455479511565e-02 +-2.419999999999999929e+00,3.749411789432546716e+00,3.866553106380841293e-02 +-2.399999999999999911e+00,3.792726529826150994e+00,5.083314405848265299e-02 +-2.379999999999999893e+00,3.749106715790383237e+00,1.836938560312795232e-02 +-2.359999999999999876e+00,3.757735405285781649e+00,2.169879163528952090e-02 +-2.339999999999999858e+00,3.752632605069845528e+00,3.800387835900527478e-02 +-2.319999999999999840e+00,3.746880786136597141e+00,3.890813293409937923e-02 +-2.299999999999999822e+00,3.750215780850299829e+00,5.729187485296455273e-02 
+-2.279999999999999805e+00,3.711741146279374792e+00,3.449917492064095981e-02 +-2.259999999999999787e+00,3.630930613745085189e+00,3.742083549552085997e-02 +-2.239999999999999769e+00,3.713166467526545222e+00,3.498465794338397744e-02 +-2.219999999999999751e+00,3.681110389341025968e+00,3.815477170164736920e-02 +-2.199999999999999734e+00,3.702545598010270389e+00,3.134983031602142645e-02 +-2.180000000000000160e+00,3.715653624123065768e+00,3.811872814876098720e-02 +-2.160000000000000142e+00,3.604065445077755392e+00,4.082881623022253242e-02 +-2.140000000000000124e+00,3.679127871616473033e+00,2.843288863848685011e-02 +-2.120000000000000107e+00,3.723207343503561972e+00,3.295457755380135079e-02 +-2.100000000000000089e+00,3.593677436562992877e+00,6.640207053241493906e-02 +-2.080000000000000071e+00,3.730259422314661233e+00,9.533727857204726819e-02 +-2.060000000000000053e+00,3.652676325481325481e+00,4.814528861415776767e-02 +-2.040000000000000036e+00,3.664039414557082708e+00,4.667967662908709126e-02 +-2.020000000000000018e+00,3.605572286811380867e+00,7.161423172160229222e-02 +-2.000000000000000000e+00,3.539623085803978331e+00,6.708869557030820718e-02 +-1.979999999999999982e+00,3.619072317723509347e+00,5.743569593331862566e-02 +-1.959999999999999964e+00,3.711185604571868524e+00,8.364311972260785044e-02 +-1.939999999999999947e+00,3.719738846211896988e+00,7.184953346731948187e-02 +-1.919999999999999929e+00,3.640954715256664809e+00,6.558577658908354424e-02 +-1.899999999999999911e+00,3.606185742205383793e+00,1.060721402286213394e-01 +-1.879999999999999893e+00,3.649108418714726465e+00,9.092475993307447268e-02 +-1.859999999999999876e+00,3.709944524969695401e+00,7.155454639976135955e-02 +-1.839999999999999858e+00,3.419438852172148824e+00,1.228594590938914127e-01 +-1.819999999999999840e+00,3.593174533405012916e+00,1.550942001530101755e-01 +-1.799999999999999822e+00,3.520638523828726107e+00,1.219140064569037563e-01 
+-1.779999999999999805e+00,3.608267192342823115e+00,1.205649991710633223e-01 +-1.759999999999999787e+00,3.453201243804023601e+00,1.376335561212327918e-01 +-1.739999999999999769e+00,3.607774685478800425e+00,1.665844544798526783e-01 +-1.719999999999999751e+00,3.640026211733285333e+00,1.332907382461752388e-01 +-1.699999999999999734e+00,3.642227320257549028e+00,1.268407969723837903e-01 +-1.679999999999999716e+00,3.803890280567367999e+00,1.576114673042917080e-01 +-1.660000000000000142e+00,3.735897857753192053e+00,2.214995227455958393e-01 +-1.640000000000000124e+00,3.663913595669907153e+00,1.424227087882632414e-01 +-1.620000000000000107e+00,3.641401992387191200e+00,1.551600530436650083e-01 +-1.600000000000000089e+00,3.217048317236007815e+00,2.630495244546968703e-01 +-1.580000000000000071e+00,3.892532742459869688e+00,2.368715709284081039e-01 +-1.560000000000000053e+00,3.360368769070483985e+00,1.982329599456646163e-01 +-1.540000000000000036e+00,3.752961584101674131e+00,2.460402241728126826e-01 +-1.520000000000000018e+00,3.745930947639183550e+00,3.309654834926570310e-01 +-1.500000000000000000e+00,3.798556507140971927e+00,2.244332830917858124e-01 +-1.479999999999999982e+00,3.810433536595719151e+00,2.921392931771470414e-01 +-1.459999999999999964e+00,3.916929666201640181e+00,2.653814591464132566e-01 +-1.439999999999999947e+00,4.031456282874307462e+00,3.642239947464401273e-01 +-1.419999999999999929e+00,4.756015559092563905e+00,5.166291421943192130e-01 +-1.399999999999999911e+00,3.594508053563978400e+00,3.764090765633805180e-01 +-1.379999999999999893e+00,3.995552010376214991e+00,4.221795343405287393e-01 +-1.359999999999999876e+00,3.802382220218354369e+00,4.175557607001874061e-01 +-1.339999999999999858e+00,4.654621606587832972e+00,3.786510240332576771e-01 +-1.319999999999999840e+00,4.498210774344039820e+00,3.245373535245575369e-01 +-1.299999999999999822e+00,4.167062990719260895e+00,5.052235057933155415e-01 
+-1.279999999999999805e+00,4.216951262431840597e+00,6.532837690567284694e-01 +-1.259999999999999787e+00,4.559110373914525560e+00,3.273162563351011967e-01 +-1.239999999999999769e+00,4.762139581965920243e+00,5.769640282217669558e-01 +-1.219999999999999751e+00,3.280248253976095540e+00,5.275499350589685221e-01 +-1.199999999999999734e+00,4.276145716520765028e+00,7.549125729618073599e-01 +-1.179999999999999716e+00,4.600817916539200247e+00,6.092834368481748752e-01 +-1.160000000000000142e+00,4.534854213018084224e+00,6.756216430995778399e-01 +-1.140000000000000124e+00,5.024019049064923692e+00,5.385345533169113574e-01 +-1.120000000000000107e+00,5.239870345111208749e+00,5.402099371398212391e-01 +-1.100000000000000089e+00,5.485974253161926661e+00,4.018729221391404982e-01 +-1.080000000000000071e+00,5.318244642725155735e+00,6.373361947883031675e-01 +-1.060000000000000053e+00,6.760430236052386910e+00,6.124735590859445855e-01 +-1.040000000000000036e+00,5.752138952060318999e+00,4.249735541418221407e-01 +-1.020000000000000018e+00,4.159619060982223004e+00,8.032549364880220422e-01 +-1.000000000000000000e+00,6.137115336978383517e+00,6.702421100272603072e-01 +-9.799999999999995381e-01,5.872802388731122925e+00,5.096481823863698857e-01 +-9.599999999999999645e-01,6.281236755604411748e+00,6.472217891646340204e-01 +-9.399999999999995026e-01,5.565492156114431488e+00,6.959215464098150727e-01 +-9.199999999999999289e-01,6.546745314566285501e+00,9.349172151308963175e-01 +-9.000000000000003553e-01,4.235996624335836458e+00,5.674418073457194733e-01 +-8.799999999999998934e-01,5.895946189351668032e+00,1.151543603478050626e+00 +-8.600000000000003197e-01,8.090416456392947708e+00,9.886224714269703506e-01 +-8.399999999999998579e-01,7.099654579905307017e+00,9.411573406527600216e-01 +-8.200000000000002842e-01,7.783447599187353738e+00,1.563679351082028157e+00 +-7.999999999999998224e-01,5.860696892165741190e+00,1.070756444618480741e+00 
+-7.800000000000002487e-01,6.197255583792041556e+00,7.878498533061101261e-01 +-7.599999999999997868e-01,5.755018464355096341e+00,8.295877048549399335e-01 +-7.400000000000002132e-01,7.207746759775751855e+00,4.903793932628179864e-01 +-7.199999999999997513e-01,6.849506693526540069e+00,8.751365338238001890e-01 +-7.000000000000001776e-01,7.063283370964628105e+00,8.139025491327271933e-01 +-6.799999999999997158e-01,3.945646506068891224e+00,1.013587577515847382e+00 +-6.600000000000001421e-01,6.267470566415362043e+00,7.742101094832944952e-01 +-6.399999999999996803e-01,6.558919601455357906e+00,7.859065498006260198e-01 +-6.200000000000001066e-01,7.077991229023441022e+00,6.918590236314428044e-01 +-5.999999999999996447e-01,6.213534208408828086e+00,9.083962692379881076e-01 +-5.800000000000000711e-01,5.040827980153054000e+00,9.659222627816541618e-01 +-5.599999999999996092e-01,6.088292857633700095e+00,9.970182802078784157e-01 +-5.400000000000000355e-01,4.933795191345740960e+00,6.901311785896823148e-01 +-5.199999999999995737e-01,4.549334630528049317e+00,5.649737176498387248e-01 +-5.000000000000000000e-01,5.385998717145486836e+00,8.874764677140921654e-01 +-4.799999999999995381e-01,5.833853142262386271e+00,8.402770111488416793e-01 +-4.599999999999999645e-01,5.659937411166305665e+00,8.734890865389354220e-01 +-4.399999999999995026e-01,3.821608855697117058e+00,6.361529153821200433e-01 +-4.199999999999999289e-01,4.956658156492856016e+00,7.052981973370683333e-01 +-3.999999999999994671e-01,4.579878580113756037e+00,5.913716519320653120e-01 +-3.799999999999998934e-01,5.313046851124894943e+00,6.604572801362433854e-01 +-3.600000000000003197e-01,3.276085072396378806e+00,5.970258352233933641e-01 +-3.399999999999998579e-01,4.945262532692082047e+00,6.546549831258263730e-01 +-3.200000000000002842e-01,5.074998639686343793e+00,5.939270740467554877e-01 +-2.999999999999998224e-01,5.647855412945030906e+00,5.294028271246801198e-01 
+-2.800000000000002487e-01,4.105665388254435832e+00,6.313947350577137074e-01 +-2.599999999999997868e-01,4.450308562157782255e+00,5.585069151830991396e-01 +-2.400000000000002132e-01,4.071521720834863522e+00,4.594469840608414457e-01 +-2.199999999999997513e-01,2.963422081644037753e+00,7.669847097205949593e-01 +-2.000000000000001776e-01,3.868455002797947184e+00,4.251092352493783255e-01 +-1.799999999999997158e-01,3.536676735169854702e+00,3.809115333435043049e-01 +-1.600000000000001421e-01,3.211077182715823319e+00,3.539182369697171926e-01 +-1.399999999999996803e-01,3.157142724539423462e+00,4.179098049588486075e-01 +-1.200000000000001066e-01,3.253957454666461580e+00,2.287014051271014647e-01 +-9.999999999999964473e-02,3.548219807515344382e+00,3.531347515347905142e-01 +-8.000000000000007105e-02,3.507299359211579315e+00,2.648647778152579124e-01 +-5.999999999999960920e-02,2.682990980990552998e+00,4.679258280625686051e-01 +-4.000000000000003553e-02,2.988681850963338160e+00,4.187317005153019522e-01 +-1.999999999999957367e-02,3.143729582095237340e+00,2.296789574801562372e-01 +0.000000000000000000e+00,2.421701960787510988e+00,3.810571805066982343e-01 +2.000000000000046185e-02,2.880319783599672778e+00,2.221249274397878171e-01 +4.000000000000003553e-02,3.722343570451513095e+00,2.199211530517474755e-01 +6.000000000000049738e-02,2.857603579947825878e+00,2.448851310521519553e-01 +8.000000000000007105e-02,3.095978508429543030e+00,3.840540878242945655e-01 +1.000000000000005329e-01,2.914637263903819164e+00,1.904934648461840685e-01 +1.200000000000001066e-01,2.960170259963005979e+00,1.938639502937708481e-01 +1.399999999999996803e-01,2.949001397061277618e+00,2.123753112352188555e-01 +1.600000000000001421e-01,3.049640755278711701e+00,1.228069598001861884e-01 +1.799999999999997158e-01,2.728754984418608753e+00,2.559880576432678745e-01 +2.000000000000001776e-01,2.727849241201167452e+00,2.231426434949308757e-01 +2.199999999999997513e-01,2.992638787130469158e+00,2.191637740227992959e-01 
+2.400000000000002132e-01,2.251918265667214758e+00,2.480313535065964692e-01 +2.599999999999997868e-01,2.889134209031075429e+00,2.459585819361506664e-01 +2.800000000000002487e-01,2.424532517989040237e+00,9.053335818983149208e-02 +2.999999999999998224e-01,2.165867195473284212e+00,1.703346508348992372e-01 +3.200000000000002842e-01,2.834687805641520786e+00,2.862061388394843697e-01 +3.399999999999998579e-01,2.506058393497093029e+00,2.187887655218112293e-01 +3.600000000000003197e-01,2.878502009412273299e+00,3.009170618786285445e-01 +3.799999999999998934e-01,2.538209946113609838e+00,2.513645475901037374e-01 +4.000000000000003553e-01,2.323923192893809109e+00,3.558336096401703008e-01 +4.199999999999999289e-01,2.335711674975006424e+00,2.440863314527701444e-01 +4.400000000000003908e-01,3.188210899075321514e+00,2.757914055844488677e-01 +4.599999999999999645e-01,2.392353676431516529e+00,2.469282079526576301e-01 +4.800000000000004263e-01,2.758761172409790241e+00,3.680833929312681274e-01 +5.000000000000000000e-01,3.169012719153432300e+00,3.436627873706666336e-01 +5.200000000000004619e-01,2.033325531486355864e+00,2.563589488507343472e-01 +5.400000000000000355e-01,2.606195281701388033e+00,2.688410546078896513e-01 +5.600000000000004974e-01,2.581526900163175764e+00,2.837953043520541341e-01 +5.800000000000000711e-01,2.467688230198862964e+00,3.711213884661984097e-01 +6.000000000000005329e-01,2.831073027789893892e+00,3.175362955311629154e-01 +6.200000000000001066e-01,3.161240012035097191e+00,1.997911486567817974e-01 +6.399999999999996803e-01,2.956347334622390743e+00,4.285265589597905822e-01 +6.600000000000001421e-01,2.527608156534642880e+00,2.560510676401487329e-01 +6.799999999999997158e-01,2.971776586979084644e+00,5.310266580236164824e-01 +7.000000000000001776e-01,2.877290794515752204e+00,5.240177912611390099e-01 +7.199999999999997513e-01,3.389214360428408845e+00,4.743984853334949348e-01 +7.400000000000002132e-01,2.522099656711926485e+00,4.929410237222000557e-01 
+7.599999999999997868e-01,2.155763592687337749e+00,3.841695803658565778e-01 +7.800000000000002487e-01,2.799985290900282031e+00,5.452928167360916456e-01 +7.999999999999998224e-01,3.181999059836660315e+00,3.921740873565852659e-01 +8.200000000000002842e-01,3.057996485246090135e+00,3.338910048932451358e-01 +8.399999999999998579e-01,2.115576753796697851e+00,3.280525299425075536e-01 +8.600000000000003197e-01,2.721097204259094404e+00,5.254098256952898227e-01 +8.799999999999998934e-01,3.061150513259692563e+00,4.445749637844202407e-01 +9.000000000000003553e-01,2.669388958342580498e+00,4.072730575664818553e-01 +9.199999999999999289e-01,3.627533156198332609e+00,6.026970045505178897e-01 +9.400000000000003908e-01,2.765688681358758227e+00,5.117093613705018251e-01 +9.599999999999999645e-01,3.192183883069087624e+00,3.587640992401981976e-01 +9.800000000000004263e-01,3.968383003330220760e+00,3.270829475816557363e-01 +1.000000000000000000e+00,2.338013832820840854e+00,5.943069716041308181e-01 +1.020000000000000462e+00,3.313408994267729835e+00,6.526803408414116880e-01 +1.040000000000000036e+00,3.040682318981518861e+00,5.836565795219444963e-01 +1.060000000000000497e+00,2.779849182033996335e+00,4.256561800169474385e-01 +1.080000000000000071e+00,2.904547044921763188e+00,3.041189794730353446e-01 +1.100000000000000533e+00,2.986790168515752075e+00,3.526160436787704744e-01 +1.120000000000000107e+00,3.510619812891737190e+00,4.589691190502168672e-01 +1.140000000000000568e+00,3.610893289988389654e+00,5.683411129188775668e-01 +1.160000000000000142e+00,2.241228078826833059e+00,3.469867901057105963e-01 +1.179999999999999716e+00,3.814941693212685614e+00,3.599045368114944798e-01 +1.200000000000000178e+00,3.265716891407834943e+00,2.892595048204359420e-01 +1.219999999999999751e+00,3.081223862719026485e+00,3.447948472735757330e-01 +1.240000000000000213e+00,2.922621628709752617e+00,3.363244971630394708e-01 +1.259999999999999787e+00,2.600552915231134854e+00,5.720317054932907430e-01 
+1.280000000000000249e+00,2.295526789533440581e+00,4.703156894625505902e-01 +1.299999999999999822e+00,2.134518269816672920e+00,2.073651949699803210e-01 +1.320000000000000284e+00,2.505688380794310000e+00,3.565010685590284667e-01 +1.339999999999999858e+00,2.657778015600396060e+00,2.054430250094419763e-01 +1.360000000000000320e+00,2.880342521059242600e+00,3.269488151166322631e-01 +1.379999999999999893e+00,2.744123595985678232e+00,3.190639167541055032e-01 +1.400000000000000355e+00,2.834484243824526128e+00,3.600015743496797760e-01 +1.419999999999999929e+00,3.111700384330541080e+00,3.055750774141779980e-01 +1.440000000000000391e+00,1.984596428286149017e+00,2.535374835126482096e-01 +1.459999999999999964e+00,2.184618266171591827e+00,4.125742803545879300e-01 +1.480000000000000426e+00,2.541586847516879466e+00,3.075604621153351093e-01 +1.500000000000000000e+00,1.886697152297341695e+00,3.969470656999851688e-01 +1.520000000000000462e+00,2.042829442663604667e+00,3.025771102881774643e-01 +1.540000000000000036e+00,2.260314403072414713e+00,2.367490966951061038e-01 +1.560000000000000497e+00,1.967071821134906751e+00,3.089001509687088198e-01 +1.580000000000000071e+00,2.255154594264408896e+00,2.813968051504651124e-01 +1.600000000000000533e+00,1.664795947481280347e+00,2.310721598292219126e-01 +1.620000000000000107e+00,1.898006700231108290e+00,2.653301084336139870e-01 +1.640000000000000568e+00,2.167986247869837069e+00,1.582119967000197303e-01 +1.660000000000000142e+00,2.052209084359961633e+00,2.480990399266391477e-01 +1.679999999999999716e+00,2.030829428195580988e+00,1.746181670706817290e-01 +1.700000000000000178e+00,2.079538602632515332e+00,1.586178729041856916e-01 +1.719999999999999751e+00,1.447949949786532686e+00,1.487681488926025641e-01 +1.740000000000000213e+00,1.984142869518430441e+00,2.126409080700615295e-01 +1.759999999999999787e+00,1.594984196307964508e+00,1.531504013947989629e-01 +1.780000000000000249e+00,1.689744286673886542e+00,1.573895567145644492e-01 
+1.799999999999999822e+00,1.505327862233904135e+00,1.433670028956879894e-01 +1.820000000000000284e+00,1.657357557672590298e+00,1.936888522083639597e-01 +1.839999999999999858e+00,1.627728676663654062e+00,1.973138182971920507e-01 +1.860000000000000320e+00,1.562003428819664252e+00,1.075696220393412700e-01 +1.879999999999999893e+00,1.495731507212113787e+00,1.246181847495901329e-01 +1.900000000000000355e+00,1.696456855549726805e+00,1.184320222108378451e-01 +1.919999999999999929e+00,1.753621707095358806e+00,1.654974896688882480e-01 +1.940000000000000391e+00,1.510696371828758267e+00,8.744842229968947944e-02 +1.959999999999999964e+00,1.593205776328715517e+00,9.488123223315308996e-02 +1.980000000000000426e+00,1.371252828445131744e+00,9.083416534289066868e-02 +2.000000000000000000e+00,1.631767388510174444e+00,5.605529023766811503e-02 +2.020000000000000462e+00,1.510870281561765172e+00,9.794045190385711197e-02 +2.040000000000000036e+00,1.509917665204379755e+00,5.847087504586767626e-02 +2.060000000000000497e+00,1.457511736987583184e+00,1.026291527197704528e-01 +2.080000000000000071e+00,1.337502930906462328e+00,5.273559726837979966e-02 +2.100000000000000533e+00,1.428158853424737362e+00,6.186049157060879988e-02 +2.120000000000000107e+00,1.334146609616660761e+00,6.233053017040882798e-02 +2.140000000000000568e+00,1.389119451915253656e+00,6.753635107838724949e-02 +2.160000000000000142e+00,1.411502044605835504e+00,4.818573805837670504e-02 +2.179999999999999716e+00,1.512147721837651781e+00,4.450992438755188574e-02 +2.200000000000000178e+00,1.342215912911993270e+00,5.676913138113579699e-02 +2.219999999999999751e+00,1.533350868768530884e+00,6.970841109848661954e-02 +2.240000000000000213e+00,1.398941725956053928e+00,6.789068920224745896e-02 +2.259999999999999787e+00,1.315483430698168199e+00,4.651874555391048022e-02 +2.280000000000000249e+00,1.373201771977372143e+00,4.002537228954656384e-02 +2.299999999999999822e+00,1.285003032494459685e+00,3.701739336027300153e-02 
+2.320000000000000284e+00,1.337936194998987194e+00,4.090671416312927772e-02 +2.339999999999999858e+00,1.276400380907636301e+00,5.126562293055141051e-02 +2.360000000000000320e+00,1.300022331653995611e+00,5.490064925439376575e-02 +2.379999999999999893e+00,1.333665544228300748e+00,4.442743573097127779e-02 +2.400000000000000355e+00,1.293747202699239374e+00,3.450761667350280004e-02 +2.419999999999999929e+00,1.244199007704499316e+00,4.892606653276775075e-02 +2.440000000000000391e+00,1.324761412428399510e+00,3.594625252613110195e-02 +2.459999999999999964e+00,1.235109059399808418e+00,2.588565954245041975e-02 +2.480000000000000426e+00,1.302786487079408806e+00,4.302629597811168799e-02 +2.500000000000000000e+00,1.193293907935522258e+00,4.554179185929631279e-02 +2.520000000000000462e+00,1.166485983069556198e+00,2.969047947990152009e-02 +2.540000000000000036e+00,1.227974018485518126e+00,3.211761999137055512e-02 +2.560000000000000497e+00,1.304406572007846821e+00,2.881781570521451882e-02 +2.580000000000000071e+00,1.118044531986680745e+00,5.065123842298295542e-02 +2.600000000000000533e+00,1.176494597509851525e+00,3.111566005871567453e-02 +2.620000000000000107e+00,1.138071003896516276e+00,1.588287971280955349e-02 +2.640000000000000568e+00,1.182284596889840778e+00,3.734954384966652358e-02 +2.660000000000000142e+00,1.142711990112235343e+00,2.222259134544606399e-02 +2.679999999999999716e+00,1.172697850945799924e+00,3.267050585695246284e-02 +2.700000000000000178e+00,1.125106052110490307e+00,2.879231061755915463e-02 +2.719999999999999751e+00,1.063066446130191745e+00,3.453316633484269926e-02 +2.740000000000000213e+00,1.091014756190950008e+00,3.593049971171793922e-02 +2.759999999999999787e+00,1.117608734824651284e+00,3.620445660684060152e-02 +2.780000000000000249e+00,1.070946674106443952e+00,2.945674602064272604e-02 +2.799999999999999822e+00,1.158073375390544557e+00,4.317271897172083456e-02 +2.820000000000000284e+00,1.068532570468847309e+00,3.608477525373053607e-02 
+2.839999999999999858e+00,1.073138054167278588e+00,3.920116041946775098e-02 +2.860000000000000320e+00,1.051324237483512336e+00,3.217497612874499480e-02 +2.879999999999999893e+00,1.048394425887490877e+00,2.863637825214125979e-02 +2.900000000000000355e+00,1.043197653893642496e+00,2.303674148816653097e-02 +2.919999999999999929e+00,9.927415994114875408e-01,2.750601863091974020e-02 +2.940000000000000391e+00,1.041076924330221587e+00,2.705651838895691344e-02 +2.959999999999999964e+00,9.525405867745834199e-01,1.703397553553930369e-02 +2.980000000000000426e+00,9.813458308074753944e-01,3.980445657729349968e-02 +3.000000000000000000e+00,9.788892868312137896e-01,3.501065160424287476e-02 +3.019999999999999574e+00,9.791733570405278808e-01,2.074376790171749904e-02 +3.040000000000000924e+00,9.484615006489022226e-01,3.100365139014824370e-02 +3.060000000000000497e+00,9.351859625054994574e-01,3.481369483420125588e-02 +3.080000000000000071e+00,9.057035218946286603e-01,2.772343988678164020e-02 +3.099999999999999645e+00,8.651708341540602643e-01,3.921165500460101205e-02 +3.120000000000000995e+00,9.303704135098522787e-01,2.771220111105161463e-02 +3.140000000000000568e+00,9.193051217201706482e-01,3.402373318223596288e-02 +3.160000000000000142e+00,9.352467420486022531e-01,3.588952176393950949e-02 +3.179999999999999716e+00,8.813652204922443056e-01,2.990696161174580953e-02 +3.199999999999999289e+00,9.390011623128490248e-01,1.901648106737680380e-02 +3.220000000000000639e+00,8.600902219662046599e-01,4.474787744605149969e-02 +3.240000000000000213e+00,8.523875153073585675e-01,3.323721427792761945e-02 +3.259999999999999787e+00,8.644647000854875918e-01,2.951322030495437398e-02 +3.279999999999999361e+00,8.690682320477158829e-01,2.307464008203872000e-02 +3.300000000000000711e+00,8.079756093381198490e-01,2.212612398486654697e-02 +3.320000000000000284e+00,8.294698933376599692e-01,3.349877650881752378e-02 +3.339999999999999858e+00,7.773164878496438002e-01,2.792524629966190025e-02 
+3.359999999999999432e+00,7.987031000048978591e-01,2.519442004395512635e-02 +3.380000000000000782e+00,7.915291710156696636e-01,2.941592650294502787e-02 +3.400000000000000355e+00,7.742913156876776171e-01,3.018585662763769925e-02 +3.419999999999999929e+00,8.052117292761523659e-01,3.534820799363974642e-02 +3.439999999999999503e+00,6.975738328763312346e-01,2.992469270581009366e-02 +3.460000000000000853e+00,7.456754448582639805e-01,5.177506811051349467e-02 +3.480000000000000426e+00,7.971989053927358571e-01,3.183223099024329317e-02 +3.500000000000000000e+00,7.500214809794597537e-01,2.973325203337600503e-02 +3.519999999999999574e+00,7.405704628922389343e-01,2.644933588288904402e-02 +3.540000000000000924e+00,7.010520360337462176e-01,2.751384680887634590e-02 +3.560000000000000497e+00,7.748798508620088343e-01,2.238844522514950308e-02 +3.580000000000000071e+00,7.037393802241858065e-01,3.022398023282772683e-02 +3.599999999999999645e+00,6.853447274267203682e-01,3.659059286972034947e-02 +3.620000000000000995e+00,6.404736362845638853e-01,4.159673361489631821e-02 +3.640000000000000568e+00,5.796863065161470541e-01,2.310965550118417033e-02 +3.660000000000000142e+00,6.706090484334317203e-01,2.150927154134406327e-02 +3.679999999999999716e+00,6.590807197035827292e-01,2.415547493455632785e-02 +3.700000000000001066e+00,6.579262974794020113e-01,3.664203532810380443e-02 +3.720000000000000639e+00,6.362810821527531413e-01,4.363591989867501852e-02 +3.740000000000000213e+00,6.329963281254368246e-01,3.069766501312837931e-02 +3.759999999999999787e+00,5.674844485932917237e-01,2.929212140056279556e-02 +3.779999999999999361e+00,6.020347859780290634e-01,2.453295123549698378e-02 +3.800000000000000711e+00,5.979345896309717912e-01,1.885462643881315875e-02 +3.820000000000000284e+00,5.921153801598458832e-01,2.472956858646184128e-02 +3.839999999999999858e+00,5.168250498864187525e-01,3.932557515162073830e-02 +3.859999999999999432e+00,5.804153801121019196e-01,2.396882167584396886e-02 
+3.880000000000000782e+00,5.121237847876588534e-01,2.891589906210544070e-02 +3.900000000000000355e+00,5.784753349303056735e-01,3.652093289156662509e-02 +3.919999999999999929e+00,5.514372689831463781e-01,2.860219042728869357e-02 +3.939999999999999503e+00,4.913097559383847734e-01,2.105444236715212428e-02 +3.960000000000000853e+00,5.158491350566534184e-01,4.172547955572703665e-02 +3.980000000000000426e+00,5.006790115005972375e-01,3.660623110329951774e-02 +4.000000000000000000e+00,5.296558230138243006e-01,2.904212449707989363e-02 +4.019999999999999574e+00,5.527501560920484724e-01,3.635397445862977722e-02 +4.040000000000000924e+00,5.121606975311043808e-01,2.941529899783447422e-02 +4.060000000000000497e+00,4.620786835801224401e-01,3.957298503403856166e-02 +4.080000000000000071e+00,5.631392727992968750e-01,3.838219534881322920e-02 +4.099999999999999645e+00,5.001731340426340822e-01,2.855744716291427898e-02 +4.120000000000000995e+00,4.759932526974401945e-01,1.858866659018939063e-02 +4.140000000000000568e+00,4.408134938285349635e-01,2.613790641807950052e-02 +4.160000000000000142e+00,4.661151650812371816e-01,2.028844154342078487e-02 +4.179999999999999716e+00,4.170876717713623005e-01,3.040067984335515991e-02 +4.200000000000001066e+00,4.673591396611029847e-01,3.570914616284417281e-02 +4.220000000000000639e+00,4.763519606919671134e-01,2.539380830284991911e-02 +4.240000000000000213e+00,4.092221112259533666e-01,2.153629549044803679e-02 +4.259999999999999787e+00,3.932138318109248964e-01,2.659889927443474106e-02 +4.279999999999999361e+00,4.201976468779460294e-01,3.705805557891760083e-02 +4.300000000000000711e+00,3.600548568063514643e-01,2.534162187361805110e-02 +4.320000000000000284e+00,4.174107488585798165e-01,3.201413680890515673e-02 +4.339999999999999858e+00,4.200430502438344815e-01,3.407525487923236673e-02 +4.359999999999999432e+00,3.398977450477670414e-01,4.296195288458046213e-02 +4.380000000000000782e+00,3.616536429337198211e-01,2.686004203200916332e-02 
+4.400000000000000355e+00,3.430360370501685519e-01,3.390148210600547884e-02 +4.419999999999999929e+00,3.413386186647102138e-01,4.121900071809197347e-02 +4.439999999999999503e+00,2.999581533858771132e-01,2.742344937660504481e-02 +4.460000000000000853e+00,4.195723359185571377e-01,2.022926550192588255e-02 +4.480000000000000426e+00,3.306936459388154503e-01,3.731986554227401803e-02 +4.500000000000000000e+00,3.418964395084210794e-01,4.555894651088994440e-02 +4.519999999999999574e+00,3.042222781899822093e-01,3.797397839357180560e-02 +4.540000000000000924e+00,2.945166790367058640e-01,3.255860667849282825e-02 +4.560000000000000497e+00,3.353764562249985648e-01,4.859846603864388659e-02 +4.580000000000000071e+00,2.817826789133563992e-01,2.650893947918774030e-02 +4.599999999999999645e+00,3.504444880940863483e-01,3.514300648032542274e-02 +4.620000000000000995e+00,2.864676182506220581e-01,3.684391263051964294e-02 +4.640000000000000568e+00,2.634071544168652967e-01,2.308952122403817517e-02 +4.660000000000000142e+00,2.388318962249805499e-01,2.924970611005074661e-02 +4.679999999999999716e+00,2.575495264893400416e-01,3.474081006829046747e-02 +4.700000000000001066e+00,2.032770265411950605e-01,4.046234879416068381e-02 +4.720000000000000639e+00,2.754515805332317657e-01,2.425437302632402550e-02 +4.740000000000000213e+00,2.510993908751023018e-01,3.621635024533232522e-02 +4.759999999999999787e+00,2.865785354726087530e-01,3.795087884160799202e-02 +4.779999999999999361e+00,3.109668724655918481e-01,4.300319067191369499e-02 +4.800000000000000711e+00,1.785131240197480318e-01,3.270046984938442675e-02 +4.820000000000000284e+00,2.571486778006862317e-01,2.718603598796379486e-02 +4.839999999999999858e+00,2.159567786508689025e-01,2.758167160029438916e-02 +4.859999999999999432e+00,1.759826199015482873e-01,3.354081722595050885e-02 +4.880000000000000782e+00,2.354648132723803911e-01,3.584108267447292523e-02 +4.900000000000000355e+00,2.306952276361795284e-01,3.216812284508516889e-02 
+4.919999999999999929e+00,1.881273852558159876e-01,2.551945124320626965e-02 +4.939999999999999503e+00,1.949066051071076111e-01,2.446975641559565387e-02 +4.960000000000000853e+00,2.230855359433417673e-01,2.520107939712273176e-02 +4.980000000000000426e+00,1.271495425341684082e-01,3.617754646046948686e-02 +5.000000000000000000e+00,1.665385684665513233e-01,2.618489741967522377e-02 From 0037c7138b39e701b2e290318402573d614f0067 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 23:01:40 +0200 Subject: [PATCH 066/118] fix: fixed fatal repetition of upper Cholesky to LU banded storage conversion in log pseudodet computation --- chemotools/utils/whittaker_base/initialisation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/chemotools/utils/whittaker_base/initialisation.py b/chemotools/utils/whittaker_base/initialisation.py index d6eee0ad..5535a649 100644 --- a/chemotools/utils/whittaker_base/initialisation.py +++ b/chemotools/utils/whittaker_base/initialisation.py @@ -153,7 +153,7 @@ def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> fl """ # the flipped penalty matrix D @ D.T is computed - _, flipped_penalty_matb = get_squ_fw_diff_mat_banded( + flipped_l_and_u, flipped_penalty_matb = get_squ_fw_diff_mat_banded( n_data=n_data, differences=differences, orig_first=True, @@ -162,9 +162,6 @@ def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> fl # the pseudo-determinant is computed from the partially pivoted LU decomposition # of the flipped penalty matrix - flipped_l_and_u, flipped_penalty_matb = ( - bla.conv_upper_chol_banded_to_lu_banded_storage(ab=flipped_penalty_matb) - ) log_pseudo_det_sign, log_pseudo_det = bla.slogdet_lu_banded( lub_factorization=bla.lu_banded( l_and_u=flipped_l_and_u, From 1841b8e82e6485c6ec92d06375c93d88498fa34c Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 23:33:22 +0200 Subject: [PATCH 067/118] refactor: issue warning on too high difference order; 
added more information for warning; fixed deprecated NumPy to float warning --- chemotools/baseline/_air_pls.py | 1 + chemotools/baseline/_ar_pls.py | 1 + chemotools/utils/whittaker_base/main.py | 24 ++++++++++++++++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 717e378a..cef6326b 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -129,6 +129,7 @@ def fit(self, X: np.ndarray, y=None) -> "AirPls": n_data=X.shape[1], differences=self.polynomial_order, lam=self.lam, + child_class_name=self.__class__.__name__, ) return self diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 0c5368ee..c8ad49cc 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -137,6 +137,7 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": n_data=X.shape[1], differences=self.differences, lam=self.lam, + child_class_name=self.__class__.__name__, ) return self diff --git a/chemotools/utils/whittaker_base/main.py b/chemotools/utils/whittaker_base/main.py index 58122f54..b7802049 100644 --- a/chemotools/utils/whittaker_base/main.py +++ b/chemotools/utils/whittaker_base/main.py @@ -10,6 +10,7 @@ from math import exp from typing import Optional, Union +from warnings import warn import numpy as np @@ -70,6 +71,9 @@ class WhittakerLikeSolver: (``False``). It can only be used if the number of differences is 2 and the lambda parameter is fixed (and of course if ``pentapy`` is available). + __child_class_name : str + The name of the child class that inherits from this base class. It is used for + warning messages and debugging purposes. __dtype : type, default=np.float64 The data type to which the series to be smoothed will be converted to. To avoid numerical issues, all series are converted to double precision. 
@@ -96,6 +100,7 @@ def _setup_for_fit( n_data: int, differences: int, lam: init._LambdaSpecs, + child_class_name: str, ) -> None: """ Pre-computes everything that can be computed for the smoothing in general as @@ -111,6 +116,18 @@ def _setup_for_fit( self._lam_inter_: models.WhittakerSmoothLambda = init.get_checked_lambda( lam=lam ) + self.__child_class_name: str = child_class_name + + # if the difference order exceeds 2, a warning is issued because then the + # current implementation cannot guarantee numerical stability + if self.differences_ > 2: + warn( + f"\nWARNING: With the current implementation, the numerical stability " + f"of the smoothing cannot be guaranteed for difference orders higher " + f"than 2.\n" + f"Please refer to the documentation of the class " + f"'{self.__child_class_name}' for further information." + ) # the squared forward finite difference matrix D.T @ D is computed in band # storage format for LAPACK's banded LU decomposition @@ -188,7 +205,7 @@ def _solve( def _marginal_likelihood_objective( self, - log_lam: float, + log_lam: Union[np.ndarray, float], b: np.ndarray, w: Union[float, np.ndarray], w_plus_penalty_plus_n_samples_term: float, @@ -203,6 +220,9 @@ def _marginal_likelihood_objective( # first, the linear system of equations is solved with the given penalty weight # lambda + if isinstance(log_lam, np.ndarray): + log_lam = log_lam[0] + lam = exp(log_lam) # Case 1: no weights are provided @@ -226,7 +246,7 @@ def _marginal_likelihood_objective( # to be maximized) return (-1.0) * auto.get_log_marginal_likelihood( factorization=factorization, # type: ignore - log_lam=log_lam, + log_lam=log_lam, # type: ignore lam=lam, differences=self.differences_, diff_kernel_flipped=self._diff_kernel_flipped_, From 9ab87eeac59bb19eb08895f9aeb8e93d018e1e0a Mon Sep 17 00:00:00 2001 From: MothNik Date: Sat, 11 May 2024 23:33:46 +0200 Subject: [PATCH 068/118] tests/refactor: added NaN-values to noise-level to make test flexible --- 
.../spectrum_whittaker_auto_lambda.csv | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/tests/resources/spectrum_whittaker_auto_lambda.csv b/tests/resources/spectrum_whittaker_auto_lambda.csv index a65def00..8be189bf 100644 --- a/tests/resources/spectrum_whittaker_auto_lambda.csv +++ b/tests/resources/spectrum_whittaker_auto_lambda.csv @@ -1,15 +1,15 @@ -# x,y,noise_std +# x,y,std -5.000000000000000000e+00,3.913954426305540135e+00,3.096822336193649078e-02 -4.980000000000000426e+00,3.913295001715116683e+00,4.157579901324756633e-02 -4.959999999999999964e+00,3.967828933711835759e+00,2.021882259976331628e-02 -4.940000000000000391e+00,3.913240339328647899e+00,3.283002638476407303e-02 -4.919999999999999929e+00,3.906660793153114319e+00,2.629869250880701587e-02 -4.900000000000000355e+00,3.977753591201872219e+00,2.303151065858748819e-02 --4.879999999999999893e+00,3.918578498160504520e+00,4.030304002301314525e-02 +-4.879999999999999893e+00,3.918578498160504520e+00,nan -4.860000000000000320e+00,3.909640759183042213e+00,2.308082479438558487e-02 -4.839999999999999858e+00,3.928403360537521927e+00,2.120712443290193039e-02 -4.820000000000000284e+00,3.950831318015709748e+00,2.136998567160089402e-02 --4.799999999999999822e+00,3.931835980130326647e+00,3.665332100171989765e-02 +-4.799999999999999822e+00,3.931835980130326647e+00,nan -4.780000000000000249e+00,3.913960091908467831e+00,3.219054411264235527e-02 -4.759999999999999787e+00,3.944102620606957821e+00,4.689772360937648904e-02 -4.740000000000000213e+00,3.927372355534060500e+00,3.710244685607267529e-02 @@ -28,20 +28,20 @@ -4.480000000000000426e+00,3.943095738054793742e+00,2.516707436675810414e-02 -4.459999999999999964e+00,4.000466770834566255e+00,3.088943627207259290e-02 -4.439999999999999503e+00,3.964649483902519478e+00,2.963575571223017041e-02 --4.419999999999999929e+00,3.984856670488913455e+00,2.251854582721338899e-02 +-4.419999999999999929e+00,3.984856670488913455e+00,nan 
-4.400000000000000355e+00,3.971856089063352968e+00,2.495601165215273129e-02 -4.379999999999999893e+00,4.030635978593705815e+00,3.734265590577109939e-02 --4.360000000000000320e+00,3.981218782347658802e+00,3.320986912666832463e-02 +-4.360000000000000320e+00,3.981218782347658802e+00,nan -4.339999999999999858e+00,3.968899147970850194e+00,3.269095491082592980e-02 -4.320000000000000284e+00,3.982898269224011578e+00,2.946282537533189208e-02 -4.299999999999999822e+00,3.939100003418123386e+00,2.865522681337081753e-02 -4.280000000000000249e+00,3.945635266010562692e+00,3.472014240355719705e-02 --4.259999999999999787e+00,4.000947172556999831e+00,3.030321708700593059e-02 +-4.259999999999999787e+00,4.000947172556999831e+00,nan -4.240000000000000213e+00,4.020343398727571405e+00,3.157915029390712114e-02 -4.219999999999999751e+00,4.063889834011682467e+00,4.463116837108123403e-02 -4.200000000000000178e+00,4.019044487495743390e+00,2.820994850940350754e-02 -4.179999999999999716e+00,3.999693957024348379e+00,2.842345038179301675e-02 --4.160000000000000142e+00,4.014216556726715623e+00,2.068763011433182725e-02 +-4.160000000000000142e+00,4.014216556726715623e+00,nan -4.139999999999999680e+00,3.997568023205597232e+00,3.579063885698285929e-02 -4.120000000000000107e+00,3.969380819221903778e+00,3.710054561456280209e-02 -4.099999999999999645e+00,3.973378322910303240e+00,2.674569508640989302e-02 @@ -55,9 +55,9 @@ -3.939999999999999947e+00,4.015211499281781116e+00,1.965729088192920068e-02 -3.919999999999999929e+00,3.996739007072117822e+00,2.217006837373004455e-02 -3.899999999999999911e+00,4.006967280403993747e+00,3.548421177927273823e-02 --3.879999999999999893e+00,3.992420505343744708e+00,2.287452531123754587e-02 +-3.879999999999999893e+00,3.992420505343744708e+00,nan -3.859999999999999876e+00,3.989833605239252012e+00,1.982777448634700374e-02 --3.839999999999999858e+00,3.977133776300772361e+00,2.387953952540879110e-02 +-3.839999999999999858e+00,3.977133776300772361e+00,nan 
-3.820000000000000284e+00,3.968534106303187148e+00,4.404511893658814153e-02 -3.799999999999999822e+00,4.046166805023654334e+00,3.313448396509128396e-02 -3.780000000000000249e+00,3.981901684829826316e+00,2.802464965497018790e-02 @@ -67,22 +67,22 @@ -3.700000000000000178e+00,3.999624538760653270e+00,3.961449084431505063e-02 -3.679999999999999716e+00,3.959951862232888420e+00,2.127983196111482220e-02 -3.660000000000000142e+00,3.992008806185594505e+00,3.730351277723908954e-02 --3.639999999999999680e+00,3.983111142787472581e+00,2.560441937780641442e-02 +-3.639999999999999680e+00,3.983111142787472581e+00,nan -3.620000000000000107e+00,3.997789721210798675e+00,4.189555115844698086e-02 -3.599999999999999645e+00,3.978584700131533758e+00,3.025928312227217989e-02 -3.580000000000000071e+00,3.999259303534389520e+00,2.857730727887747985e-02 --3.560000000000000053e+00,3.990540510474509084e+00,2.321307742211498049e-02 +-3.560000000000000053e+00,3.990540510474509084e+00,nan -3.540000000000000036e+00,4.006843582528653691e+00,3.607893641315891575e-02 -3.520000000000000018e+00,3.989502366165125213e+00,5.280686333954774658e-02 -3.500000000000000000e+00,3.971463221592128612e+00,3.195062668002687095e-02 -3.479999999999999982e+00,3.984236422056169680e+00,3.653447718907179564e-02 --3.459999999999999964e+00,3.965390902218237645e+00,2.523632145940367388e-02 +-3.459999999999999964e+00,3.965390902218237645e+00,nan -3.439999999999999947e+00,3.988691848103080417e+00,3.194360253100081426e-02 -3.419999999999999929e+00,3.915564628228665800e+00,3.335365548095119148e-02 -3.399999999999999911e+00,3.933238574213607119e+00,1.955557934578588258e-02 -3.379999999999999893e+00,3.984902392991356290e+00,3.531213606899333973e-02 -3.359999999999999876e+00,3.966998602981461008e+00,3.858838304594502944e-02 --3.339999999999999858e+00,3.980905662625323860e+00,2.116723883099221049e-02 +-3.339999999999999858e+00,3.980905662625323860e+00,nan -3.320000000000000284e+00,3.914229362381833166e+00,3.160429335949033886e-02 
-3.299999999999999822e+00,3.994917048087540667e+00,3.132077094697865299e-02 -3.280000000000000249e+00,3.943874923203797955e+00,2.512446779038992120e-02 @@ -97,9 +97,9 @@ -3.099999999999999645e+00,3.900075964511259929e+00,4.332776229755198882e-02 -3.080000000000000071e+00,3.935772299731806978e+00,2.449157550065346814e-02 -3.060000000000000053e+00,3.915896784887003879e+00,1.992532293741383978e-02 --3.040000000000000036e+00,3.904143184923001275e+00,2.719088101344899547e-02 +-3.040000000000000036e+00,3.904143184923001275e+00,nan -3.020000000000000018e+00,3.897907823509206615e+00,3.955967022312247006e-02 --3.000000000000000000e+00,3.866438996497719049e+00,3.584595008712544023e-02 +-3.000000000000000000e+00,3.866438996497719049e+00,nan -2.979999999999999982e+00,3.903357248827355352e+00,2.678761408580850739e-02 -2.959999999999999964e+00,3.867880849871169868e+00,4.056478219242153765e-02 -2.939999999999999947e+00,3.882397409790404375e+00,3.205584351100022877e-02 @@ -108,7 +108,7 @@ -2.879999999999999893e+00,3.874818538198867124e+00,3.363872375973592233e-02 -2.859999999999999876e+00,3.874819534869030413e+00,3.412069939766745885e-02 -2.839999999999999858e+00,3.852214585211989117e+00,3.848297388549758169e-02 --2.819999999999999840e+00,3.853548185955963934e+00,2.811664579389831775e-02 +-2.819999999999999840e+00,3.853548185955963934e+00,nan -2.799999999999999822e+00,3.854754553304260423e+00,2.534333019107942730e-02 -2.779999999999999805e+00,3.841071553729384025e+00,2.160748256322369762e-02 -2.759999999999999787e+00,3.906409499587252476e+00,3.989927560475871327e-02 @@ -127,7 +127,7 @@ -2.500000000000000000e+00,3.809526646402432881e+00,3.295109809401969292e-02 -2.479999999999999982e+00,3.747985523101889260e+00,4.050258952135226548e-02 -2.459999999999999964e+00,3.806809286093004729e+00,1.923621539387884369e-02 --2.439999999999999947e+00,3.781100587500339838e+00,2.316177455479511565e-02 +-2.439999999999999947e+00,3.781100587500339838e+00,nan 
-2.419999999999999929e+00,3.749411789432546716e+00,3.866553106380841293e-02 -2.399999999999999911e+00,3.792726529826150994e+00,5.083314405848265299e-02 -2.379999999999999893e+00,3.749106715790383237e+00,1.836938560312795232e-02 @@ -144,10 +144,10 @@ -2.160000000000000142e+00,3.604065445077755392e+00,4.082881623022253242e-02 -2.140000000000000124e+00,3.679127871616473033e+00,2.843288863848685011e-02 -2.120000000000000107e+00,3.723207343503561972e+00,3.295457755380135079e-02 --2.100000000000000089e+00,3.593677436562992877e+00,6.640207053241493906e-02 +-2.100000000000000089e+00,3.593677436562992877e+00,nan -2.080000000000000071e+00,3.730259422314661233e+00,9.533727857204726819e-02 -2.060000000000000053e+00,3.652676325481325481e+00,4.814528861415776767e-02 --2.040000000000000036e+00,3.664039414557082708e+00,4.667967662908709126e-02 +-2.040000000000000036e+00,3.664039414557082708e+00,nan -2.020000000000000018e+00,3.605572286811380867e+00,7.161423172160229222e-02 -2.000000000000000000e+00,3.539623085803978331e+00,6.708869557030820718e-02 -1.979999999999999982e+00,3.619072317723509347e+00,5.743569593331862566e-02 @@ -163,14 +163,14 @@ -1.779999999999999805e+00,3.608267192342823115e+00,1.205649991710633223e-01 -1.759999999999999787e+00,3.453201243804023601e+00,1.376335561212327918e-01 -1.739999999999999769e+00,3.607774685478800425e+00,1.665844544798526783e-01 --1.719999999999999751e+00,3.640026211733285333e+00,1.332907382461752388e-01 +-1.719999999999999751e+00,3.640026211733285333e+00,nan -1.699999999999999734e+00,3.642227320257549028e+00,1.268407969723837903e-01 -1.679999999999999716e+00,3.803890280567367999e+00,1.576114673042917080e-01 -1.660000000000000142e+00,3.735897857753192053e+00,2.214995227455958393e-01 --1.640000000000000124e+00,3.663913595669907153e+00,1.424227087882632414e-01 +-1.640000000000000124e+00,3.663913595669907153e+00,nan -1.620000000000000107e+00,3.641401992387191200e+00,1.551600530436650083e-01 
-1.600000000000000089e+00,3.217048317236007815e+00,2.630495244546968703e-01 --1.580000000000000071e+00,3.892532742459869688e+00,2.368715709284081039e-01 +-1.580000000000000071e+00,3.892532742459869688e+00,nan -1.560000000000000053e+00,3.360368769070483985e+00,1.982329599456646163e-01 -1.540000000000000036e+00,3.752961584101674131e+00,2.460402241728126826e-01 -1.520000000000000018e+00,3.745930947639183550e+00,3.309654834926570310e-01 @@ -189,7 +189,7 @@ -1.259999999999999787e+00,4.559110373914525560e+00,3.273162563351011967e-01 -1.239999999999999769e+00,4.762139581965920243e+00,5.769640282217669558e-01 -1.219999999999999751e+00,3.280248253976095540e+00,5.275499350589685221e-01 --1.199999999999999734e+00,4.276145716520765028e+00,7.549125729618073599e-01 +-1.199999999999999734e+00,4.276145716520765028e+00,nan -1.179999999999999716e+00,4.600817916539200247e+00,6.092834368481748752e-01 -1.160000000000000142e+00,4.534854213018084224e+00,6.756216430995778399e-01 -1.140000000000000124e+00,5.024019049064923692e+00,5.385345533169113574e-01 @@ -200,14 +200,14 @@ -1.040000000000000036e+00,5.752138952060318999e+00,4.249735541418221407e-01 -1.020000000000000018e+00,4.159619060982223004e+00,8.032549364880220422e-01 -1.000000000000000000e+00,6.137115336978383517e+00,6.702421100272603072e-01 --9.799999999999995381e-01,5.872802388731122925e+00,5.096481823863698857e-01 +-9.799999999999995381e-01,5.872802388731122925e+00,nan -9.599999999999999645e-01,6.281236755604411748e+00,6.472217891646340204e-01 -9.399999999999995026e-01,5.565492156114431488e+00,6.959215464098150727e-01 -9.199999999999999289e-01,6.546745314566285501e+00,9.349172151308963175e-01 --9.000000000000003553e-01,4.235996624335836458e+00,5.674418073457194733e-01 +-9.000000000000003553e-01,4.235996624335836458e+00,nan -8.799999999999998934e-01,5.895946189351668032e+00,1.151543603478050626e+00 -8.600000000000003197e-01,8.090416456392947708e+00,9.886224714269703506e-01 
--8.399999999999998579e-01,7.099654579905307017e+00,9.411573406527600216e-01 +-8.399999999999998579e-01,7.099654579905307017e+00,nan -8.200000000000002842e-01,7.783447599187353738e+00,1.563679351082028157e+00 -7.999999999999998224e-01,5.860696892165741190e+00,1.070756444618480741e+00 -7.800000000000002487e-01,6.197255583792041556e+00,7.878498533061101261e-01 @@ -221,7 +221,7 @@ -6.200000000000001066e-01,7.077991229023441022e+00,6.918590236314428044e-01 -5.999999999999996447e-01,6.213534208408828086e+00,9.083962692379881076e-01 -5.800000000000000711e-01,5.040827980153054000e+00,9.659222627816541618e-01 --5.599999999999996092e-01,6.088292857633700095e+00,9.970182802078784157e-01 +-5.599999999999996092e-01,6.088292857633700095e+00,nan -5.400000000000000355e-01,4.933795191345740960e+00,6.901311785896823148e-01 -5.199999999999995737e-01,4.549334630528049317e+00,5.649737176498387248e-01 -5.000000000000000000e-01,5.385998717145486836e+00,8.874764677140921654e-01 @@ -236,14 +236,14 @@ -3.200000000000002842e-01,5.074998639686343793e+00,5.939270740467554877e-01 -2.999999999999998224e-01,5.647855412945030906e+00,5.294028271246801198e-01 -2.800000000000002487e-01,4.105665388254435832e+00,6.313947350577137074e-01 --2.599999999999997868e-01,4.450308562157782255e+00,5.585069151830991396e-01 +-2.599999999999997868e-01,4.450308562157782255e+00,nan -2.400000000000002132e-01,4.071521720834863522e+00,4.594469840608414457e-01 -2.199999999999997513e-01,2.963422081644037753e+00,7.669847097205949593e-01 -2.000000000000001776e-01,3.868455002797947184e+00,4.251092352493783255e-01 -1.799999999999997158e-01,3.536676735169854702e+00,3.809115333435043049e-01 -1.600000000000001421e-01,3.211077182715823319e+00,3.539182369697171926e-01 -1.399999999999996803e-01,3.157142724539423462e+00,4.179098049588486075e-01 --1.200000000000001066e-01,3.253957454666461580e+00,2.287014051271014647e-01 +-1.200000000000001066e-01,3.253957454666461580e+00,nan 
-9.999999999999964473e-02,3.548219807515344382e+00,3.531347515347905142e-01 -8.000000000000007105e-02,3.507299359211579315e+00,2.648647778152579124e-01 -5.999999999999960920e-02,2.682990980990552998e+00,4.679258280625686051e-01 @@ -258,7 +258,7 @@ 1.200000000000001066e-01,2.960170259963005979e+00,1.938639502937708481e-01 1.399999999999996803e-01,2.949001397061277618e+00,2.123753112352188555e-01 1.600000000000001421e-01,3.049640755278711701e+00,1.228069598001861884e-01 -1.799999999999997158e-01,2.728754984418608753e+00,2.559880576432678745e-01 +1.799999999999997158e-01,2.728754984418608753e+00,nan 2.000000000000001776e-01,2.727849241201167452e+00,2.231426434949308757e-01 2.199999999999997513e-01,2.992638787130469158e+00,2.191637740227992959e-01 2.400000000000002132e-01,2.251918265667214758e+00,2.480313535065964692e-01 @@ -282,15 +282,15 @@ 6.000000000000005329e-01,2.831073027789893892e+00,3.175362955311629154e-01 6.200000000000001066e-01,3.161240012035097191e+00,1.997911486567817974e-01 6.399999999999996803e-01,2.956347334622390743e+00,4.285265589597905822e-01 -6.600000000000001421e-01,2.527608156534642880e+00,2.560510676401487329e-01 +6.600000000000001421e-01,2.527608156534642880e+00,nan 6.799999999999997158e-01,2.971776586979084644e+00,5.310266580236164824e-01 -7.000000000000001776e-01,2.877290794515752204e+00,5.240177912611390099e-01 +7.000000000000001776e-01,2.877290794515752204e+00,nan 7.199999999999997513e-01,3.389214360428408845e+00,4.743984853334949348e-01 7.400000000000002132e-01,2.522099656711926485e+00,4.929410237222000557e-01 7.599999999999997868e-01,2.155763592687337749e+00,3.841695803658565778e-01 7.800000000000002487e-01,2.799985290900282031e+00,5.452928167360916456e-01 -7.999999999999998224e-01,3.181999059836660315e+00,3.921740873565852659e-01 -8.200000000000002842e-01,3.057996485246090135e+00,3.338910048932451358e-01 +7.999999999999998224e-01,3.181999059836660315e+00,nan +8.200000000000002842e-01,3.057996485246090135e+00,nan 
8.399999999999998579e-01,2.115576753796697851e+00,3.280525299425075536e-01 8.600000000000003197e-01,2.721097204259094404e+00,5.254098256952898227e-01 8.799999999999998934e-01,3.061150513259692563e+00,4.445749637844202407e-01 @@ -298,8 +298,8 @@ 9.199999999999999289e-01,3.627533156198332609e+00,6.026970045505178897e-01 9.400000000000003908e-01,2.765688681358758227e+00,5.117093613705018251e-01 9.599999999999999645e-01,3.192183883069087624e+00,3.587640992401981976e-01 -9.800000000000004263e-01,3.968383003330220760e+00,3.270829475816557363e-01 -1.000000000000000000e+00,2.338013832820840854e+00,5.943069716041308181e-01 +9.800000000000004263e-01,3.968383003330220760e+00,nan +1.000000000000000000e+00,2.338013832820840854e+00,nan 1.020000000000000462e+00,3.313408994267729835e+00,6.526803408414116880e-01 1.040000000000000036e+00,3.040682318981518861e+00,5.836565795219444963e-01 1.060000000000000497e+00,2.779849182033996335e+00,4.256561800169474385e-01 @@ -310,8 +310,8 @@ 1.160000000000000142e+00,2.241228078826833059e+00,3.469867901057105963e-01 1.179999999999999716e+00,3.814941693212685614e+00,3.599045368114944798e-01 1.200000000000000178e+00,3.265716891407834943e+00,2.892595048204359420e-01 -1.219999999999999751e+00,3.081223862719026485e+00,3.447948472735757330e-01 -1.240000000000000213e+00,2.922621628709752617e+00,3.363244971630394708e-01 +1.219999999999999751e+00,3.081223862719026485e+00,nan +1.240000000000000213e+00,2.922621628709752617e+00,nan 1.259999999999999787e+00,2.600552915231134854e+00,5.720317054932907430e-01 1.280000000000000249e+00,2.295526789533440581e+00,4.703156894625505902e-01 1.299999999999999822e+00,2.134518269816672920e+00,2.073651949699803210e-01 @@ -331,14 +331,14 @@ 1.580000000000000071e+00,2.255154594264408896e+00,2.813968051504651124e-01 1.600000000000000533e+00,1.664795947481280347e+00,2.310721598292219126e-01 1.620000000000000107e+00,1.898006700231108290e+00,2.653301084336139870e-01 
-1.640000000000000568e+00,2.167986247869837069e+00,1.582119967000197303e-01 +1.640000000000000568e+00,2.167986247869837069e+00,nan 1.660000000000000142e+00,2.052209084359961633e+00,2.480990399266391477e-01 -1.679999999999999716e+00,2.030829428195580988e+00,1.746181670706817290e-01 -1.700000000000000178e+00,2.079538602632515332e+00,1.586178729041856916e-01 +1.679999999999999716e+00,2.030829428195580988e+00,nan +1.700000000000000178e+00,2.079538602632515332e+00,nan 1.719999999999999751e+00,1.447949949786532686e+00,1.487681488926025641e-01 1.740000000000000213e+00,1.984142869518430441e+00,2.126409080700615295e-01 1.759999999999999787e+00,1.594984196307964508e+00,1.531504013947989629e-01 -1.780000000000000249e+00,1.689744286673886542e+00,1.573895567145644492e-01 +1.780000000000000249e+00,1.689744286673886542e+00,nan 1.799999999999999822e+00,1.505327862233904135e+00,1.433670028956879894e-01 1.820000000000000284e+00,1.657357557672590298e+00,1.936888522083639597e-01 1.839999999999999858e+00,1.627728676663654062e+00,1.973138182971920507e-01 @@ -346,16 +346,16 @@ 1.879999999999999893e+00,1.495731507212113787e+00,1.246181847495901329e-01 1.900000000000000355e+00,1.696456855549726805e+00,1.184320222108378451e-01 1.919999999999999929e+00,1.753621707095358806e+00,1.654974896688882480e-01 -1.940000000000000391e+00,1.510696371828758267e+00,8.744842229968947944e-02 +1.940000000000000391e+00,1.510696371828758267e+00,nan 1.959999999999999964e+00,1.593205776328715517e+00,9.488123223315308996e-02 1.980000000000000426e+00,1.371252828445131744e+00,9.083416534289066868e-02 2.000000000000000000e+00,1.631767388510174444e+00,5.605529023766811503e-02 2.020000000000000462e+00,1.510870281561765172e+00,9.794045190385711197e-02 2.040000000000000036e+00,1.509917665204379755e+00,5.847087504586767626e-02 2.060000000000000497e+00,1.457511736987583184e+00,1.026291527197704528e-01 -2.080000000000000071e+00,1.337502930906462328e+00,5.273559726837979966e-02 
+2.080000000000000071e+00,1.337502930906462328e+00,nan 2.100000000000000533e+00,1.428158853424737362e+00,6.186049157060879988e-02 -2.120000000000000107e+00,1.334146609616660761e+00,6.233053017040882798e-02 +2.120000000000000107e+00,1.334146609616660761e+00,nan 2.140000000000000568e+00,1.389119451915253656e+00,6.753635107838724949e-02 2.160000000000000142e+00,1.411502044605835504e+00,4.818573805837670504e-02 2.179999999999999716e+00,1.512147721837651781e+00,4.450992438755188574e-02 @@ -368,15 +368,15 @@ 2.320000000000000284e+00,1.337936194998987194e+00,4.090671416312927772e-02 2.339999999999999858e+00,1.276400380907636301e+00,5.126562293055141051e-02 2.360000000000000320e+00,1.300022331653995611e+00,5.490064925439376575e-02 -2.379999999999999893e+00,1.333665544228300748e+00,4.442743573097127779e-02 +2.379999999999999893e+00,1.333665544228300748e+00,nan 2.400000000000000355e+00,1.293747202699239374e+00,3.450761667350280004e-02 -2.419999999999999929e+00,1.244199007704499316e+00,4.892606653276775075e-02 -2.440000000000000391e+00,1.324761412428399510e+00,3.594625252613110195e-02 +2.419999999999999929e+00,1.244199007704499316e+00,nan +2.440000000000000391e+00,1.324761412428399510e+00,nan 2.459999999999999964e+00,1.235109059399808418e+00,2.588565954245041975e-02 -2.480000000000000426e+00,1.302786487079408806e+00,4.302629597811168799e-02 +2.480000000000000426e+00,1.302786487079408806e+00,nan 2.500000000000000000e+00,1.193293907935522258e+00,4.554179185929631279e-02 2.520000000000000462e+00,1.166485983069556198e+00,2.969047947990152009e-02 -2.540000000000000036e+00,1.227974018485518126e+00,3.211761999137055512e-02 +2.540000000000000036e+00,1.227974018485518126e+00,nan 2.560000000000000497e+00,1.304406572007846821e+00,2.881781570521451882e-02 2.580000000000000071e+00,1.118044531986680745e+00,5.065123842298295542e-02 2.600000000000000533e+00,1.176494597509851525e+00,3.111566005871567453e-02 @@ -406,9 +406,9 @@ 
3.080000000000000071e+00,9.057035218946286603e-01,2.772343988678164020e-02 3.099999999999999645e+00,8.651708341540602643e-01,3.921165500460101205e-02 3.120000000000000995e+00,9.303704135098522787e-01,2.771220111105161463e-02 -3.140000000000000568e+00,9.193051217201706482e-01,3.402373318223596288e-02 +3.140000000000000568e+00,9.193051217201706482e-01,nan 3.160000000000000142e+00,9.352467420486022531e-01,3.588952176393950949e-02 -3.179999999999999716e+00,8.813652204922443056e-01,2.990696161174580953e-02 +3.179999999999999716e+00,8.813652204922443056e-01,nan 3.199999999999999289e+00,9.390011623128490248e-01,1.901648106737680380e-02 3.220000000000000639e+00,8.600902219662046599e-01,4.474787744605149969e-02 3.240000000000000213e+00,8.523875153073585675e-01,3.323721427792761945e-02 @@ -421,9 +421,9 @@ 3.380000000000000782e+00,7.915291710156696636e-01,2.941592650294502787e-02 3.400000000000000355e+00,7.742913156876776171e-01,3.018585662763769925e-02 3.419999999999999929e+00,8.052117292761523659e-01,3.534820799363974642e-02 -3.439999999999999503e+00,6.975738328763312346e-01,2.992469270581009366e-02 +3.439999999999999503e+00,6.975738328763312346e-01,nan 3.460000000000000853e+00,7.456754448582639805e-01,5.177506811051349467e-02 -3.480000000000000426e+00,7.971989053927358571e-01,3.183223099024329317e-02 +3.480000000000000426e+00,7.971989053927358571e-01,nan 3.500000000000000000e+00,7.500214809794597537e-01,2.973325203337600503e-02 3.519999999999999574e+00,7.405704628922389343e-01,2.644933588288904402e-02 3.540000000000000924e+00,7.010520360337462176e-01,2.751384680887634590e-02 @@ -433,13 +433,13 @@ 3.620000000000000995e+00,6.404736362845638853e-01,4.159673361489631821e-02 3.640000000000000568e+00,5.796863065161470541e-01,2.310965550118417033e-02 3.660000000000000142e+00,6.706090484334317203e-01,2.150927154134406327e-02 -3.679999999999999716e+00,6.590807197035827292e-01,2.415547493455632785e-02 +3.679999999999999716e+00,6.590807197035827292e-01,nan 
3.700000000000001066e+00,6.579262974794020113e-01,3.664203532810380443e-02 -3.720000000000000639e+00,6.362810821527531413e-01,4.363591989867501852e-02 +3.720000000000000639e+00,6.362810821527531413e-01,nan 3.740000000000000213e+00,6.329963281254368246e-01,3.069766501312837931e-02 3.759999999999999787e+00,5.674844485932917237e-01,2.929212140056279556e-02 -3.779999999999999361e+00,6.020347859780290634e-01,2.453295123549698378e-02 -3.800000000000000711e+00,5.979345896309717912e-01,1.885462643881315875e-02 +3.779999999999999361e+00,6.020347859780290634e-01,nan +3.800000000000000711e+00,5.979345896309717912e-01,nan 3.820000000000000284e+00,5.921153801598458832e-01,2.472956858646184128e-02 3.839999999999999858e+00,5.168250498864187525e-01,3.932557515162073830e-02 3.859999999999999432e+00,5.804153801121019196e-01,2.396882167584396886e-02 @@ -448,16 +448,16 @@ 3.919999999999999929e+00,5.514372689831463781e-01,2.860219042728869357e-02 3.939999999999999503e+00,4.913097559383847734e-01,2.105444236715212428e-02 3.960000000000000853e+00,5.158491350566534184e-01,4.172547955572703665e-02 -3.980000000000000426e+00,5.006790115005972375e-01,3.660623110329951774e-02 +3.980000000000000426e+00,5.006790115005972375e-01,nan 4.000000000000000000e+00,5.296558230138243006e-01,2.904212449707989363e-02 4.019999999999999574e+00,5.527501560920484724e-01,3.635397445862977722e-02 4.040000000000000924e+00,5.121606975311043808e-01,2.941529899783447422e-02 4.060000000000000497e+00,4.620786835801224401e-01,3.957298503403856166e-02 4.080000000000000071e+00,5.631392727992968750e-01,3.838219534881322920e-02 4.099999999999999645e+00,5.001731340426340822e-01,2.855744716291427898e-02 -4.120000000000000995e+00,4.759932526974401945e-01,1.858866659018939063e-02 +4.120000000000000995e+00,4.759932526974401945e-01,nan 4.140000000000000568e+00,4.408134938285349635e-01,2.613790641807950052e-02 -4.160000000000000142e+00,4.661151650812371816e-01,2.028844154342078487e-02 
+4.160000000000000142e+00,4.661151650812371816e-01,nan 4.179999999999999716e+00,4.170876717713623005e-01,3.040067984335515991e-02 4.200000000000001066e+00,4.673591396611029847e-01,3.570914616284417281e-02 4.220000000000000639e+00,4.763519606919671134e-01,2.539380830284991911e-02 @@ -469,13 +469,13 @@ 4.339999999999999858e+00,4.200430502438344815e-01,3.407525487923236673e-02 4.359999999999999432e+00,3.398977450477670414e-01,4.296195288458046213e-02 4.380000000000000782e+00,3.616536429337198211e-01,2.686004203200916332e-02 -4.400000000000000355e+00,3.430360370501685519e-01,3.390148210600547884e-02 +4.400000000000000355e+00,3.430360370501685519e-01,nan 4.419999999999999929e+00,3.413386186647102138e-01,4.121900071809197347e-02 -4.439999999999999503e+00,2.999581533858771132e-01,2.742344937660504481e-02 +4.439999999999999503e+00,2.999581533858771132e-01,nan 4.460000000000000853e+00,4.195723359185571377e-01,2.022926550192588255e-02 4.480000000000000426e+00,3.306936459388154503e-01,3.731986554227401803e-02 4.500000000000000000e+00,3.418964395084210794e-01,4.555894651088994440e-02 -4.519999999999999574e+00,3.042222781899822093e-01,3.797397839357180560e-02 +4.519999999999999574e+00,3.042222781899822093e-01,nan 4.540000000000000924e+00,2.945166790367058640e-01,3.255860667849282825e-02 4.560000000000000497e+00,3.353764562249985648e-01,4.859846603864388659e-02 4.580000000000000071e+00,2.817826789133563992e-01,2.650893947918774030e-02 @@ -485,7 +485,7 @@ 4.660000000000000142e+00,2.388318962249805499e-01,2.924970611005074661e-02 4.679999999999999716e+00,2.575495264893400416e-01,3.474081006829046747e-02 4.700000000000001066e+00,2.032770265411950605e-01,4.046234879416068381e-02 -4.720000000000000639e+00,2.754515805332317657e-01,2.425437302632402550e-02 +4.720000000000000639e+00,2.754515805332317657e-01,nan 4.740000000000000213e+00,2.510993908751023018e-01,3.621635024533232522e-02 4.759999999999999787e+00,2.865785354726087530e-01,3.795087884160799202e-02 
4.779999999999999361e+00,3.109668724655918481e-01,4.300319067191369499e-02 @@ -496,7 +496,7 @@ 4.880000000000000782e+00,2.354648132723803911e-01,3.584108267447292523e-02 4.900000000000000355e+00,2.306952276361795284e-01,3.216812284508516889e-02 4.919999999999999929e+00,1.881273852558159876e-01,2.551945124320626965e-02 -4.939999999999999503e+00,1.949066051071076111e-01,2.446975641559565387e-02 +4.939999999999999503e+00,1.949066051071076111e-01,nan 4.960000000000000853e+00,2.230855359433417673e-01,2.520107939712273176e-02 4.980000000000000426e+00,1.271495425341684082e-01,3.617754646046948686e-02 5.000000000000000000e+00,1.665385684665513233e-01,2.618489741967522377e-02 From 9bb29fed0e50900fffe98b3ed57bc1ba669bcd09 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 12 May 2024 00:21:19 +0200 Subject: [PATCH 069/118] fix: removed dead 1D-weight generator branch --- chemotools/utils/whittaker_base/misc.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/chemotools/utils/whittaker_base/misc.py b/chemotools/utils/whittaker_base/misc.py index a4ecb587..35f8fd5e 100644 --- a/chemotools/utils/whittaker_base/misc.py +++ b/chemotools/utils/whittaker_base/misc.py @@ -23,11 +23,11 @@ def get_weight_generator( """ - # if the weights are neither None, nor a 1D- or a 2D-Array, an error is raised + # if the weights are neither None nor a 2D-Array, an error is raised if not (w is None or isinstance(w, np.ndarray)): raise TypeError( - f"The weights must either be None, a NumPy-1D-, or a NumPy-2D-Array, but " - f"they are of type '{type(w)}'." + f"The weights must either be None or a NumPy-2D-Array, but they are of " + f"type '{type(w)}'." 
) # Case 1: No weights @@ -35,19 +35,14 @@ def get_weight_generator( for _ in range(n_series): yield 1.0 - # Case 2: 1D or 2D weights - elif w.ndim == 1: - for _ in range(n_series): - yield w - - # Case 3: 2D weights + # Case 2: 2D weights elif w.ndim == 2: for idx in range(0, n_series): yield w[idx] - # Case 4: Invalid weights - elif w.ndim > 2: + # Case 3: Invalid weights + elif w.ndim != 2: raise ValueError( - f"The weights must be either a 1D- or a 2D-array, but they are " + f"If provided as an Array, the weights must be a 2D-Array, but they are " f"{w.ndim}-dimensional with shape {w.shape}." ) From 4ed784fd2759efead717433e801a4f9b1b4d3df1 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 12 May 2024 00:21:55 +0200 Subject: [PATCH 070/118] tests/fix: removed dead direct call of fixture that caused an error --- tests/fixtures.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 98225ac9..25885bed 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -146,6 +146,3 @@ def reference_finite_differences() -> List[tuple[int, int, np.ndarray]]: ) return fin_diff_ordered_coeffs - - -spectrum_whittaker_auto_lambda() From c48445ce65314f2c43493aef6023671ef52da222 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 12 May 2024 00:26:12 +0200 Subject: [PATCH 071/118] refactor: made warning an explicit `UserWarning` --- chemotools/utils/whittaker_base/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chemotools/utils/whittaker_base/main.py b/chemotools/utils/whittaker_base/main.py index b7802049..98fcea93 100644 --- a/chemotools/utils/whittaker_base/main.py +++ b/chemotools/utils/whittaker_base/main.py @@ -126,7 +126,8 @@ def _setup_for_fit( f"of the smoothing cannot be guaranteed for difference orders higher " f"than 2.\n" f"Please refer to the documentation of the class " - f"'{self.__child_class_name}' for further information." 
+ f"'{self.__child_class_name}' for further information.", + UserWarning, ) # the squared forward finite difference matrix D.T @ D is computed in band From 16ef45c8e81cd7b8c31307f802fb563e023a9c86 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 12 May 2024 00:57:05 +0200 Subject: [PATCH 072/118] fix: fixed internally broken wrong use of unchecked weights in weight checker; made error messages more descriptive --- chemotools/utils/check_inputs.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index e57c4c30..17489b59 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -48,22 +48,23 @@ def check_weights( f"Weights must have either 1 or {n_samples} rows, but they have " f"{weights_checked.shape[0]} rows." ) - elif weights_checked.shape[1] != n_features: + if weights_checked.shape[1] != n_features: raise ValueError( f"Weights must have {n_features} columns, but they have " f"{weights_checked.shape[1]} columns." ) # finally, it needs to be checked that the weights are all non-negative ... - if np.any(weights < 0.0): + if (weights_checked < 0.0).any(): raise ValueError( - f"Weights may not be negative, but {np.sum(weights < 0.0)} negative " - f"weights were found." + f"Weights may not be negative, but {(weights_checked < 0.0).sum(axis=1)} " + f"negative weights were found (one entry per vector)." ) # ... and also at least one of them is positive - elif np.sum(weights) <= 0.0: + if (weights_checked.sum(axis=1) <= 0.0).any(): raise ValueError( - "At least one weights needs to be > 0, but all weights were 0.0." + f"At least one weights needs to be > 0, but all weights were 0.0 for " + f"vector index {np.where(weights_checked.sum(axis=1) <= 0.0)[0]}." 
) # the weights are returned together with a flag whether to apply the same weights From 3d54add053ce12129839d9a261bb00357ba0506a Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 12 May 2024 18:47:33 +0200 Subject: [PATCH 073/118] doc: added TODO for why if statement was excluded from coverage --- chemotools/utils/whittaker_base/main.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chemotools/utils/whittaker_base/main.py b/chemotools/utils/whittaker_base/main.py index 98fcea93..0c0f34b1 100644 --- a/chemotools/utils/whittaker_base/main.py +++ b/chemotools/utils/whittaker_base/main.py @@ -278,7 +278,12 @@ def _solve_single_b_fixed_lam( # the weights and the weighted series are computed depending on whether weights # are provided or not # Case 1: no weights are provided - if isinstance(w, float): + # TODO: this case is not possible under the current implementations for the + # calls of any of the child classes because they either use weights or + # the most efficient way around going into this method in the first place; + # in the future this might change and thus, this case is kept for now, but + # ignored for coverage + if isinstance(w, float): # pragma: no cover return ( self._solve( lam=lam, From 629cef4059b898a4022f465ef44beaf7a7ec076f Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 12 May 2024 18:48:08 +0200 Subject: [PATCH 074/118] test/docs: commented model tests clearer --- tests/test_for_utils/test_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 16bbc452..c373623f 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -282,7 +282,7 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N # is raised if not isinstance(expected_result, ExpectedWhittakerSmoothLambda): for meth in methods: - with pytest.raises(expected_result): # type: ignore + with 
pytest.raises(expected_result): models.WhittakerSmoothLambda( bounds=lambda_value, # type: ignore method=meth, # type: ignore @@ -291,7 +291,7 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N return # if the expected result is a valid result, the class is instantiated and the - # attributes are tested + # generated object is compared to the expected result for meth in methods: lambda_model = models.WhittakerSmoothLambda( bounds=lambda_value, # type: ignore From a5a5864112beacbb0667a3aa685985c4c43ebaa2 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 12 May 2024 18:49:46 +0200 Subject: [PATCH 075/118] tests: added input checker tests --- tests/test_for_utils/test_check_inputs.py | 204 ++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 tests/test_for_utils/test_check_inputs.py diff --git a/tests/test_for_utils/test_check_inputs.py b/tests/test_for_utils/test_check_inputs.py new file mode 100644 index 00000000..87b6a7a9 --- /dev/null +++ b/tests/test_for_utils/test_check_inputs.py @@ -0,0 +1,204 @@ +""" +Test suite for the utility models in the :mod:`chemotools.utils.check_inputs` module. 
+ +""" + +### Imports ### + +from typing import Optional, Tuple, Type, Union + +import numpy as np +import pytest + +from chemotools.utils.check_inputs import check_weights + +### Test Suite ### + + +@pytest.mark.parametrize( + "combination", + [ + ( # Number 0 (no weights; for all) + None, + (None, True), + ), + ( # Number 1 (valid 1D-weights; for all) + np.array([1.0, 2.0, 3.0]), + (np.array([[1.0, 2.0, 3.0]]), True), + ), + ( # Number 2 (valid 2D-weights; for all) + np.array([[1.0, 2.0, 3.0]]), + (np.array([[1.0, 2.0, 3.0]]), True), + ), + ( # Number 3 (valid 2D-weights; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]), + (np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]), False), + ), + ( # Number 4 (invalid 1D-weights with wrong column number; for all) + np.array([1.0, 2.0, 3.0, 4.0]), + ValueError, + ), + ( # Number 5 (invalid 1D-weights with wrong column number; for all) + np.array([1.0, 2.0]), + ValueError, + ), + ( # Number 6 (invalid 2D-weights with wrong column number; for all) + np.array([[1.0, 2.0, 3.0, 4.0]]), + ValueError, + ), + ( # Number 7 (invalid 2D-weights with wrong column number; for all) + np.array([[1.0, 2.0]]), + ValueError, + ), + ( # Number 8 (invalid 2D-weights with wrong row number; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), + ValueError, + ), + ( # Number 9 (invalid 2D-weights with wrong row number; individual) + np.array( + [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], [10.0, 11.0, 12.0]] + ), + ValueError, + ), + ( + # Number 10 (invalid 2D-weights with wrong row and column number; + # individual) + np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), + ValueError, + ), + ( + # Number 10 (invalid 2D-weights with wrong row and column number; + # individual) + np.array([[1.0, 2.0], [3.0, 4.0]]), + ValueError, + ), + ( + # Number 11 (invalid 2D-weights with wrong row and column number; + # individual) + np.array( + [ + [1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 
10.0, 11.0, 12.0], + [13.0, 14.0, 15.0, 16.0], + ] + ), + ValueError, + ), + ( + # Number 12 (invalid 2D-weights with wrong row and column number; + # individual) + np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]), + ValueError, + ), + ( # Number 13 (invalid 1D-weights with negative entry; for all) + np.array([1.0, 2.0, -1_000.0]), + ValueError, + ), + ( # Number 14 (invalid 2D-weights with negative entry; for all) + np.array([[1.0, 2.0, -1_000.0]]), + ValueError, + ), + ( # Number 15 (invalid 2D-weights with negative entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, -1_000.0], [7.0, 8.0, 9.0]]), + ValueError, + ), + ( # Number 16 (invalid 1D-weights with NaN entry; for all) + np.array([1.0, 2.0, np.nan]), + ValueError, + ), + ( # Number 17 (invalid 2D-weights with NaN entry; for all) + np.array([[1.0, 2.0, np.nan]]), + ValueError, + ), + ( # Number 18 (invalid 2D-weights with NaN entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, np.nan], [7.0, 8.0, 9.0]]), + ValueError, + ), + ( # Number 19 (invalid 1D-weights with inf entry; for all) + np.array([1.0, 2.0, np.inf]), + ValueError, + ), + ( # Number 20 (invalid 2D-weights with inf entry; for all) + np.array([[1.0, 2.0, np.inf]]), + ValueError, + ), + ( # Number 21 (invalid 2D-weights with inf entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, np.inf], [7.0, 8.0, 9.0]]), + ValueError, + ), + ( # Number 22 (invalid 1D-weights with -inf entry; for all) + np.array([1.0, 2.0, -np.inf]), + ValueError, + ), + ( # Number 23 (invalid 2D-weights with -inf entry; for all) + np.array([[1.0, 2.0, -np.inf]]), + ValueError, + ), + ( # Number 24 (invalid 2D-weights with -inf entry; individual) + np.array([[1.0, 2.0, 3.0], [4.0, 5.0, -np.inf], [7.0, 8.0, 9.0]]), + ValueError, + ), + ( # Number 25 (invalid 1D-weights with all zero entries; for all) + np.array([0.0, 0.0, 0.0]), + ValueError, + ), + ( # Number 26 (invalid 2D-weights with all zero entries; for all) + np.array([[0.0, 0.0, 0.0]]), + 
ValueError, + ), + ( # Number 27 (invalid 2D-weights with all zero entries; individual) + np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), + ValueError, + ), + ], +) +def test_weight_checks( + combination: Tuple[ + Optional[np.ndarray], Union[Tuple[Optional[np.ndarray], bool], Type[Exception]] + ] +) -> None: + """ + Tests the function :func:`chemotools.utils.check_inputs.check_weights` for different + valid and invalid input combinations. + + """ + + # the input parameters are unpacked ... + weights, expected_result = combination + # ... and the size of the matrix against which the weights are checked is set + n_samples, n_features = 3, 3 + + # if the expected output is an exception, the test is run in a context manager to + # check if the respective exception is raised + if not isinstance(expected_result, tuple): + with pytest.raises(expected_result): + check_weights(weights=weights, n_samples=n_samples, n_features=n_features) + + return + + # otherwise, the output is compared to the expected output + ref_weights, ref_same_weights_for_all = expected_result + checked_weights, same_weights_for_all = check_weights( + weights=weights, + n_samples=n_samples, + n_features=n_features, + ) + + # Case 1: the reference weights are None + if ref_weights is None: + assert checked_weights is None + assert same_weights_for_all is ref_same_weights_for_all + + return + + # Case 2: the reference weights are an Array and the checked weights are as well + if isinstance(ref_weights, np.ndarray) and isinstance(checked_weights, np.ndarray): + assert np.array_equal(checked_weights, ref_weights) + assert same_weights_for_all is ref_same_weights_for_all + + return + + raise AssertionError( + "The weights could not be checked correctly due to a type mismatch." 
+ ) From 85563c153efd2afce41f3faeeb32f23e2b130c3e Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 13 May 2024 22:28:13 +0200 Subject: [PATCH 076/118] refactor: went from index-based to mask-based approach for performance --- chemotools/utils/whittaker_base/auto_lambda/logml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chemotools/utils/whittaker_base/auto_lambda/logml.py b/chemotools/utils/whittaker_base/auto_lambda/logml.py index 79ffc407..4c3a1dac 100644 --- a/chemotools/utils/whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/whittaker_base/auto_lambda/logml.py @@ -64,9 +64,9 @@ def get_log_marginal_likelihood_constant_term( # first, the constant terms of the log marginal likelihood are computed starting # from the log pseudo-determinant of the weight matrix, i.e., the product of the # non-zero elements of the weight vector - nonzero_w_idxs = np.where(w > w.max() * zero_weight_tol)[0] - nnz_w = nonzero_w_idxs.size - log_pseudo_det_w = np.log(w[nonzero_w_idxs]).sum() + nonzero_w_flags = w > w.max() * zero_weight_tol + nnz_w = nonzero_w_flags.sum() + log_pseudo_det_w = np.log(w[nonzero_w_flags]).sum() # the constant term of the log marginal likelihood is computed return ( From 1e11bc72a0852db65f25dfd1fbc900e4804a9b38 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 13 May 2024 22:36:08 +0200 Subject: [PATCH 077/118] feat: added true spectrum to the test dataset for the automated Whittaker smoothing --- tests/fixtures.py | 4 +- .../spectrum_whittaker_auto_lambda.csv | 1004 ++++++++--------- 2 files changed, 504 insertions(+), 504 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 25885bed..9176d3b6 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -107,7 +107,7 @@ def spectrum_whittaker_auto_lambda() -> np.ndarray: skiprows=1, ) - return spectral_data[::, 1] + return spectral_data[::, 2] @pytest.fixture @@ -118,7 +118,7 @@ def noise_level_whittaker_auto_lambda() -> np.ndarray: skiprows=1, ) - return 
spectral_data[::, 2] + return spectral_data[::, 3] @pytest.fixture diff --git a/tests/resources/spectrum_whittaker_auto_lambda.csv b/tests/resources/spectrum_whittaker_auto_lambda.csv index 8be189bf..87a6d774 100644 --- a/tests/resources/spectrum_whittaker_auto_lambda.csv +++ b/tests/resources/spectrum_whittaker_auto_lambda.csv @@ -1,502 +1,502 @@ -# x,y,std --5.000000000000000000e+00,3.913954426305540135e+00,3.096822336193649078e-02 --4.980000000000000426e+00,3.913295001715116683e+00,4.157579901324756633e-02 --4.959999999999999964e+00,3.967828933711835759e+00,2.021882259976331628e-02 --4.940000000000000391e+00,3.913240339328647899e+00,3.283002638476407303e-02 --4.919999999999999929e+00,3.906660793153114319e+00,2.629869250880701587e-02 --4.900000000000000355e+00,3.977753591201872219e+00,2.303151065858748819e-02 --4.879999999999999893e+00,3.918578498160504520e+00,nan --4.860000000000000320e+00,3.909640759183042213e+00,2.308082479438558487e-02 --4.839999999999999858e+00,3.928403360537521927e+00,2.120712443290193039e-02 --4.820000000000000284e+00,3.950831318015709748e+00,2.136998567160089402e-02 --4.799999999999999822e+00,3.931835980130326647e+00,nan --4.780000000000000249e+00,3.913960091908467831e+00,3.219054411264235527e-02 --4.759999999999999787e+00,3.944102620606957821e+00,4.689772360937648904e-02 --4.740000000000000213e+00,3.927372355534060500e+00,3.710244685607267529e-02 --4.719999999999999751e+00,3.935926500645895665e+00,4.757583831925741441e-02 --4.700000000000000178e+00,3.940090812557987210e+00,2.609806421525888431e-02 --4.679999999999999716e+00,4.006116813960685086e+00,4.372331764659243697e-02 --4.660000000000000142e+00,3.981343849480365460e+00,2.459103331165375314e-02 --4.639999999999999680e+00,3.957171589280387813e+00,3.317473331736146142e-02 --4.620000000000000107e+00,3.918252069140263139e+00,2.429907714232983768e-02 --4.599999999999999645e+00,3.989915028726924273e+00,4.662424792287282738e-02 
--4.580000000000000071e+00,3.984671553543385869e+00,3.256017889923769337e-02 --4.559999999999999609e+00,4.036915022948532439e+00,2.863787788929512274e-02 --4.540000000000000036e+00,4.018106961074466632e+00,4.080141249468031972e-02 --4.519999999999999574e+00,3.935916303030416064e+00,4.359886618341140413e-02 --4.500000000000000000e+00,3.933475657586883667e+00,3.361348387306223318e-02 --4.480000000000000426e+00,3.943095738054793742e+00,2.516707436675810414e-02 --4.459999999999999964e+00,4.000466770834566255e+00,3.088943627207259290e-02 --4.439999999999999503e+00,3.964649483902519478e+00,2.963575571223017041e-02 --4.419999999999999929e+00,3.984856670488913455e+00,nan --4.400000000000000355e+00,3.971856089063352968e+00,2.495601165215273129e-02 --4.379999999999999893e+00,4.030635978593705815e+00,3.734265590577109939e-02 --4.360000000000000320e+00,3.981218782347658802e+00,nan --4.339999999999999858e+00,3.968899147970850194e+00,3.269095491082592980e-02 --4.320000000000000284e+00,3.982898269224011578e+00,2.946282537533189208e-02 --4.299999999999999822e+00,3.939100003418123386e+00,2.865522681337081753e-02 --4.280000000000000249e+00,3.945635266010562692e+00,3.472014240355719705e-02 --4.259999999999999787e+00,4.000947172556999831e+00,nan --4.240000000000000213e+00,4.020343398727571405e+00,3.157915029390712114e-02 --4.219999999999999751e+00,4.063889834011682467e+00,4.463116837108123403e-02 --4.200000000000000178e+00,4.019044487495743390e+00,2.820994850940350754e-02 --4.179999999999999716e+00,3.999693957024348379e+00,2.842345038179301675e-02 --4.160000000000000142e+00,4.014216556726715623e+00,nan --4.139999999999999680e+00,3.997568023205597232e+00,3.579063885698285929e-02 --4.120000000000000107e+00,3.969380819221903778e+00,3.710054561456280209e-02 --4.099999999999999645e+00,3.973378322910303240e+00,2.674569508640989302e-02 --4.080000000000000071e+00,4.018743284686774331e+00,4.125625535047831166e-02 --4.059999999999999609e+00,4.002858211233585983e+00,3.722005134771839280e-02 
--4.040000000000000036e+00,4.021668953186641637e+00,2.647815044301944643e-02 --4.019999999999999574e+00,3.963600307242701959e+00,2.404215547884083759e-02 --4.000000000000000000e+00,4.001938886864440548e+00,2.316279516014600401e-02 --3.979999999999999982e+00,3.961121319087129944e+00,3.165838742547153611e-02 --3.959999999999999964e+00,3.988298094713540465e+00,2.927789155113174474e-02 --3.939999999999999947e+00,4.015211499281781116e+00,1.965729088192920068e-02 --3.919999999999999929e+00,3.996739007072117822e+00,2.217006837373004455e-02 --3.899999999999999911e+00,4.006967280403993747e+00,3.548421177927273823e-02 --3.879999999999999893e+00,3.992420505343744708e+00,nan --3.859999999999999876e+00,3.989833605239252012e+00,1.982777448634700374e-02 --3.839999999999999858e+00,3.977133776300772361e+00,nan --3.820000000000000284e+00,3.968534106303187148e+00,4.404511893658814153e-02 --3.799999999999999822e+00,4.046166805023654334e+00,3.313448396509128396e-02 --3.780000000000000249e+00,3.981901684829826316e+00,2.802464965497018790e-02 --3.759999999999999787e+00,3.968265699229948318e+00,3.089639612993963155e-02 --3.740000000000000213e+00,3.993594708763612111e+00,2.708670552718019792e-02 --3.719999999999999751e+00,3.978060837482736645e+00,3.452291125902230001e-02 --3.700000000000000178e+00,3.999624538760653270e+00,3.961449084431505063e-02 --3.679999999999999716e+00,3.959951862232888420e+00,2.127983196111482220e-02 --3.660000000000000142e+00,3.992008806185594505e+00,3.730351277723908954e-02 --3.639999999999999680e+00,3.983111142787472581e+00,nan --3.620000000000000107e+00,3.997789721210798675e+00,4.189555115844698086e-02 --3.599999999999999645e+00,3.978584700131533758e+00,3.025928312227217989e-02 --3.580000000000000071e+00,3.999259303534389520e+00,2.857730727887747985e-02 --3.560000000000000053e+00,3.990540510474509084e+00,nan --3.540000000000000036e+00,4.006843582528653691e+00,3.607893641315891575e-02 --3.520000000000000018e+00,3.989502366165125213e+00,5.280686333954774658e-02 
--3.500000000000000000e+00,3.971463221592128612e+00,3.195062668002687095e-02 --3.479999999999999982e+00,3.984236422056169680e+00,3.653447718907179564e-02 --3.459999999999999964e+00,3.965390902218237645e+00,nan --3.439999999999999947e+00,3.988691848103080417e+00,3.194360253100081426e-02 --3.419999999999999929e+00,3.915564628228665800e+00,3.335365548095119148e-02 --3.399999999999999911e+00,3.933238574213607119e+00,1.955557934578588258e-02 --3.379999999999999893e+00,3.984902392991356290e+00,3.531213606899333973e-02 --3.359999999999999876e+00,3.966998602981461008e+00,3.858838304594502944e-02 --3.339999999999999858e+00,3.980905662625323860e+00,nan --3.320000000000000284e+00,3.914229362381833166e+00,3.160429335949033886e-02 --3.299999999999999822e+00,3.994917048087540667e+00,3.132077094697865299e-02 --3.280000000000000249e+00,3.943874923203797955e+00,2.512446779038992120e-02 --3.259999999999999787e+00,3.950489243789083726e+00,2.346503295241367942e-02 --3.240000000000000213e+00,3.927196810974445729e+00,3.915093278749896127e-02 --3.219999999999999751e+00,3.947917455856836622e+00,2.476543934279373110e-02 --3.200000000000000178e+00,3.969343297431183970e+00,3.412387431505126134e-02 --3.179999999999999716e+00,3.925906099200734189e+00,2.901476964540048928e-02 --3.160000000000000142e+00,3.923668551812799166e+00,2.540509909929494770e-02 --3.139999999999999680e+00,3.947226670336628995e+00,2.993884543265358869e-02 --3.120000000000000107e+00,3.928346744753318465e+00,3.630472353955733339e-02 --3.099999999999999645e+00,3.900075964511259929e+00,4.332776229755198882e-02 --3.080000000000000071e+00,3.935772299731806978e+00,2.449157550065346814e-02 --3.060000000000000053e+00,3.915896784887003879e+00,1.992532293741383978e-02 --3.040000000000000036e+00,3.904143184923001275e+00,nan --3.020000000000000018e+00,3.897907823509206615e+00,3.955967022312247006e-02 --3.000000000000000000e+00,3.866438996497719049e+00,nan --2.979999999999999982e+00,3.903357248827355352e+00,2.678761408580850739e-02 
--2.959999999999999964e+00,3.867880849871169868e+00,4.056478219242153765e-02 --2.939999999999999947e+00,3.882397409790404375e+00,3.205584351100022877e-02 --2.919999999999999929e+00,3.936961090451624568e+00,3.332833371997186156e-02 --2.899999999999999911e+00,3.866043917754864445e+00,2.340248663650760716e-02 --2.879999999999999893e+00,3.874818538198867124e+00,3.363872375973592233e-02 --2.859999999999999876e+00,3.874819534869030413e+00,3.412069939766745885e-02 --2.839999999999999858e+00,3.852214585211989117e+00,3.848297388549758169e-02 --2.819999999999999840e+00,3.853548185955963934e+00,nan --2.799999999999999822e+00,3.854754553304260423e+00,2.534333019107942730e-02 --2.779999999999999805e+00,3.841071553729384025e+00,2.160748256322369762e-02 --2.759999999999999787e+00,3.906409499587252476e+00,3.989927560475871327e-02 --2.739999999999999769e+00,3.892863359620862163e+00,4.394563528675898834e-02 --2.719999999999999751e+00,3.865947691256906626e+00,4.331492627779613691e-02 --2.699999999999999734e+00,3.824190739531758254e+00,2.394046463981249276e-02 --2.680000000000000160e+00,3.910131192202466188e+00,2.358546713714732707e-02 --2.660000000000000142e+00,3.842141175582946655e+00,3.143049748680051825e-02 --2.640000000000000124e+00,3.823047218261981239e+00,3.201948188508568077e-02 --2.620000000000000107e+00,3.872275415949148147e+00,2.823873364965024529e-02 --2.600000000000000089e+00,3.805037068723069016e+00,2.923310143617065260e-02 --2.580000000000000071e+00,3.847414491372637801e+00,4.797550196894023039e-02 --2.560000000000000053e+00,3.843464708155962750e+00,3.266237082284365739e-02 --2.540000000000000036e+00,3.798922067708248473e+00,3.133758414632323297e-02 --2.520000000000000018e+00,3.827628701786526033e+00,2.317422859332170931e-02 --2.500000000000000000e+00,3.809526646402432881e+00,3.295109809401969292e-02 --2.479999999999999982e+00,3.747985523101889260e+00,4.050258952135226548e-02 --2.459999999999999964e+00,3.806809286093004729e+00,1.923621539387884369e-02 
--2.439999999999999947e+00,3.781100587500339838e+00,nan --2.419999999999999929e+00,3.749411789432546716e+00,3.866553106380841293e-02 --2.399999999999999911e+00,3.792726529826150994e+00,5.083314405848265299e-02 --2.379999999999999893e+00,3.749106715790383237e+00,1.836938560312795232e-02 --2.359999999999999876e+00,3.757735405285781649e+00,2.169879163528952090e-02 --2.339999999999999858e+00,3.752632605069845528e+00,3.800387835900527478e-02 --2.319999999999999840e+00,3.746880786136597141e+00,3.890813293409937923e-02 --2.299999999999999822e+00,3.750215780850299829e+00,5.729187485296455273e-02 --2.279999999999999805e+00,3.711741146279374792e+00,3.449917492064095981e-02 --2.259999999999999787e+00,3.630930613745085189e+00,3.742083549552085997e-02 --2.239999999999999769e+00,3.713166467526545222e+00,3.498465794338397744e-02 --2.219999999999999751e+00,3.681110389341025968e+00,3.815477170164736920e-02 --2.199999999999999734e+00,3.702545598010270389e+00,3.134983031602142645e-02 --2.180000000000000160e+00,3.715653624123065768e+00,3.811872814876098720e-02 --2.160000000000000142e+00,3.604065445077755392e+00,4.082881623022253242e-02 --2.140000000000000124e+00,3.679127871616473033e+00,2.843288863848685011e-02 --2.120000000000000107e+00,3.723207343503561972e+00,3.295457755380135079e-02 --2.100000000000000089e+00,3.593677436562992877e+00,nan --2.080000000000000071e+00,3.730259422314661233e+00,9.533727857204726819e-02 --2.060000000000000053e+00,3.652676325481325481e+00,4.814528861415776767e-02 --2.040000000000000036e+00,3.664039414557082708e+00,nan --2.020000000000000018e+00,3.605572286811380867e+00,7.161423172160229222e-02 --2.000000000000000000e+00,3.539623085803978331e+00,6.708869557030820718e-02 --1.979999999999999982e+00,3.619072317723509347e+00,5.743569593331862566e-02 --1.959999999999999964e+00,3.711185604571868524e+00,8.364311972260785044e-02 --1.939999999999999947e+00,3.719738846211896988e+00,7.184953346731948187e-02 
--1.919999999999999929e+00,3.640954715256664809e+00,6.558577658908354424e-02 --1.899999999999999911e+00,3.606185742205383793e+00,1.060721402286213394e-01 --1.879999999999999893e+00,3.649108418714726465e+00,9.092475993307447268e-02 --1.859999999999999876e+00,3.709944524969695401e+00,7.155454639976135955e-02 --1.839999999999999858e+00,3.419438852172148824e+00,1.228594590938914127e-01 --1.819999999999999840e+00,3.593174533405012916e+00,1.550942001530101755e-01 --1.799999999999999822e+00,3.520638523828726107e+00,1.219140064569037563e-01 --1.779999999999999805e+00,3.608267192342823115e+00,1.205649991710633223e-01 --1.759999999999999787e+00,3.453201243804023601e+00,1.376335561212327918e-01 --1.739999999999999769e+00,3.607774685478800425e+00,1.665844544798526783e-01 --1.719999999999999751e+00,3.640026211733285333e+00,nan --1.699999999999999734e+00,3.642227320257549028e+00,1.268407969723837903e-01 --1.679999999999999716e+00,3.803890280567367999e+00,1.576114673042917080e-01 --1.660000000000000142e+00,3.735897857753192053e+00,2.214995227455958393e-01 --1.640000000000000124e+00,3.663913595669907153e+00,nan --1.620000000000000107e+00,3.641401992387191200e+00,1.551600530436650083e-01 --1.600000000000000089e+00,3.217048317236007815e+00,2.630495244546968703e-01 --1.580000000000000071e+00,3.892532742459869688e+00,nan --1.560000000000000053e+00,3.360368769070483985e+00,1.982329599456646163e-01 --1.540000000000000036e+00,3.752961584101674131e+00,2.460402241728126826e-01 --1.520000000000000018e+00,3.745930947639183550e+00,3.309654834926570310e-01 --1.500000000000000000e+00,3.798556507140971927e+00,2.244332830917858124e-01 --1.479999999999999982e+00,3.810433536595719151e+00,2.921392931771470414e-01 --1.459999999999999964e+00,3.916929666201640181e+00,2.653814591464132566e-01 --1.439999999999999947e+00,4.031456282874307462e+00,3.642239947464401273e-01 --1.419999999999999929e+00,4.756015559092563905e+00,5.166291421943192130e-01 
--1.399999999999999911e+00,3.594508053563978400e+00,3.764090765633805180e-01 --1.379999999999999893e+00,3.995552010376214991e+00,4.221795343405287393e-01 --1.359999999999999876e+00,3.802382220218354369e+00,4.175557607001874061e-01 --1.339999999999999858e+00,4.654621606587832972e+00,3.786510240332576771e-01 --1.319999999999999840e+00,4.498210774344039820e+00,3.245373535245575369e-01 --1.299999999999999822e+00,4.167062990719260895e+00,5.052235057933155415e-01 --1.279999999999999805e+00,4.216951262431840597e+00,6.532837690567284694e-01 --1.259999999999999787e+00,4.559110373914525560e+00,3.273162563351011967e-01 --1.239999999999999769e+00,4.762139581965920243e+00,5.769640282217669558e-01 --1.219999999999999751e+00,3.280248253976095540e+00,5.275499350589685221e-01 --1.199999999999999734e+00,4.276145716520765028e+00,nan --1.179999999999999716e+00,4.600817916539200247e+00,6.092834368481748752e-01 --1.160000000000000142e+00,4.534854213018084224e+00,6.756216430995778399e-01 --1.140000000000000124e+00,5.024019049064923692e+00,5.385345533169113574e-01 --1.120000000000000107e+00,5.239870345111208749e+00,5.402099371398212391e-01 --1.100000000000000089e+00,5.485974253161926661e+00,4.018729221391404982e-01 --1.080000000000000071e+00,5.318244642725155735e+00,6.373361947883031675e-01 --1.060000000000000053e+00,6.760430236052386910e+00,6.124735590859445855e-01 --1.040000000000000036e+00,5.752138952060318999e+00,4.249735541418221407e-01 --1.020000000000000018e+00,4.159619060982223004e+00,8.032549364880220422e-01 --1.000000000000000000e+00,6.137115336978383517e+00,6.702421100272603072e-01 --9.799999999999995381e-01,5.872802388731122925e+00,nan --9.599999999999999645e-01,6.281236755604411748e+00,6.472217891646340204e-01 --9.399999999999995026e-01,5.565492156114431488e+00,6.959215464098150727e-01 --9.199999999999999289e-01,6.546745314566285501e+00,9.349172151308963175e-01 --9.000000000000003553e-01,4.235996624335836458e+00,nan 
--8.799999999999998934e-01,5.895946189351668032e+00,1.151543603478050626e+00 --8.600000000000003197e-01,8.090416456392947708e+00,9.886224714269703506e-01 --8.399999999999998579e-01,7.099654579905307017e+00,nan --8.200000000000002842e-01,7.783447599187353738e+00,1.563679351082028157e+00 --7.999999999999998224e-01,5.860696892165741190e+00,1.070756444618480741e+00 --7.800000000000002487e-01,6.197255583792041556e+00,7.878498533061101261e-01 --7.599999999999997868e-01,5.755018464355096341e+00,8.295877048549399335e-01 --7.400000000000002132e-01,7.207746759775751855e+00,4.903793932628179864e-01 --7.199999999999997513e-01,6.849506693526540069e+00,8.751365338238001890e-01 --7.000000000000001776e-01,7.063283370964628105e+00,8.139025491327271933e-01 --6.799999999999997158e-01,3.945646506068891224e+00,1.013587577515847382e+00 --6.600000000000001421e-01,6.267470566415362043e+00,7.742101094832944952e-01 --6.399999999999996803e-01,6.558919601455357906e+00,7.859065498006260198e-01 --6.200000000000001066e-01,7.077991229023441022e+00,6.918590236314428044e-01 --5.999999999999996447e-01,6.213534208408828086e+00,9.083962692379881076e-01 --5.800000000000000711e-01,5.040827980153054000e+00,9.659222627816541618e-01 --5.599999999999996092e-01,6.088292857633700095e+00,nan --5.400000000000000355e-01,4.933795191345740960e+00,6.901311785896823148e-01 --5.199999999999995737e-01,4.549334630528049317e+00,5.649737176498387248e-01 --5.000000000000000000e-01,5.385998717145486836e+00,8.874764677140921654e-01 --4.799999999999995381e-01,5.833853142262386271e+00,8.402770111488416793e-01 --4.599999999999999645e-01,5.659937411166305665e+00,8.734890865389354220e-01 --4.399999999999995026e-01,3.821608855697117058e+00,6.361529153821200433e-01 --4.199999999999999289e-01,4.956658156492856016e+00,7.052981973370683333e-01 --3.999999999999994671e-01,4.579878580113756037e+00,5.913716519320653120e-01 --3.799999999999998934e-01,5.313046851124894943e+00,6.604572801362433854e-01 
--3.600000000000003197e-01,3.276085072396378806e+00,5.970258352233933641e-01 --3.399999999999998579e-01,4.945262532692082047e+00,6.546549831258263730e-01 --3.200000000000002842e-01,5.074998639686343793e+00,5.939270740467554877e-01 --2.999999999999998224e-01,5.647855412945030906e+00,5.294028271246801198e-01 --2.800000000000002487e-01,4.105665388254435832e+00,6.313947350577137074e-01 --2.599999999999997868e-01,4.450308562157782255e+00,nan --2.400000000000002132e-01,4.071521720834863522e+00,4.594469840608414457e-01 --2.199999999999997513e-01,2.963422081644037753e+00,7.669847097205949593e-01 --2.000000000000001776e-01,3.868455002797947184e+00,4.251092352493783255e-01 --1.799999999999997158e-01,3.536676735169854702e+00,3.809115333435043049e-01 --1.600000000000001421e-01,3.211077182715823319e+00,3.539182369697171926e-01 --1.399999999999996803e-01,3.157142724539423462e+00,4.179098049588486075e-01 --1.200000000000001066e-01,3.253957454666461580e+00,nan --9.999999999999964473e-02,3.548219807515344382e+00,3.531347515347905142e-01 --8.000000000000007105e-02,3.507299359211579315e+00,2.648647778152579124e-01 --5.999999999999960920e-02,2.682990980990552998e+00,4.679258280625686051e-01 --4.000000000000003553e-02,2.988681850963338160e+00,4.187317005153019522e-01 --1.999999999999957367e-02,3.143729582095237340e+00,2.296789574801562372e-01 -0.000000000000000000e+00,2.421701960787510988e+00,3.810571805066982343e-01 -2.000000000000046185e-02,2.880319783599672778e+00,2.221249274397878171e-01 -4.000000000000003553e-02,3.722343570451513095e+00,2.199211530517474755e-01 -6.000000000000049738e-02,2.857603579947825878e+00,2.448851310521519553e-01 -8.000000000000007105e-02,3.095978508429543030e+00,3.840540878242945655e-01 -1.000000000000005329e-01,2.914637263903819164e+00,1.904934648461840685e-01 -1.200000000000001066e-01,2.960170259963005979e+00,1.938639502937708481e-01 -1.399999999999996803e-01,2.949001397061277618e+00,2.123753112352188555e-01 
-1.600000000000001421e-01,3.049640755278711701e+00,1.228069598001861884e-01 -1.799999999999997158e-01,2.728754984418608753e+00,nan -2.000000000000001776e-01,2.727849241201167452e+00,2.231426434949308757e-01 -2.199999999999997513e-01,2.992638787130469158e+00,2.191637740227992959e-01 -2.400000000000002132e-01,2.251918265667214758e+00,2.480313535065964692e-01 -2.599999999999997868e-01,2.889134209031075429e+00,2.459585819361506664e-01 -2.800000000000002487e-01,2.424532517989040237e+00,9.053335818983149208e-02 -2.999999999999998224e-01,2.165867195473284212e+00,1.703346508348992372e-01 -3.200000000000002842e-01,2.834687805641520786e+00,2.862061388394843697e-01 -3.399999999999998579e-01,2.506058393497093029e+00,2.187887655218112293e-01 -3.600000000000003197e-01,2.878502009412273299e+00,3.009170618786285445e-01 -3.799999999999998934e-01,2.538209946113609838e+00,2.513645475901037374e-01 -4.000000000000003553e-01,2.323923192893809109e+00,3.558336096401703008e-01 -4.199999999999999289e-01,2.335711674975006424e+00,2.440863314527701444e-01 -4.400000000000003908e-01,3.188210899075321514e+00,2.757914055844488677e-01 -4.599999999999999645e-01,2.392353676431516529e+00,2.469282079526576301e-01 -4.800000000000004263e-01,2.758761172409790241e+00,3.680833929312681274e-01 -5.000000000000000000e-01,3.169012719153432300e+00,3.436627873706666336e-01 -5.200000000000004619e-01,2.033325531486355864e+00,2.563589488507343472e-01 -5.400000000000000355e-01,2.606195281701388033e+00,2.688410546078896513e-01 -5.600000000000004974e-01,2.581526900163175764e+00,2.837953043520541341e-01 -5.800000000000000711e-01,2.467688230198862964e+00,3.711213884661984097e-01 -6.000000000000005329e-01,2.831073027789893892e+00,3.175362955311629154e-01 -6.200000000000001066e-01,3.161240012035097191e+00,1.997911486567817974e-01 -6.399999999999996803e-01,2.956347334622390743e+00,4.285265589597905822e-01 -6.600000000000001421e-01,2.527608156534642880e+00,nan 
-6.799999999999997158e-01,2.971776586979084644e+00,5.310266580236164824e-01 -7.000000000000001776e-01,2.877290794515752204e+00,nan -7.199999999999997513e-01,3.389214360428408845e+00,4.743984853334949348e-01 -7.400000000000002132e-01,2.522099656711926485e+00,4.929410237222000557e-01 -7.599999999999997868e-01,2.155763592687337749e+00,3.841695803658565778e-01 -7.800000000000002487e-01,2.799985290900282031e+00,5.452928167360916456e-01 -7.999999999999998224e-01,3.181999059836660315e+00,nan -8.200000000000002842e-01,3.057996485246090135e+00,nan -8.399999999999998579e-01,2.115576753796697851e+00,3.280525299425075536e-01 -8.600000000000003197e-01,2.721097204259094404e+00,5.254098256952898227e-01 -8.799999999999998934e-01,3.061150513259692563e+00,4.445749637844202407e-01 -9.000000000000003553e-01,2.669388958342580498e+00,4.072730575664818553e-01 -9.199999999999999289e-01,3.627533156198332609e+00,6.026970045505178897e-01 -9.400000000000003908e-01,2.765688681358758227e+00,5.117093613705018251e-01 -9.599999999999999645e-01,3.192183883069087624e+00,3.587640992401981976e-01 -9.800000000000004263e-01,3.968383003330220760e+00,nan -1.000000000000000000e+00,2.338013832820840854e+00,nan -1.020000000000000462e+00,3.313408994267729835e+00,6.526803408414116880e-01 -1.040000000000000036e+00,3.040682318981518861e+00,5.836565795219444963e-01 -1.060000000000000497e+00,2.779849182033996335e+00,4.256561800169474385e-01 -1.080000000000000071e+00,2.904547044921763188e+00,3.041189794730353446e-01 -1.100000000000000533e+00,2.986790168515752075e+00,3.526160436787704744e-01 -1.120000000000000107e+00,3.510619812891737190e+00,4.589691190502168672e-01 -1.140000000000000568e+00,3.610893289988389654e+00,5.683411129188775668e-01 -1.160000000000000142e+00,2.241228078826833059e+00,3.469867901057105963e-01 -1.179999999999999716e+00,3.814941693212685614e+00,3.599045368114944798e-01 -1.200000000000000178e+00,3.265716891407834943e+00,2.892595048204359420e-01 
-1.219999999999999751e+00,3.081223862719026485e+00,nan -1.240000000000000213e+00,2.922621628709752617e+00,nan -1.259999999999999787e+00,2.600552915231134854e+00,5.720317054932907430e-01 -1.280000000000000249e+00,2.295526789533440581e+00,4.703156894625505902e-01 -1.299999999999999822e+00,2.134518269816672920e+00,2.073651949699803210e-01 -1.320000000000000284e+00,2.505688380794310000e+00,3.565010685590284667e-01 -1.339999999999999858e+00,2.657778015600396060e+00,2.054430250094419763e-01 -1.360000000000000320e+00,2.880342521059242600e+00,3.269488151166322631e-01 -1.379999999999999893e+00,2.744123595985678232e+00,3.190639167541055032e-01 -1.400000000000000355e+00,2.834484243824526128e+00,3.600015743496797760e-01 -1.419999999999999929e+00,3.111700384330541080e+00,3.055750774141779980e-01 -1.440000000000000391e+00,1.984596428286149017e+00,2.535374835126482096e-01 -1.459999999999999964e+00,2.184618266171591827e+00,4.125742803545879300e-01 -1.480000000000000426e+00,2.541586847516879466e+00,3.075604621153351093e-01 -1.500000000000000000e+00,1.886697152297341695e+00,3.969470656999851688e-01 -1.520000000000000462e+00,2.042829442663604667e+00,3.025771102881774643e-01 -1.540000000000000036e+00,2.260314403072414713e+00,2.367490966951061038e-01 -1.560000000000000497e+00,1.967071821134906751e+00,3.089001509687088198e-01 -1.580000000000000071e+00,2.255154594264408896e+00,2.813968051504651124e-01 -1.600000000000000533e+00,1.664795947481280347e+00,2.310721598292219126e-01 -1.620000000000000107e+00,1.898006700231108290e+00,2.653301084336139870e-01 -1.640000000000000568e+00,2.167986247869837069e+00,nan -1.660000000000000142e+00,2.052209084359961633e+00,2.480990399266391477e-01 -1.679999999999999716e+00,2.030829428195580988e+00,nan -1.700000000000000178e+00,2.079538602632515332e+00,nan -1.719999999999999751e+00,1.447949949786532686e+00,1.487681488926025641e-01 -1.740000000000000213e+00,1.984142869518430441e+00,2.126409080700615295e-01 
-1.759999999999999787e+00,1.594984196307964508e+00,1.531504013947989629e-01 -1.780000000000000249e+00,1.689744286673886542e+00,nan -1.799999999999999822e+00,1.505327862233904135e+00,1.433670028956879894e-01 -1.820000000000000284e+00,1.657357557672590298e+00,1.936888522083639597e-01 -1.839999999999999858e+00,1.627728676663654062e+00,1.973138182971920507e-01 -1.860000000000000320e+00,1.562003428819664252e+00,1.075696220393412700e-01 -1.879999999999999893e+00,1.495731507212113787e+00,1.246181847495901329e-01 -1.900000000000000355e+00,1.696456855549726805e+00,1.184320222108378451e-01 -1.919999999999999929e+00,1.753621707095358806e+00,1.654974896688882480e-01 -1.940000000000000391e+00,1.510696371828758267e+00,nan -1.959999999999999964e+00,1.593205776328715517e+00,9.488123223315308996e-02 -1.980000000000000426e+00,1.371252828445131744e+00,9.083416534289066868e-02 -2.000000000000000000e+00,1.631767388510174444e+00,5.605529023766811503e-02 -2.020000000000000462e+00,1.510870281561765172e+00,9.794045190385711197e-02 -2.040000000000000036e+00,1.509917665204379755e+00,5.847087504586767626e-02 -2.060000000000000497e+00,1.457511736987583184e+00,1.026291527197704528e-01 -2.080000000000000071e+00,1.337502930906462328e+00,nan -2.100000000000000533e+00,1.428158853424737362e+00,6.186049157060879988e-02 -2.120000000000000107e+00,1.334146609616660761e+00,nan -2.140000000000000568e+00,1.389119451915253656e+00,6.753635107838724949e-02 -2.160000000000000142e+00,1.411502044605835504e+00,4.818573805837670504e-02 -2.179999999999999716e+00,1.512147721837651781e+00,4.450992438755188574e-02 -2.200000000000000178e+00,1.342215912911993270e+00,5.676913138113579699e-02 -2.219999999999999751e+00,1.533350868768530884e+00,6.970841109848661954e-02 -2.240000000000000213e+00,1.398941725956053928e+00,6.789068920224745896e-02 -2.259999999999999787e+00,1.315483430698168199e+00,4.651874555391048022e-02 -2.280000000000000249e+00,1.373201771977372143e+00,4.002537228954656384e-02 
-2.299999999999999822e+00,1.285003032494459685e+00,3.701739336027300153e-02 -2.320000000000000284e+00,1.337936194998987194e+00,4.090671416312927772e-02 -2.339999999999999858e+00,1.276400380907636301e+00,5.126562293055141051e-02 -2.360000000000000320e+00,1.300022331653995611e+00,5.490064925439376575e-02 -2.379999999999999893e+00,1.333665544228300748e+00,nan -2.400000000000000355e+00,1.293747202699239374e+00,3.450761667350280004e-02 -2.419999999999999929e+00,1.244199007704499316e+00,nan -2.440000000000000391e+00,1.324761412428399510e+00,nan -2.459999999999999964e+00,1.235109059399808418e+00,2.588565954245041975e-02 -2.480000000000000426e+00,1.302786487079408806e+00,nan -2.500000000000000000e+00,1.193293907935522258e+00,4.554179185929631279e-02 -2.520000000000000462e+00,1.166485983069556198e+00,2.969047947990152009e-02 -2.540000000000000036e+00,1.227974018485518126e+00,nan -2.560000000000000497e+00,1.304406572007846821e+00,2.881781570521451882e-02 -2.580000000000000071e+00,1.118044531986680745e+00,5.065123842298295542e-02 -2.600000000000000533e+00,1.176494597509851525e+00,3.111566005871567453e-02 -2.620000000000000107e+00,1.138071003896516276e+00,1.588287971280955349e-02 -2.640000000000000568e+00,1.182284596889840778e+00,3.734954384966652358e-02 -2.660000000000000142e+00,1.142711990112235343e+00,2.222259134544606399e-02 -2.679999999999999716e+00,1.172697850945799924e+00,3.267050585695246284e-02 -2.700000000000000178e+00,1.125106052110490307e+00,2.879231061755915463e-02 -2.719999999999999751e+00,1.063066446130191745e+00,3.453316633484269926e-02 -2.740000000000000213e+00,1.091014756190950008e+00,3.593049971171793922e-02 -2.759999999999999787e+00,1.117608734824651284e+00,3.620445660684060152e-02 -2.780000000000000249e+00,1.070946674106443952e+00,2.945674602064272604e-02 -2.799999999999999822e+00,1.158073375390544557e+00,4.317271897172083456e-02 -2.820000000000000284e+00,1.068532570468847309e+00,3.608477525373053607e-02 
-2.839999999999999858e+00,1.073138054167278588e+00,3.920116041946775098e-02 -2.860000000000000320e+00,1.051324237483512336e+00,3.217497612874499480e-02 -2.879999999999999893e+00,1.048394425887490877e+00,2.863637825214125979e-02 -2.900000000000000355e+00,1.043197653893642496e+00,2.303674148816653097e-02 -2.919999999999999929e+00,9.927415994114875408e-01,2.750601863091974020e-02 -2.940000000000000391e+00,1.041076924330221587e+00,2.705651838895691344e-02 -2.959999999999999964e+00,9.525405867745834199e-01,1.703397553553930369e-02 -2.980000000000000426e+00,9.813458308074753944e-01,3.980445657729349968e-02 -3.000000000000000000e+00,9.788892868312137896e-01,3.501065160424287476e-02 -3.019999999999999574e+00,9.791733570405278808e-01,2.074376790171749904e-02 -3.040000000000000924e+00,9.484615006489022226e-01,3.100365139014824370e-02 -3.060000000000000497e+00,9.351859625054994574e-01,3.481369483420125588e-02 -3.080000000000000071e+00,9.057035218946286603e-01,2.772343988678164020e-02 -3.099999999999999645e+00,8.651708341540602643e-01,3.921165500460101205e-02 -3.120000000000000995e+00,9.303704135098522787e-01,2.771220111105161463e-02 -3.140000000000000568e+00,9.193051217201706482e-01,nan -3.160000000000000142e+00,9.352467420486022531e-01,3.588952176393950949e-02 -3.179999999999999716e+00,8.813652204922443056e-01,nan -3.199999999999999289e+00,9.390011623128490248e-01,1.901648106737680380e-02 -3.220000000000000639e+00,8.600902219662046599e-01,4.474787744605149969e-02 -3.240000000000000213e+00,8.523875153073585675e-01,3.323721427792761945e-02 -3.259999999999999787e+00,8.644647000854875918e-01,2.951322030495437398e-02 -3.279999999999999361e+00,8.690682320477158829e-01,2.307464008203872000e-02 -3.300000000000000711e+00,8.079756093381198490e-01,2.212612398486654697e-02 -3.320000000000000284e+00,8.294698933376599692e-01,3.349877650881752378e-02 -3.339999999999999858e+00,7.773164878496438002e-01,2.792524629966190025e-02 
-3.359999999999999432e+00,7.987031000048978591e-01,2.519442004395512635e-02 -3.380000000000000782e+00,7.915291710156696636e-01,2.941592650294502787e-02 -3.400000000000000355e+00,7.742913156876776171e-01,3.018585662763769925e-02 -3.419999999999999929e+00,8.052117292761523659e-01,3.534820799363974642e-02 -3.439999999999999503e+00,6.975738328763312346e-01,nan -3.460000000000000853e+00,7.456754448582639805e-01,5.177506811051349467e-02 -3.480000000000000426e+00,7.971989053927358571e-01,nan -3.500000000000000000e+00,7.500214809794597537e-01,2.973325203337600503e-02 -3.519999999999999574e+00,7.405704628922389343e-01,2.644933588288904402e-02 -3.540000000000000924e+00,7.010520360337462176e-01,2.751384680887634590e-02 -3.560000000000000497e+00,7.748798508620088343e-01,2.238844522514950308e-02 -3.580000000000000071e+00,7.037393802241858065e-01,3.022398023282772683e-02 -3.599999999999999645e+00,6.853447274267203682e-01,3.659059286972034947e-02 -3.620000000000000995e+00,6.404736362845638853e-01,4.159673361489631821e-02 -3.640000000000000568e+00,5.796863065161470541e-01,2.310965550118417033e-02 -3.660000000000000142e+00,6.706090484334317203e-01,2.150927154134406327e-02 -3.679999999999999716e+00,6.590807197035827292e-01,nan -3.700000000000001066e+00,6.579262974794020113e-01,3.664203532810380443e-02 -3.720000000000000639e+00,6.362810821527531413e-01,nan -3.740000000000000213e+00,6.329963281254368246e-01,3.069766501312837931e-02 -3.759999999999999787e+00,5.674844485932917237e-01,2.929212140056279556e-02 -3.779999999999999361e+00,6.020347859780290634e-01,nan -3.800000000000000711e+00,5.979345896309717912e-01,nan -3.820000000000000284e+00,5.921153801598458832e-01,2.472956858646184128e-02 -3.839999999999999858e+00,5.168250498864187525e-01,3.932557515162073830e-02 -3.859999999999999432e+00,5.804153801121019196e-01,2.396882167584396886e-02 -3.880000000000000782e+00,5.121237847876588534e-01,2.891589906210544070e-02 
-3.900000000000000355e+00,5.784753349303056735e-01,3.652093289156662509e-02 -3.919999999999999929e+00,5.514372689831463781e-01,2.860219042728869357e-02 -3.939999999999999503e+00,4.913097559383847734e-01,2.105444236715212428e-02 -3.960000000000000853e+00,5.158491350566534184e-01,4.172547955572703665e-02 -3.980000000000000426e+00,5.006790115005972375e-01,nan -4.000000000000000000e+00,5.296558230138243006e-01,2.904212449707989363e-02 -4.019999999999999574e+00,5.527501560920484724e-01,3.635397445862977722e-02 -4.040000000000000924e+00,5.121606975311043808e-01,2.941529899783447422e-02 -4.060000000000000497e+00,4.620786835801224401e-01,3.957298503403856166e-02 -4.080000000000000071e+00,5.631392727992968750e-01,3.838219534881322920e-02 -4.099999999999999645e+00,5.001731340426340822e-01,2.855744716291427898e-02 -4.120000000000000995e+00,4.759932526974401945e-01,nan -4.140000000000000568e+00,4.408134938285349635e-01,2.613790641807950052e-02 -4.160000000000000142e+00,4.661151650812371816e-01,nan -4.179999999999999716e+00,4.170876717713623005e-01,3.040067984335515991e-02 -4.200000000000001066e+00,4.673591396611029847e-01,3.570914616284417281e-02 -4.220000000000000639e+00,4.763519606919671134e-01,2.539380830284991911e-02 -4.240000000000000213e+00,4.092221112259533666e-01,2.153629549044803679e-02 -4.259999999999999787e+00,3.932138318109248964e-01,2.659889927443474106e-02 -4.279999999999999361e+00,4.201976468779460294e-01,3.705805557891760083e-02 -4.300000000000000711e+00,3.600548568063514643e-01,2.534162187361805110e-02 -4.320000000000000284e+00,4.174107488585798165e-01,3.201413680890515673e-02 -4.339999999999999858e+00,4.200430502438344815e-01,3.407525487923236673e-02 -4.359999999999999432e+00,3.398977450477670414e-01,4.296195288458046213e-02 -4.380000000000000782e+00,3.616536429337198211e-01,2.686004203200916332e-02 -4.400000000000000355e+00,3.430360370501685519e-01,nan -4.419999999999999929e+00,3.413386186647102138e-01,4.121900071809197347e-02 
-4.439999999999999503e+00,2.999581533858771132e-01,nan -4.460000000000000853e+00,4.195723359185571377e-01,2.022926550192588255e-02 -4.480000000000000426e+00,3.306936459388154503e-01,3.731986554227401803e-02 -4.500000000000000000e+00,3.418964395084210794e-01,4.555894651088994440e-02 -4.519999999999999574e+00,3.042222781899822093e-01,nan -4.540000000000000924e+00,2.945166790367058640e-01,3.255860667849282825e-02 -4.560000000000000497e+00,3.353764562249985648e-01,4.859846603864388659e-02 -4.580000000000000071e+00,2.817826789133563992e-01,2.650893947918774030e-02 -4.599999999999999645e+00,3.504444880940863483e-01,3.514300648032542274e-02 -4.620000000000000995e+00,2.864676182506220581e-01,3.684391263051964294e-02 -4.640000000000000568e+00,2.634071544168652967e-01,2.308952122403817517e-02 -4.660000000000000142e+00,2.388318962249805499e-01,2.924970611005074661e-02 -4.679999999999999716e+00,2.575495264893400416e-01,3.474081006829046747e-02 -4.700000000000001066e+00,2.032770265411950605e-01,4.046234879416068381e-02 -4.720000000000000639e+00,2.754515805332317657e-01,nan -4.740000000000000213e+00,2.510993908751023018e-01,3.621635024533232522e-02 -4.759999999999999787e+00,2.865785354726087530e-01,3.795087884160799202e-02 -4.779999999999999361e+00,3.109668724655918481e-01,4.300319067191369499e-02 -4.800000000000000711e+00,1.785131240197480318e-01,3.270046984938442675e-02 -4.820000000000000284e+00,2.571486778006862317e-01,2.718603598796379486e-02 -4.839999999999999858e+00,2.159567786508689025e-01,2.758167160029438916e-02 -4.859999999999999432e+00,1.759826199015482873e-01,3.354081722595050885e-02 -4.880000000000000782e+00,2.354648132723803911e-01,3.584108267447292523e-02 -4.900000000000000355e+00,2.306952276361795284e-01,3.216812284508516889e-02 -4.919999999999999929e+00,1.881273852558159876e-01,2.551945124320626965e-02 -4.939999999999999503e+00,1.949066051071076111e-01,nan -4.960000000000000853e+00,2.230855359433417673e-01,2.520107939712273176e-02 
-4.980000000000000426e+00,1.271495425341684082e-01,3.617754646046948686e-02 -5.000000000000000000e+00,1.665385684665513233e-01,2.618489741967522377e-02 +# x,y_true, y_noisy,std +-5.000000000000000000e+00,3.917848549326277130e+00,3.913954426305540135e+00,3.096822336193649078e-02 +-4.980000000000000426e+00,3.921217953957871316e+00,3.913295001715116683e+00,4.157579901324756633e-02 +-4.959999999999999964e+00,3.924518194950613825e+00,3.967828933711835759e+00,2.021882259976331628e-02 +-4.940000000000000391e+00,3.927749153496185919e+00,3.913240339328647899e+00,3.283002638476407303e-02 +-4.919999999999999929e+00,3.930910713280428848e+00,3.906660793153114319e+00,2.629869250880701587e-02 +-4.900000000000000355e+00,3.934002760487532058e+00,3.977753591201872219e+00,2.303151065858748819e-02 +-4.879999999999999893e+00,3.937025183804129469e+00,3.918578498160504520e+00,nan +-4.860000000000000320e+00,3.939977874423308712e+00,3.909640759183042213e+00,2.308082479438558487e-02 +-4.839999999999999858e+00,3.942860726048525777e+00,3.928403360537521927e+00,2.120712443290193039e-02 +-4.820000000000000284e+00,3.945673634897434390e+00,3.950831318015709748e+00,2.136998567160089402e-02 +-4.799999999999999822e+00,3.948416499705618588e+00,3.931835980130326647e+00,nan +-4.780000000000000249e+00,3.951089221730241796e+00,3.913960091908467831e+00,3.219054411264235527e-02 +-4.759999999999999787e+00,3.953691704753600877e+00,3.944102620606957821e+00,4.689772360937648904e-02 +-4.740000000000000213e+00,3.956223855086586916e+00,3.927372355534060500e+00,3.710244685607267529e-02 +-4.719999999999999751e+00,3.958685581572061629e+00,3.935926500645895665e+00,4.757583831925741441e-02 +-4.700000000000000178e+00,3.961076795588137855e+00,3.940090812557987210e+00,2.609806421525888431e-02 +-4.679999999999999716e+00,3.963397411051368557e+00,4.006116813960685086e+00,4.372331764659243697e-02 +-4.660000000000000142e+00,3.965647344419848785e+00,3.981343849480365460e+00,2.459103331165375314e-02 
+-4.639999999999999680e+00,3.967826514696219053e+00,3.957171589280387813e+00,3.317473331736146142e-02 +-4.620000000000000107e+00,3.969934843430585225e+00,3.918252069140263139e+00,2.429907714232983768e-02 +-4.599999999999999645e+00,3.971972254723340701e+00,3.989915028726924273e+00,4.662424792287282738e-02 +-4.580000000000000071e+00,3.973938675227898898e+00,3.984671553543385869e+00,3.256017889923769337e-02 +-4.559999999999999609e+00,3.975834034153333807e+00,4.036915022948532439e+00,2.863787788929512274e-02 +-4.540000000000000036e+00,3.977658263266929062e+00,4.018106961074466632e+00,4.080141249468031972e-02 +-4.519999999999999574e+00,3.979411296896634198e+00,3.935916303030416064e+00,4.359886618341140413e-02 +-4.500000000000000000e+00,3.981093071933426764e+00,3.933475657586883667e+00,3.361348387306223318e-02 +-4.480000000000000426e+00,3.982703527833586943e+00,3.943095738054793742e+00,2.516707436675810414e-02 +-4.459999999999999964e+00,3.984242606620877147e+00,4.000466770834566255e+00,3.088943627207259290e-02 +-4.439999999999999503e+00,3.985710252888627458e+00,3.964649483902519478e+00,2.963575571223017041e-02 +-4.419999999999999929e+00,3.987106413801730032e+00,3.984856670488913455e+00,nan +-4.400000000000000355e+00,3.988431039098542463e+00,3.971856089063352968e+00,2.495601165215273129e-02 +-4.379999999999999893e+00,3.989684081092698342e+00,4.030635978593705815e+00,3.734265590577109939e-02 +-4.360000000000000320e+00,3.990865494674820546e+00,3.981218782347658802e+00,nan +-4.339999999999999858e+00,3.991975237314147051e+00,3.968899147970850194e+00,3.269095491082592980e-02 +-4.320000000000000284e+00,3.993013269060063486e+00,3.982898269224011578e+00,2.946282537533189208e-02 +-4.299999999999999822e+00,3.993979552543539313e+00,3.939100003418123386e+00,2.865522681337081753e-02 +-4.280000000000000249e+00,3.994874052978472534e+00,3.945635266010562692e+00,3.472014240355719705e-02 +-4.259999999999999787e+00,3.995696738162945127e+00,4.000947172556999831e+00,nan 
+-4.240000000000000213e+00,3.996447578480378571e+00,4.020343398727571405e+00,3.157915029390712114e-02 +-4.219999999999999751e+00,3.997126546900602762e+00,4.063889834011682467e+00,4.463116837108123403e-02 +-4.200000000000000178e+00,3.997733618980828574e+00,4.019044487495743390e+00,2.820994850940350754e-02 +-4.179999999999999716e+00,3.998268772866525378e+00,3.999693957024348379e+00,2.842345038179301675e-02 +-4.160000000000000142e+00,3.998731989292211964e+00,4.014216556726715623e+00,nan +-4.139999999999999680e+00,3.999123251582146654e+00,3.997568023205597232e+00,3.579063885698285929e-02 +-4.120000000000000107e+00,3.999442545650929937e+00,3.969380819221903778e+00,3.710054561456280209e-02 +-4.099999999999999645e+00,3.999689860004008501e+00,3.973378322910303240e+00,2.674569508640989302e-02 +-4.080000000000000071e+00,3.999865185738094020e+00,4.018743284686774331e+00,4.125625535047831166e-02 +-4.059999999999999609e+00,3.999968516541477559e+00,4.002858211233585983e+00,3.722005134771839280e-02 +-4.040000000000000036e+00,3.999999848694262283e+00,4.021668953186641637e+00,2.647815044301944643e-02 +-4.019999999999999574e+00,3.999959181068493574e+00,3.963600307242701959e+00,2.404215547884083759e-02 +-4.000000000000000000e+00,3.999846515128201663e+00,4.001938886864440548e+00,2.316279516014600401e-02 +-3.979999999999999982e+00,3.999661854929348337e+00,3.961121319087129944e+00,3.165838742547153611e-02 +-3.959999999999999964e+00,3.999405207119680838e+00,3.988298094713540465e+00,2.927789155113174474e-02 +-3.939999999999999947e+00,3.999076580938492498e+00,4.015211499281781116e+00,1.965729088192920068e-02 +-3.919999999999999929e+00,3.998675988216290555e+00,3.996739007072117822e+00,2.217006837373004455e-02 +-3.899999999999999911e+00,3.998203443374369392e+00,4.006967280403993747e+00,3.548421177927273823e-02 +-3.879999999999999893e+00,3.997658963424292722e+00,3.992420505343744708e+00,nan +-3.859999999999999876e+00,3.997042567967279414e+00,3.989833605239252012e+00,1.982777448634700374e-02 
+-3.839999999999999858e+00,3.996354279193499615e+00,3.977133776300772361e+00,nan +-3.820000000000000284e+00,3.995594121881275385e+00,3.968534106303187148e+00,4.404511893658814153e-02 +-3.799999999999999822e+00,3.994762123396186304e+00,4.046166805023654334e+00,3.313448396509128396e-02 +-3.780000000000000249e+00,3.993858313690090256e+00,3.981901684829826316e+00,2.802464965497018790e-02 +-3.759999999999999787e+00,3.992882725300037183e+00,3.968265699229948318e+00,3.089639612993963155e-02 +-3.740000000000000213e+00,3.991835393347104688e+00,3.993594708763612111e+00,2.708670552718019792e-02 +-3.719999999999999751e+00,3.990716355535129711e+00,3.978060837482736645e+00,3.452291125902230001e-02 +-3.700000000000000178e+00,3.989525652149353174e+00,3.999624538760653270e+00,3.961449084431505063e-02 +-3.679999999999999716e+00,3.988263326054968250e+00,3.959951862232888420e+00,2.127983196111482220e-02 +-3.660000000000000142e+00,3.986929422695578484e+00,3.992008806185594505e+00,3.730351277723908954e-02 +-3.639999999999999680e+00,3.985523990091560886e+00,3.983111142787472581e+00,nan +-3.620000000000000107e+00,3.984047078838337974e+00,3.997789721210798675e+00,4.189555115844698086e-02 +-3.599999999999999645e+00,3.982498742104557010e+00,3.978584700131533758e+00,3.025928312227217989e-02 +-3.580000000000000071e+00,3.980879035630175089e+00,3.999259303534389520e+00,2.857730727887747985e-02 +-3.560000000000000053e+00,3.979188017724452298e+00,3.990540510474509084e+00,nan +-3.540000000000000036e+00,3.977425749263855170e+00,4.006843582528653691e+00,3.607893641315891575e-02 +-3.520000000000000018e+00,3.975592293689864665e+00,3.989502366165125213e+00,5.280686333954774658e-02 +-3.500000000000000000e+00,3.973687717006691322e+00,3.971463221592128612e+00,3.195062668002687095e-02 +-3.479999999999999982e+00,3.971712087778907385e+00,3.984236422056169680e+00,3.653447718907179564e-02 +-3.459999999999999964e+00,3.969665477128972331e+00,3.965390902218237645e+00,nan 
+-3.439999999999999947e+00,3.967547958734682023e+00,3.988691848103080417e+00,3.194360253100081426e-02 +-3.419999999999999929e+00,3.965359608826521054e+00,3.915564628228665800e+00,3.335365548095119148e-02 +-3.399999999999999911e+00,3.963100506184926264e+00,3.933238574213607119e+00,1.955557934578588258e-02 +-3.379999999999999893e+00,3.960770732137463668e+00,3.984902392991356290e+00,3.531213606899333973e-02 +-3.359999999999999876e+00,3.958370370555916118e+00,3.966998602981461008e+00,3.858838304594502944e-02 +-3.339999999999999858e+00,3.955899507853295471e+00,3.980905662625323860e+00,nan +-3.320000000000000284e+00,3.953358232980767717e+00,3.914229362381833166e+00,3.160429335949033886e-02 +-3.299999999999999822e+00,3.950746637424507934e+00,3.994917048087540667e+00,3.132077094697865299e-02 +-3.280000000000000249e+00,3.948064815202489974e+00,3.943874923203797955e+00,2.512446779038992120e-02 +-3.259999999999999787e+00,3.945312862861222403e+00,3.950489243789083726e+00,2.346503295241367942e-02 +-3.240000000000000213e+00,3.942490879472442256e+00,3.927196810974445729e+00,3.915093278749896127e-02 +-3.219999999999999751e+00,3.939598966629797694e+00,3.947917455856836622e+00,2.476543934279373110e-02 +-3.200000000000000178e+00,3.936637228445541759e+00,3.969343297431183970e+00,3.412387431505126134e-02 +-3.179999999999999716e+00,3.933605771547286523e+00,3.925906099200734189e+00,2.901476964540048928e-02 +-3.160000000000000142e+00,3.930504705074882921e+00,3.923668551812799166e+00,2.540509909929494770e-02 +-3.139999999999999680e+00,3.927334140677494201e+00,3.947226670336628995e+00,2.993884543265358869e-02 +-3.120000000000000107e+00,3.924094192511003776e+00,3.928346744753318465e+00,3.630472353955733339e-02 +-3.099999999999999645e+00,3.920784977235900914e+00,3.900075964511259929e+00,4.332776229755198882e-02 +-3.080000000000000071e+00,3.917406614015876531e+00,3.935772299731806978e+00,2.449157550065346814e-02 
+-3.060000000000000053e+00,3.913959224517423507e+00,3.915896784887003879e+00,1.992532293741383978e-02 +-3.040000000000000036e+00,3.910442932910866976e+00,3.904143184923001275e+00,nan +-3.020000000000000018e+00,3.906857865873376578e+00,3.897907823509206615e+00,3.955967022312247006e-02 +-3.000000000000000000e+00,3.903204152594716536e+00,3.866438996497719049e+00,nan +-2.979999999999999982e+00,3.899481924786757148e+00,3.903357248827355352e+00,2.678761408580850739e-02 +-2.959999999999999964e+00,3.895691316698114193e+00,3.867880849871169868e+00,4.056478219242153765e-02 +-2.939999999999999947e+00,3.891832465135722785e+00,3.882397409790404375e+00,3.205584351100022877e-02 +-2.919999999999999929e+00,3.887905509495784173e+00,3.936961090451624568e+00,3.332833371997186156e-02 +-2.899999999999999911e+00,3.883910591807264279e+00,3.866043917754864445e+00,2.340248663650760716e-02 +-2.879999999999999893e+00,3.879847856792183247e+00,3.874818538198867124e+00,3.363872375973592233e-02 +-2.859999999999999876e+00,3.875717451948193837e+00,3.874819534869030413e+00,3.412069939766745885e-02 +-2.839999999999999858e+00,3.871519527660709503e+00,3.852214585211989117e+00,3.848297388549758169e-02 +-2.819999999999999840e+00,3.867254237353940471e+00,3.853548185955963934e+00,nan +-2.799999999999999822e+00,3.862921737693025825e+00,3.854754553304260423e+00,2.534333019107942730e-02 +-2.779999999999999805e+00,3.858522188852927748e+00,3.841071553729384025e+00,2.160748256322369762e-02 +-2.759999999999999787e+00,3.854055754874210926e+00,3.906409499587252476e+00,3.989927560475871327e-02 +-2.739999999999999769e+00,3.849522604131409675e+00,3.892863359620862163e+00,4.394563528675898834e-02 +-2.719999999999999751e+00,3.844922909946673517e+00,3.865947691256906626e+00,4.331492627779613691e-02 +-2.699999999999999734e+00,3.840256851390052351e+00,3.824190739531758254e+00,2.394046463981249276e-02 +-2.680000000000000160e+00,3.835524614318547076e+00,3.910131192202466188e+00,2.358546713714732707e-02 
+-2.660000000000000142e+00,3.830726392719251638e+00,3.842141175582946655e+00,3.143049748680051825e-02 +-2.640000000000000124e+00,3.825862390438136362e+00,3.823047218261981239e+00,3.201948188508568077e-02 +-2.620000000000000107e+00,3.820932823395655653e+00,3.872275415949148147e+00,2.823873364965024529e-02 +-2.600000000000000089e+00,3.815937922414227135e+00,3.805037068723069016e+00,2.923310143617065260e-02 +-2.580000000000000071e+00,3.810877936811229549e+00,3.847414491372637801e+00,4.797550196894023039e-02 +-2.560000000000000053e+00,3.805753138945383807e+00,3.843464708155962750e+00,3.266237082284365739e-02 +-2.540000000000000036e+00,3.800563829944950900e+00,3.798922067708248473e+00,3.133758414632323297e-02 +-2.520000000000000018e+00,3.795310346894044784e+00,3.827628701786526033e+00,2.317422859332170931e-02 +-2.500000000000000000e+00,3.789993071809343306e+00,3.809526646402432881e+00,3.295109809401969292e-02 +-2.479999999999999982e+00,3.784612442804545118e+00,3.747985523101889260e+00,4.050258952135226548e-02 +-2.459999999999999964e+00,3.779168967914883659e+00,3.806809286093004729e+00,1.923621539387884369e-02 +-2.439999999999999947e+00,3.773663242139748686e+00,3.781100587500339838e+00,nan +-2.419999999999999929e+00,3.768095968358509573e+00,3.749411789432546716e+00,3.866553106380841293e-02 +-2.399999999999999911e+00,3.762467982883645590e+00,3.792726529826150994e+00,5.083314405848265299e-02 +-2.379999999999999893e+00,3.756780286536163693e+00,3.749106715790383237e+00,1.836938560312795232e-02 +-2.359999999999999876e+00,3.751034082261140767e+00,3.757735405285781649e+00,2.169879163528952090e-02 +-2.339999999999999858e+00,3.745230820445019315e+00,3.752632605069845528e+00,3.800387835900527478e-02 +-2.319999999999999840e+00,3.739372253249956035e+00,3.746880786136597141e+00,3.890813293409937923e-02 +-2.299999999999999822e+00,3.733460499441795921e+00,3.750215780850299829e+00,5.729187485296455273e-02 
+-2.279999999999999805e+00,3.727498121354196670e+00,3.711741146279374792e+00,3.449917492064095981e-02 +-2.259999999999999787e+00,3.721488215798009147e+00,3.630930613745085189e+00,3.742083549552085997e-02 +-2.239999999999999769e+00,3.715434520886934155e+00,3.713166467526545222e+00,3.498465794338397744e-02 +-2.219999999999999751e+00,3.709341540901138323e+00,3.681110389341025968e+00,3.815477170164736920e-02 +-2.199999999999999734e+00,3.703214691441905515e+00,3.702545598010270389e+00,3.134983031602142645e-02 +-2.180000000000000160e+00,3.697060467232880399e+00,3.715653624123065768e+00,3.811872814876098720e-02 +-2.160000000000000142e+00,3.690886634985821591e+00,3.604065445077755392e+00,4.082881623022253242e-02 +-2.140000000000000124e+00,3.684702453758294638e+00,3.679127871616473033e+00,2.843288863848685011e-02 +-2.120000000000000107e+00,3.678518925173064336e+00,3.723207343503561972e+00,3.295457755380135079e-02 +-2.100000000000000089e+00,3.672349075728456036e+00,3.593677436562992877e+00,nan +-2.080000000000000071e+00,3.666208273188952926e+00,3.730259422314661233e+00,9.533727857204726819e-02 +-2.060000000000000053e+00,3.660114578688402620e+00,3.652676325481325481e+00,4.814528861415776767e-02 +-2.040000000000000036e+00,3.654089135686744871e+00,3.664039414557082708e+00,nan +-2.020000000000000018e+00,3.648156596278114172e+00,3.605572286811380867e+00,7.161423172160229222e-02 +-2.000000000000000000e+00,3.642345584537503900e+00,3.539623085803978331e+00,6.708869557030820718e-02 +-1.979999999999999982e+00,3.636689195601250812e+00,3.619072317723509347e+00,5.743569593331862566e-02 +-1.959999999999999964e+00,3.631225527992778090e+00,3.711185604571868524e+00,8.364311972260785044e-02 +-1.939999999999999947e+00,3.625998245323204472e+00,3.719738846211896988e+00,7.184953346731948187e-02 +-1.919999999999999929e+00,3.621057161915839373e+00,3.640954715256664809e+00,6.558577658908354424e-02 +-1.899999999999999911e+00,3.616458845130710476e+00,3.606185742205383793e+00,1.060721402286213394e-01 
+-1.879999999999999893e+00,3.612267225214248700e+00,3.649108418714726465e+00,9.092475993307447268e-02 +-1.859999999999999876e+00,3.608554201394059913e+00,3.709944524969695401e+00,7.155454639976135955e-02 +-1.839999999999999858e+00,3.605400230713310616e+00,3.419438852172148824e+00,1.228594590938914127e-01 +-1.819999999999999840e+00,3.602894883799263592e+00,3.593174533405012916e+00,1.550942001530101755e-01 +-1.799999999999999822e+00,3.601137349443074065e+00,3.520638523828726107e+00,1.219140064569037563e-01 +-1.779999999999999805e+00,3.600236867602554458e+00,3.608267192342823115e+00,1.205649991710633223e-01 +-1.759999999999999787e+00,3.600313068307190889e+00,3.453201243804023601e+00,1.376335561212327918e-01 +-1.739999999999999769e+00,3.601496192037469957e+00,3.607774685478800425e+00,1.665844544798526783e-01 +-1.719999999999999751e+00,3.603927165569829061e+00,3.640026211733285333e+00,nan +-1.699999999999999734e+00,3.607757506133415060e+00,3.642227320257549028e+00,1.268407969723837903e-01 +-1.679999999999999716e+00,3.613149026128606334e+00,3.803890280567367999e+00,1.576114673042917080e-01 +-1.660000000000000142e+00,3.620273310724677618e+00,3.735897857753192053e+00,2.214995227455958393e-01 +-1.640000000000000124e+00,3.629310941496409093e+00,3.663913595669907153e+00,nan +-1.620000000000000107e+00,3.640450440980700364e+00,3.641401992387191200e+00,1.551600530436650083e-01 +-1.600000000000000089e+00,3.653886915724804396e+00,3.217048317236007815e+00,2.630495244546968703e-01 +-1.580000000000000071e+00,3.669820379129825394e+00,3.892532742459869688e+00,nan +-1.560000000000000053e+00,3.688453740214230780e+00,3.360368769070483985e+00,1.982329599456646163e-01 +-1.540000000000000036e+00,3.709990450350114877e+00,3.752961584101674131e+00,2.460402241728126826e-01 +-1.520000000000000018e+00,3.734631807042549934e+00,3.745930947639183550e+00,3.309654834926570310e-01 +-1.500000000000000000e+00,3.762573921872505167e+00,3.798556507140971927e+00,2.244332830917858124e-01 
+-1.479999999999999982e+00,3.794004368706074271e+00,3.810433536595719151e+00,2.921392931771470414e-01 +-1.459999999999999964e+00,3.829098538041090460e+00,3.916929666201640181e+00,2.653814591464132566e-01 +-1.439999999999999947e+00,3.868015733723898286e+00,4.031456282874307462e+00,3.642239947464401273e-01 +-1.419999999999999929e+00,3.910895058985774497e+00,4.756015559092563905e+00,5.166291421943192130e-01 +-1.399999999999999911e+00,3.957851149539173718e+00,3.594508053563978400e+00,3.764090765633805180e-01 +-1.379999999999999893e+00,4.008969822019845530e+00,3.995552010376214991e+00,4.221795343405287393e-01 +-1.359999999999999876e+00,4.064303716012481082e+00,3.802382220218354369e+00,4.175557607001874061e-01 +-1.339999999999999858e+00,4.123868016884467913e+00,4.654621606587832972e+00,3.786510240332576771e-01 +-1.319999999999999840e+00,4.187636354293537977e+00,4.498210774344039820e+00,3.245373535245575369e-01 +-1.299999999999999822e+00,4.255536977153194123e+00,4.167062990719260895e+00,5.052235057933155415e-01 +-1.279999999999999805e+00,4.327449309673636613e+00,4.216951262431840597e+00,6.532837690567284694e-01 +-1.259999999999999787e+00,4.403200994517432854e+00,4.559110373914525560e+00,3.273162563351011967e-01 +-1.239999999999999769e+00,4.482565527836990249e+00,4.762139581965920243e+00,5.769640282217669558e-01 +-1.219999999999999751e+00,4.565260586777036167e+00,3.280248253976095540e+00,5.275499350589685221e-01 +-1.199999999999999734e+00,4.650947142787573263e+00,4.276145716520765028e+00,nan +-1.179999999999999716e+00,4.739229443748227411e+00,4.600817916539200247e+00,6.092834368481748752e-01 +-1.160000000000000142e+00,4.829655934498800818e+00,4.534854213018084224e+00,6.756216430995778399e-01 +-1.140000000000000124e+00,4.921721169053698830e+00,5.024019049064923692e+00,5.385345533169113574e-01 +-1.120000000000000107e+00,5.014868748808485321e+00,5.239870345111208749e+00,5.402099371398212391e-01 
+-1.100000000000000089e+00,5.108495299791377064e+00,5.485974253161926661e+00,4.018729221391404982e-01 +-1.080000000000000071e+00,5.201955478938754318e+00,5.318244642725155735e+00,6.373361947883031675e-01 +-1.060000000000000053e+00,5.294567975042502539e+00,6.760430236052386910e+00,6.124735590859445855e-01 +-1.040000000000000036e+00,5.385622445066512221e+00,5.752138952060318999e+00,4.249735541418221407e-01 +-1.020000000000000018e+00,5.474387301659097815e+00,4.159619060982223004e+00,8.032549364880220422e-01 +-1.000000000000000000e+00,5.560118243634827095e+00,6.137115336978383517e+00,6.702421100272603072e-01 +-9.799999999999995381e-01,5.642067398715594706e+00,5.872802388731122925e+00,nan +-9.599999999999999645e-01,5.719492927646578906e+00,6.281236755604411748e+00,6.472217891646340204e-01 +-9.399999999999995026e-01,5.791668921639725909e+00,5.565492156114431488e+00,6.959215464098150727e-01 +-9.199999999999999289e-01,5.857895411580281930e+00,6.546745314566285501e+00,9.349172151308963175e-01 +-9.000000000000003553e-01,5.917508298105428999e+00,4.235996624335836458e+00,nan +-8.799999999999998934e-01,5.969889006956252686e+00,5.895946189351668032e+00,1.151543603478050626e+00 +-8.600000000000003197e-01,6.014473674211297194e+00,8.090416456392947708e+00,9.886224714269703506e-01 +-8.399999999999998579e-01,6.050761671276578824e+00,7.099654579905307017e+00,nan +-8.200000000000002842e-01,6.078323289820192876e+00,7.783447599187353738e+00,1.563679351082028157e+00 +-7.999999999999998224e-01,6.096806422023442806e+00,5.860696892165741190e+00,1.070756444618480741e+00 +-7.800000000000002487e-01,6.105942091237313463e+00,6.197255583792041556e+00,7.878498533061101261e-01 +-7.599999999999997868e-01,6.105548711894689617e+00,5.755018464355096341e+00,8.295877048549399335e-01 +-7.400000000000002132e-01,6.095534984708045556e+00,7.207746759775751855e+00,4.903793932628179864e-01 +-7.199999999999997513e-01,6.075901363035685065e+00,6.849506693526540069e+00,8.751365338238001890e-01 
+-7.000000000000001776e-01,6.046740057990412254e+00,7.063283370964628105e+00,8.139025491327271933e-01 +-6.799999999999997158e-01,6.008233582492343849e+00,3.945646506068891224e+00,1.013587577515847382e+00 +-6.600000000000001421e-01,5.960651867099278967e+00,6.267470566415362043e+00,7.742101094832944952e-01 +-6.399999999999996803e-01,5.904348012150392400e+00,6.558919601455357906e+00,7.859065498006260198e-01 +-6.200000000000001066e-01,5.839752770631418954e+00,7.077991229023441022e+00,6.918590236314428044e-01 +-5.999999999999996447e-01,5.767367883374772575e+00,6.213534208408828086e+00,9.083962692379881076e-01 +-5.800000000000000711e-01,5.687758412002065889e+00,5.040827980153054000e+00,9.659222627816541618e-01 +-5.599999999999996092e-01,5.601544234771209219e+00,6.088292857633700095e+00,nan +-5.400000000000000355e-01,5.509390885716415553e+00,4.933795191345740960e+00,6.901311785896823148e-01 +-5.199999999999995737e-01,5.411999927827702450e+00,4.549334630528049317e+00,5.649737176498387248e-01 +-5.000000000000000000e-01,5.310099056329699252e+00,5.385998717145486836e+00,8.874764677140921654e-01 +-4.799999999999995381e-01,5.204432128371307087e+00,5.833853142262386271e+00,8.402770111488416793e-01 +-4.599999999999999645e-01,5.095749310771889640e+00,5.659937411166305665e+00,8.734890865389354220e-01 +-4.399999999999995026e-01,4.984797528177232095e+00,3.821608855697117058e+00,6.361529153821200433e-01 +-4.199999999999999289e-01,4.872311380485815135e+00,4.956658156492856016e+00,7.052981973370683333e-01 +-3.999999999999994671e-01,4.759004681251896685e+00,4.579878580113756037e+00,5.913716519320653120e-01 +-3.799999999999998934e-01,4.645562748588058177e+00,5.313046851124894943e+00,6.604572801362433854e-01 +-3.600000000000003197e-01,4.532635557571438767e+00,3.276085072396378806e+00,5.970258352233933641e-01 +-3.399999999999998579e-01,4.420831839040548772e+00,4.945262532692082047e+00,6.546549831258263730e-01 
+-3.200000000000002842e-01,4.310714184698730023e+00,5.074998639686343793e+00,5.939270740467554877e-01 +-2.999999999999998224e-01,4.202795193348162428e+00,5.647855412945030906e+00,5.294028271246801198e-01 +-2.800000000000002487e-01,4.097534668557971216e+00,4.105665388254435832e+00,6.313947350577137074e-01 +-2.599999999999997868e-01,3.995337854753744811e+00,4.450308562157782255e+00,nan +-2.400000000000002132e-01,3.896554677160165792e+00,4.071521720834863522e+00,4.594469840608414457e-01 +-2.199999999999997513e-01,3.801479931696105652e+00,2.963422081644037753e+00,7.669847097205949593e-01 +-2.000000000000001776e-01,3.710354354176940817e+00,3.868455002797947184e+00,4.251092352493783255e-01 +-1.799999999999997158e-01,3.623366484278372823e+00,3.536676735169854702e+00,3.809115333435043049e-01 +-1.600000000000001421e-01,3.540655228812251032e+00,3.211077182715823319e+00,3.539182369697171926e-01 +-1.399999999999996803e-01,3.462313021005367197e+00,3.157142724539423462e+00,4.179098049588486075e-01 +-1.200000000000001066e-01,3.388389467611172101e+00,3.253957454666461580e+00,nan +-9.999999999999964473e-02,3.318895373688555139e+00,3.548219807515344382e+00,3.531347515347905142e-01 +-8.000000000000007105e-02,3.253807035547039650e+00,3.507299359211579315e+00,2.648647778152579124e-01 +-5.999999999999960920e-02,3.193070695419219618e+00,2.682990980990552998e+00,4.679258280625686051e-01 +-4.000000000000003553e-02,3.136607056572494390e+00,2.988681850963338160e+00,4.187317005153019522e-01 +-1.999999999999957367e-02,3.084315764476654120e+00,3.143729582095237340e+00,2.296789574801562372e-01 +0.000000000000000000e+00,3.036079767953566932e+00,2.421701960787510988e+00,3.810571805066982343e-01 +2.000000000000046185e-02,2.991769483600934976e+00,2.880319783599672778e+00,2.221249274397878171e-01 +4.000000000000003553e-02,2.951246696869161745e+00,3.722343570451513095e+00,2.199211530517474755e-01 +6.000000000000049738e-02,2.914368143665852173e+00,2.857603579947825878e+00,2.448851310521519553e-01 
+8.000000000000007105e-02,2.880988726986476234e+00,3.095978508429543030e+00,3.840540878242945655e-01 +1.000000000000005329e-01,2.850964333578392740e+00,2.914637263903819164e+00,1.904934648461840685e-01 +1.200000000000001066e-01,2.824154225837062882e+00,2.960170259963005979e+00,1.938639502937708481e-01 +1.399999999999996803e-01,2.800422993847327291e+00,2.949001397061277618e+00,2.123753112352188555e-01 +1.600000000000001421e-01,2.779642061600731573e+00,3.049640755278711701e+00,1.228069598001861884e-01 +1.799999999999997158e-01,2.761690749861532179e+00,2.728754984418608753e+00,nan +2.000000000000001776e-01,2.746456905873486232e+00,2.727849241201167452e+00,2.231426434949308757e-01 +2.199999999999997513e-01,2.733837117079805257e+00,2.992638787130469158e+00,2.191637740227992959e-01 +2.400000000000002132e-01,2.723736532275115696e+00,2.251918265667214758e+00,2.480313535065964692e-01 +2.599999999999997868e-01,2.716068319142801890e+00,2.889134209031075429e+00,2.459585819361506664e-01 +2.800000000000002487e-01,2.710752791984413435e+00,2.424532517989040237e+00,9.053335818983149208e-02 +2.999999999999998224e-01,2.707716247654336161e+00,2.165867195473284212e+00,1.703346508348992372e-01 +3.200000000000002842e-01,2.706889551304180852e+00,2.834687805641520786e+00,2.862061388394843697e-01 +3.399999999999998579e-01,2.708206516542300157e+00,2.506058393497093029e+00,2.187887655218112293e-01 +3.600000000000003197e-01,2.711602127038729826e+00,2.878502009412273299e+00,3.009170618786285445e-01 +3.799999999999998934e-01,2.717010648456478439e+00,2.538209946113609838e+00,2.513645475901037374e-01 +4.000000000000003553e-01,2.724363680854988701e+00,2.323923192893809109e+00,3.558336096401703008e-01 +4.199999999999999289e-01,2.733588202366449948e+00,2.335711674975006424e+00,2.440863314527701444e-01 +4.400000000000003908e-01,2.744604654955604239e+00,3.188210899075321514e+00,2.757914055844488677e-01 +4.599999999999999645e-01,2.757325122396069972e+00,2.392353676431516529e+00,2.469282079526576301e-01 
+4.800000000000004263e-01,2.771651649184879052e+00,2.758761172409790241e+00,3.680833929312681274e-01 +5.000000000000000000e-01,2.787474746926924762e+00,3.169012719153432300e+00,3.436627873706666336e-01 +5.200000000000004619e-01,2.804672131714068595e+00,2.033325531486355864e+00,2.563589488507343472e-01 +5.400000000000000355e-01,2.823107732173131268e+00,2.606195281701388033e+00,2.688410546078896513e-01 +5.600000000000004974e-01,2.842631003153919700e+00,2.581526900163175764e+00,2.837953043520541341e-01 +5.800000000000000711e-01,2.863076574485301506e+00,2.467688230198862964e+00,3.711213884661984097e-01 +6.000000000000005329e-01,2.884264257883038418e+00,2.831073027789893892e+00,3.175362955311629154e-01 +6.200000000000001066e-01,2.905999428015011876e+00,3.161240012035097191e+00,1.997911486567817974e-01 +6.399999999999996803e-01,2.928073786015819024e+00,2.956347334622390743e+00,4.285265589597905822e-01 +6.600000000000001421e-01,2.950266505522154858e+00,2.527608156534642880e+00,nan +6.799999999999997158e-01,2.972345752731148316e+00,2.971776586979084644e+00,5.310266580236164824e-01 +7.000000000000001776e-01,2.994070563250425199e+00,2.877290794515752204e+00,nan +7.199999999999997513e-01,3.015193049817741500e+00,3.389214360428408845e+00,4.743984853334949348e-01 +7.400000000000002132e-01,3.035460906542356785e+00,2.522099656711926485e+00,4.929410237222000557e-01 +7.599999999999997868e-01,3.054620167391726149e+00,2.155763592687337749e+00,3.841695803658565778e-01 +7.800000000000002487e-01,3.072418169448793535e+00,2.799985290900282031e+00,5.452928167360916456e-01 +7.999999999999998224e-01,3.088606665222755066e+00,3.181999059836660315e+00,nan +8.200000000000002842e-01,3.102945023219247478e+00,3.057996485246090135e+00,nan +8.399999999999998579e-01,3.115203452249224458e+00,2.115576753796697851e+00,3.280525299425075536e-01 +8.600000000000003197e-01,3.125166182731077491e+00,2.721097204259094404e+00,5.254098256952898227e-01 
+8.799999999999998934e-01,3.132634537629653160e+00,3.061150513259692563e+00,4.445749637844202407e-01 +9.000000000000003553e-01,3.137429826744533301e+00,2.669388958342580498e+00,4.072730575664818553e-01 +9.199999999999999289e-01,3.139396000823940547e+00,3.627533156198332609e+00,6.026970045505178897e-01 +9.400000000000003908e-01,3.138402006403019140e+00,2.765688681358758227e+00,5.117093613705018251e-01 +9.599999999999999645e-01,3.134343788256103824e+00,3.192183883069087624e+00,3.587640992401981976e-01 +9.800000000000004263e-01,3.127145893770689078e+00,3.968383003330220760e+00,nan +1.000000000000000000e+00,3.116762642207336675e+00,2.338013832820840854e+00,nan +1.020000000000000462e+00,3.103178831473855315e+00,3.313408994267729835e+00,6.526803408414116880e-01 +1.040000000000000036e+00,3.086409965448550174e+00,3.040682318981518861e+00,5.836565795219444963e-01 +1.060000000000000497e+00,3.066501995744913334e+00,2.779849182033996335e+00,4.256561800169474385e-01 +1.080000000000000071e+00,3.043530582812236762e+00,2.904547044921763188e+00,3.041189794730353446e-01 +1.100000000000000533e+00,3.017599892100946590e+00,2.986790168515752075e+00,3.526160436787704744e-01 +1.120000000000000107e+00,2.988840951381988020e+00,3.510619812891737190e+00,4.589691190502168672e-01 +1.140000000000000568e+00,2.957409604905899325e+00,3.610893289988389654e+00,5.683411129188775668e-01 +1.160000000000000142e+00,2.923484108655963531e+00,2.241228078826833059e+00,3.469867901057105963e-01 +1.179999999999999716e+00,2.887262418262057651e+00,3.814941693212685614e+00,3.599045368114944798e-01 +1.200000000000000178e+00,2.848959227012042739e+00,3.265716891407834943e+00,2.892595048204359420e-01 +1.219999999999999751e+00,2.808802815688143717e+00,3.081223862719026485e+00,nan +1.240000000000000213e+00,2.767031778581812596e+00,2.922621628709752617e+00,nan +1.259999999999999787e+00,2.723891690971223412e+00,2.600552915231134854e+00,5.720317054932907430e-01 
+1.280000000000000249e+00,2.679631782603588697e+00,2.295526789533440581e+00,4.703156894625505902e-01 +1.299999999999999822e+00,2.634501679384664286e+00,2.134518269816672920e+00,2.073651949699803210e-01 +1.320000000000000284e+00,2.588748271661790223e+00,2.505688380794310000e+00,3.565010685590284667e-01 +1.339999999999999858e+00,2.542612762358322342e+00,2.657778015600396060e+00,2.054430250094419763e-01 +1.360000000000000320e+00,2.496327941974050724e+00,2.880342521059242600e+00,3.269488151166322631e-01 +1.379999999999999893e+00,2.450115730332577790e+00,2.744123595985678232e+00,3.190639167541055032e-01 +1.400000000000000355e+00,2.404185017174084393e+00,2.834484243824526128e+00,3.600015743496797760e-01 +1.419999999999999929e+00,2.358729825510707645e+00,3.111700384330541080e+00,3.055750774141779980e-01 +1.440000000000000391e+00,2.313927813331210892e+00,1.984596428286149017e+00,2.535374835126482096e-01 +1.459999999999999964e+00,2.269939121002487958e+00,2.184618266171591827e+00,4.125742803545879300e-01 +1.480000000000000426e+00,2.226905563791397480e+00,2.541586847516879466e+00,3.075604621153351093e-01 +1.500000000000000000e+00,2.184950161522252188e+00,1.886697152297341695e+00,3.969470656999851688e-01 +1.520000000000000462e+00,2.144176990664828697e+00,2.042829442663604667e+00,3.025771102881774643e-01 +1.540000000000000036e+00,2.104671338254583635e+00,2.260314403072414713e+00,2.367490966951061038e-01 +1.560000000000000497e+00,2.066500132085010044e+00,1.967071821134906751e+00,3.089001509687088198e-01 +1.580000000000000071e+00,2.029712617650177631e+00,2.255154594264408896e+00,2.813968051504651124e-01 +1.600000000000000533e+00,1.994341249385676251e+00,1.664795947481280347e+00,2.310721598292219126e-01 +1.620000000000000107e+00,1.960402761857115372e+00,1.898006700231108290e+00,2.653301084336139870e-01 +1.640000000000000568e+00,1.927899385643779162e+00,2.167986247869837069e+00,nan +1.660000000000000142e+00,1.896820172700660834e+00,2.052209084359961633e+00,2.480990399266391477e-01 
+1.679999999999999716e+00,1.867142396870883125e+00,2.030829428195580988e+00,nan +1.700000000000000178e+00,1.838832996860484403e+00,2.079538602632515332e+00,nan +1.719999999999999751e+00,1.811850031262943528e+00,1.447949949786532686e+00,1.487681488926025641e-01 +1.740000000000000213e+00,1.786144118008170745e+00,1.984142869518430441e+00,2.126409080700615295e-01 +1.759999999999999787e+00,1.761659833782886153e+00,1.594984196307964508e+00,1.531504013947989629e-01 +1.780000000000000249e+00,1.738337052400659166e+00,1.689744286673886542e+00,nan +1.799999999999999822e+00,1.716112204670008801e+00,1.505327862233904135e+00,1.433670028956879894e-01 +1.820000000000000284e+00,1.694919445906048372e+00,1.657357557672590298e+00,1.936888522083639597e-01 +1.839999999999999858e+00,1.674691720755129953e+00,1.627728676663654062e+00,1.973138182971920507e-01 +1.860000000000000320e+00,1.655361718365914392e+00,1.562003428819664252e+00,1.075696220393412700e-01 +1.879999999999999893e+00,1.636862714072504277e+00,1.495731507212113787e+00,1.246181847495901329e-01 +1.900000000000000355e+00,1.619129296599083778e+00,1.696456855549726805e+00,1.184320222108378451e-01 +1.919999999999999929e+00,1.602097982310095814e+00,1.753621707095358806e+00,1.654974896688882480e-01 +1.940000000000000391e+00,1.585707720189084124e+00,1.510696371828758267e+00,nan +1.959999999999999964e+00,1.569900293020817461e+00,1.593205776328715517e+00,9.488123223315308996e-02 +1.980000000000000426e+00,1.554620621675399939e+00,1.371252828445131744e+00,9.083416534289066868e-02 +2.000000000000000000e+00,1.539816980460987805e+00,1.631767388510174444e+00,5.605529023766811503e-02 +2.020000000000000462e+00,1.525441132243590348e+00,1.510870281561765172e+00,9.794045190385711197e-02 +2.040000000000000036e+00,1.511448392455632117e+00,1.509917665204379755e+00,5.847087504586767626e-02 +2.060000000000000497e+00,1.497797631262067775e+00,1.457511736987583184e+00,1.026291527197704528e-01 
+2.080000000000000071e+00,1.484451223059952474e+00,1.337502930906462328e+00,nan +2.100000000000000533e+00,1.471374952192380681e+00,1.428158853424737362e+00,6.186049157060879988e-02 +2.120000000000000107e+00,1.458537883298729465e+00,1.334146609616660761e+00,nan +2.140000000000000568e+00,1.445912204137405288e+00,1.389119451915253656e+00,6.753635107838724949e-02 +2.160000000000000142e+00,1.433473048039973996e+00,1.411502044605835504e+00,4.818573805837670504e-02 +2.179999999999999716e+00,1.421198302418976933e+00,1.512147721837651781e+00,4.450992438755188574e-02 +2.200000000000000178e+00,1.409068408984782028e+00,1.342215912911993270e+00,5.676913138113579699e-02 +2.219999999999999751e+00,1.397066160554620851e+00,1.533350868768530884e+00,6.970841109848661954e-02 +2.240000000000000213e+00,1.385176498580572568e+00,1.398941725956053928e+00,6.789068920224745896e-02 +2.259999999999999787e+00,1.373386314799874164e+00,1.315483430698168199e+00,4.651874555391048022e-02 +2.280000000000000249e+00,1.361684259733650704e+00,1.373201771977372143e+00,4.002537228954656384e-02 +2.299999999999999822e+00,1.350060560138784949e+00,1.285003032494459685e+00,3.701739336027300153e-02 +2.320000000000000284e+00,1.338506846958248842e+00,1.337936194998987194e+00,4.090671416312927772e-02 +2.339999999999999858e+00,1.327015994821441502e+00,1.276400380907636301e+00,5.126562293055141051e-02 +2.360000000000000320e+00,1.315581973718558784e+00,1.300022331653995611e+00,5.490064925439376575e-02 +2.379999999999999893e+00,1.304199713110817394e+00,1.333665544228300748e+00,nan +2.400000000000000355e+00,1.292864978438369006e+00,1.293747202699239374e+00,3.450761667350280004e-02 +2.419999999999999929e+00,1.281574259746148581e+00,1.244199007704499316e+00,nan +2.440000000000000391e+00,1.270324671959447871e+00,1.324761412428399510e+00,nan +2.459999999999999964e+00,1.259113866200388898e+00,1.235109059399808418e+00,2.588565954245041975e-02 +2.480000000000000426e+00,1.247939951437686767e+00,1.302786487079408806e+00,nan 
+2.500000000000000000e+00,1.236801425699423307e+00,1.193293907935522258e+00,4.554179185929631279e-02 +2.520000000000000462e+00,1.225697116046164714e+00,1.166485983069556198e+00,2.969047947990152009e-02 +2.540000000000000036e+00,1.214626126494402270e+00,1.227974018485518126e+00,nan +2.560000000000000497e+00,1.203587793092782743e+00,1.304406572007846821e+00,2.881781570521451882e-02 +2.580000000000000071e+00,1.192581645381629141e+00,1.118044531986680745e+00,5.065123842298295542e-02 +2.600000000000000533e+00,1.181607373505606073e+00,1.176494597509851525e+00,3.111566005871567453e-02 +2.620000000000000107e+00,1.170664800296814967e+00,1.138071003896516276e+00,1.588287971280955349e-02 +2.640000000000000568e+00,1.159753857697950474e+00,1.182284596889840778e+00,3.734954384966652358e-02 +2.660000000000000142e+00,1.148874566950204290e+00,1.142711990112235343e+00,2.222259134544606399e-02 +2.679999999999999716e+00,1.138027022026154800e+00,1.172697850945799924e+00,3.267050585695246284e-02 +2.700000000000000178e+00,1.127211375842620544e+00,1.125106052110490307e+00,2.879231061755915463e-02 +2.719999999999999751e+00,1.116427828841014547e+00,1.063066446130191745e+00,3.453316633484269926e-02 +2.740000000000000213e+00,1.105676619572390829e+00,1.091014756190950008e+00,3.593049971171793922e-02 +2.759999999999999787e+00,1.094958016970547288e+00,1.117608734824651284e+00,3.620445660684060152e-02 +2.780000000000000249e+00,1.084272314038812191e+00,1.070946674106443952e+00,2.945674602064272604e-02 +2.799999999999999822e+00,1.073619822714509731e+00,1.158073375390544557e+00,4.317271897172083456e-02 +2.820000000000000284e+00,1.063000869709371354e+00,1.068532570468847309e+00,3.608477525373053607e-02 +2.839999999999999858e+00,1.052415793154658274e+00,1.073138054167278588e+00,3.920116041946775098e-02 +2.860000000000000320e+00,1.041864939906452348e+00,1.051324237483512336e+00,3.217497612874499480e-02 +2.879999999999999893e+00,1.031348663389939579e+00,1.048394425887490877e+00,2.863637825214125979e-02 
+2.900000000000000355e+00,1.020867321881581136e+00,1.043197653893642496e+00,2.303674148816653097e-02 +2.919999999999999929e+00,1.010421277145405083e+00,9.927415994114875408e-01,2.750601863091974020e-02 +2.940000000000000391e+00,1.000010893354289720e+00,1.041076924330221587e+00,2.705651838895691344e-02 +2.959999999999999964e+00,9.896365362395938003e-01,9.525405867745834199e-01,1.703397553553930369e-02 +2.980000000000000426e+00,9.792985724228823186e-01,9.813458308074753944e-01,3.980445657729349968e-02 +3.000000000000000000e+00,9.689973688922464135e-01,9.788892868312137896e-01,3.501065160424287476e-02 +3.019999999999999574e+00,9.587332925929010763e-01,9.791733570405278808e-01,2.074376790171749904e-02 +3.040000000000000924e+00,9.485067101077330198e-01,9.484615006489022226e-01,3.100365139014824370e-02 +3.060000000000000497e+00,9.383179874083366068e-01,9.351859625054994574e-01,3.481369483420125588e-02 +3.080000000000000071e+00,9.281674896610263570e-01,9.057035218946286603e-01,2.772343988678164020e-02 +3.099999999999999645e+00,9.180555810756014790e-01,8.651708341540602643e-01,3.921165500460101205e-02 +3.120000000000000995e+00,9.079826247871616296e-01,9.303704135098522787e-01,2.771220111105161463e-02 +3.140000000000000568e+00,8.979489827634143939e-01,9.193051217201706482e-01,nan +3.160000000000000142e+00,8.879550157315235781e-01,9.352467420486022531e-01,3.588952176393950949e-02 +3.179999999999999716e+00,8.780010831199275279e-01,8.813652204922443056e-01,nan +3.199999999999999289e+00,8.680875430115327918e-01,9.390011623128490248e-01,1.901648106737680380e-02 +3.220000000000000639e+00,8.582147521055579764e-01,8.600902219662046599e-01,4.474787744605149969e-02 +3.240000000000000213e+00,8.483830656859036035e-01,8.523875153073585675e-01,3.323721427792761945e-02 +3.259999999999999787e+00,8.385928375944491364e-01,8.644647000854875918e-01,2.951322030495437398e-02 +3.279999999999999361e+00,8.288444202080517131e-01,8.690682320477158829e-01,2.307464008203872000e-02 
+3.300000000000000711e+00,8.191381644183171051e-01,8.079756093381198490e-01,2.212612398486654697e-02 +3.320000000000000284e+00,8.094744196134425751e-01,8.294698933376599692e-01,3.349877650881752378e-02 +3.339999999999999858e+00,7.998535336616054980e-01,7.773164878496438002e-01,2.792524629966190025e-02 +3.359999999999999432e+00,7.902758528955147188e-01,7.987031000048978591e-01,2.519442004395512635e-02 +3.380000000000000782e+00,7.807417220978202232e-01,7.915291710156696636e-01,2.941592650294502787e-02 +3.400000000000000355e+00,7.712514844871715125e-01,7.742913156876776171e-01,3.018585662763769925e-02 +3.419999999999999929e+00,7.618054817047594796e-01,8.052117292761523659e-01,3.534820799363974642e-02 +3.439999999999999503e+00,7.524040538012307655e-01,6.975738328763312346e-01,nan +3.460000000000000853e+00,7.430475392238857779e-01,7.456754448582639805e-01,5.177506811051349467e-02 +3.480000000000000426e+00,7.337362748040932026e-01,7.971989053927358571e-01,nan +3.500000000000000000e+00,7.244705957448805966e-01,7.500214809794597537e-01,2.973325203337600503e-02 +3.519999999999999574e+00,7.152508356086680896e-01,7.405704628922389343e-01,2.644933588288904402e-02 +3.540000000000000924e+00,7.060773263051204340e-01,7.010520360337462176e-01,2.751384680887634590e-02 +3.560000000000000497e+00,6.969503980791000863e-01,7.748798508620088343e-01,2.238844522514950308e-02 +3.580000000000000071e+00,6.878703794987069964e-01,7.037393802241858065e-01,3.022398023282772683e-02 +3.599999999999999645e+00,6.788375974434043281e-01,6.853447274267203682e-01,3.659059286972034947e-02 +3.620000000000000995e+00,6.698523770922192311e-01,6.404736362845638853e-01,4.159673361489631821e-02 +3.640000000000000568e+00,6.609150419120116693e-01,5.796863065161470541e-01,2.310965550118417033e-02 +3.660000000000000142e+00,6.520259136458131932e-01,6.706090484334317203e-01,2.150927154134406327e-02 +3.679999999999999716e+00,6.431853123012337692e-01,6.590807197035827292e-01,nan 
+3.700000000000001066e+00,6.343935561389366651e-01,6.579262974794020113e-01,3.664203532810380443e-02 +3.720000000000000639e+00,6.256509616611743985e-01,6.362810821527531413e-01,nan +3.740000000000000213e+00,6.169578436003895217e-01,6.329963281254368246e-01,3.069766501312837931e-02 +3.759999999999999787e+00,6.083145149078831304e-01,5.674844485932917237e-01,2.929212140056279556e-02 +3.779999999999999361e+00,5.997212867425509852e-01,6.020347859780290634e-01,nan +3.800000000000000711e+00,5.911784684596717021e-01,5.979345896309717912e-01,nan +3.820000000000000284e+00,5.826863675997813186e-01,5.921153801598458832e-01,2.472956858646184128e-02 +3.839999999999999858e+00,5.742452898775902703e-01,5.168250498864187525e-01,3.932557515162073830e-02 +3.859999999999999432e+00,5.658555391709854110e-01,5.804153801121019196e-01,2.396882167584396886e-02 +3.880000000000000782e+00,5.575174175100824359e-01,5.121237847876588534e-01,2.891589906210544070e-02 +3.900000000000000355e+00,5.492312250663633488e-01,5.784753349303056735e-01,3.652093289156662509e-02 +3.919999999999999929e+00,5.409972601418588933e-01,5.514372689831463781e-01,2.860219042728869357e-02 +3.939999999999999503e+00,5.328158191584188019e-01,4.913097559383847734e-01,2.105444236715212428e-02 +3.960000000000000853e+00,5.246871966470317838e-01,5.158491350566534184e-01,4.172547955572703665e-02 +3.980000000000000426e+00,5.166116852372329982e-01,5.006790115005972375e-01,nan +4.000000000000000000e+00,5.085895756465597106e-01,5.296558230138243006e-01,2.904212449707989363e-02 +4.019999999999999574e+00,5.006211566700918825e-01,5.527501560920484724e-01,3.635397445862977722e-02 +4.040000000000000924e+00,4.927067151700498249e-01,5.121606975311043808e-01,2.941529899783447422e-02 +4.060000000000000497e+00,4.848465360654752865e-01,4.620786835801224401e-01,3.957298503403856166e-02 +4.080000000000000071e+00,4.770409023219653322e-01,5.631392727992968750e-01,3.838219534881322920e-02 
+4.099999999999999645e+00,4.692900949414935408e-01,5.001731340426340822e-01,2.855744716291427898e-02 +4.120000000000000995e+00,4.615943929522858813e-01,4.759932526974401945e-01,nan +4.140000000000000568e+00,4.539540733987852406e-01,4.408134938285349635e-01,2.613790641807950052e-02 +4.160000000000000142e+00,4.463694113316689638e-01,4.661151650812371816e-01,nan +4.179999999999999716e+00,4.388406797979540475e-01,4.170876717713623005e-01,3.040067984335515991e-02 +4.200000000000001066e+00,4.313681498311596751e-01,4.673591396611029847e-01,3.570914616284417281e-02 +4.220000000000000639e+00,4.239520904415605695e-01,4.763519606919671134e-01,2.539380830284991911e-02 +4.240000000000000213e+00,4.165927686064918589e-01,4.092221112259533666e-01,2.153629549044803679e-02 +4.259999999999999787e+00,4.092904492607467581e-01,3.932138318109248964e-01,2.659889927443474106e-02 +4.279999999999999361e+00,4.020453952870306491e-01,4.201976468779460294e-01,3.705805557891760083e-02 +4.300000000000000711e+00,3.948578675065055332e-01,3.600548568063514643e-01,2.534162187361805110e-02 +4.320000000000000284e+00,3.877281246693948802e-01,4.174107488585798165e-01,3.201413680890515673e-02 +4.339999999999999858e+00,3.806564234456719653e-01,4.200430502438344815e-01,3.407525487923236673e-02 +4.359999999999999432e+00,3.736430184158152645e-01,3.398977450477670414e-01,4.296195288458046213e-02 +4.380000000000000782e+00,3.666881620616491144e-01,3.616536429337198211e-01,2.686004203200916332e-02 +4.400000000000000355e+00,3.597921047572512077e-01,3.430360370501685519e-01,nan +4.419999999999999929e+00,3.529550947599409128e-01,3.413386186647102138e-01,4.121900071809197347e-02 +4.439999999999999503e+00,3.461773782013377598e-01,2.999581533858771132e-01,nan +4.460000000000000853e+00,3.394591990785065239e-01,4.195723359185571377e-01,2.022926550192588255e-02 +4.480000000000000426e+00,3.328007992451702535e-01,3.306936459388154503e-01,3.731986554227401803e-02 
+4.500000000000000000e+00,3.262024184030050122e-01,3.418964395084210794e-01,4.555894651088994440e-02 +4.519999999999999574e+00,3.196642940930061183e-01,3.042222781899822093e-01,nan +4.540000000000000924e+00,3.131866616869440900e-01,2.945166790367058640e-01,3.255860667849282825e-02 +4.560000000000000497e+00,3.067697543788856507e-01,3.353764562249985648e-01,4.859846603864388659e-02 +4.580000000000000071e+00,3.004138031768017747e-01,2.817826789133563992e-01,2.650893947918774030e-02 +4.599999999999999645e+00,2.941190368942476763e-01,3.504444880940863483e-01,3.514300648032542274e-02 +4.620000000000000995e+00,2.878856821421302836e-01,2.864676182506220581e-01,3.684391263051964294e-02 +4.640000000000000568e+00,2.817139633205476557e-01,2.634071544168652967e-01,2.308952122403817517e-02 +4.660000000000000142e+00,2.756041026107118874e-01,2.388318962249805499e-01,2.924970611005074661e-02 +4.679999999999999716e+00,2.695563199669466226e-01,2.575495264893400416e-01,3.474081006829046747e-02 +4.700000000000001066e+00,2.635708331087747158e-01,2.032770265411950605e-01,4.046234879416068381e-02 +4.720000000000000639e+00,2.576478575130769499e-01,2.754515805332317657e-01,nan +4.740000000000000213e+00,2.517876064063357955e-01,2.510993908751023018e-01,3.621635024533232522e-02 +4.759999999999999787e+00,2.459902907569562203e-01,2.865785354726087530e-01,3.795087884160799202e-02 +4.779999999999999361e+00,2.402561192676773150e-01,3.109668724655918481e-01,4.300319067191369499e-02 +4.800000000000000711e+00,2.345852983680514203e-01,1.785131240197480318e-01,3.270046984938442675e-02 +4.820000000000000284e+00,2.289780322070213980e-01,2.571486778006862317e-01,2.718603598796379486e-02 +4.839999999999999858e+00,2.234345226455607403e-01,2.159567786508689025e-01,2.758167160029438916e-02 +4.859999999999999432e+00,2.179549692494162638e-01,1.759826199015482873e-01,3.354081722595050885e-02 +4.880000000000000782e+00,2.125395692819176396e-01,2.354648132723803911e-01,3.584108267447292523e-02 
+4.900000000000000355e+00,2.071885176968804032e-01,2.306952276361795284e-01,3.216812284508516889e-02 +4.919999999999999929e+00,2.019020071315820175e-01,1.881273852558159876e-01,2.551945124320626965e-02 +4.939999999999999503e+00,1.966802278998327491e-01,1.949066051071076111e-01,nan +4.960000000000000853e+00,1.915233679851200410e-01,2.230855359433417673e-01,2.520107939712273176e-02 +4.980000000000000426e+00,1.864316130338459221e-01,1.271495425341684082e-01,3.617754646046948686e-02 +5.000000000000000000e+00,1.814051463486365812e-01,1.665385684665513233e-01,2.618489741967522377e-02 From de173169aa9335f953a26bd02c6a47b3d97585f6 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 19 May 2024 21:52:30 +0200 Subject: [PATCH 078/118] refactor: went for optimization scheme that can better be ported to low level languages --- .../auto_lambda/optimization.py | 91 ++++++++----------- chemotools/utils/whittaker_base/main.py | 5 +- 2 files changed, 39 insertions(+), 57 deletions(-) diff --git a/chemotools/utils/whittaker_base/auto_lambda/optimization.py b/chemotools/utils/whittaker_base/auto_lambda/optimization.py index c488b36f..1b44f71b 100644 --- a/chemotools/utils/whittaker_base/auto_lambda/optimization.py +++ b/chemotools/utils/whittaker_base/auto_lambda/optimization.py @@ -10,7 +10,7 @@ from math import ceil, exp from typing import Callable, Tuple -from scipy.optimize import OptimizeResult, brute, minimize_scalar +from scipy.optimize import minimize_scalar from chemotools.utils.models import WhittakerSmoothLambda @@ -18,77 +18,62 @@ _LN_TEN: float = 2.302585092994046 # ln(10) _half_log_decade: float = 0.5 * _LN_TEN -_X_ABS_LOG_TOL: float = 0.05 +_X_ABS_LOG_TOL: float = 0.0049 # ~0.5% when converted from log to real ### Optimization Functions ### -def finish_lambda_optimization( - fun: Callable[..., float], - xmin: float, - args: Tuple, -) -> OptimizeResult: - """ - This function is used to finish the optimization of the penalty weight lambda - after the initial optimization 
has been performed with the ``brute`` method. - - It spans an interval of +- half a decade around the minimum found by the brute force - method and then performs a scalar optimization with the ``minimize_scalar`` method. - - """ - - # first, the bounds for the scalar optimization are set - bounds = (xmin - _half_log_decade, xmin + _half_log_decade) - - # now, the scalar optimization is performed - return minimize_scalar( - fun=fun, - bounds=bounds, - args=args, - method="bounded", - options={"xatol": _X_ABS_LOG_TOL}, - ) - - def get_optimized_lambda( fun: Callable[..., float], lam: WhittakerSmoothLambda, args: Tuple, ) -> float: """ - This function optimizes the penalty weight lambda with the brute force method. + This function optimises the penalty weight lambda with the brute force method. + + Since the number of optimisations carried out is so little, the function uses a + custom from-scratch-implementation of a brute force search to tackle the problem + directly without too much overhead. + This will also allow for a more direct control in case this is taken to a lower + level implementation in the future. 
""" - # first, the number of steps is computed in a way that the step size is roughly - # half a decade - # if the bounds are at max one decade apart, the finish optimization can be run - # directly + # unless the search space spans less than 1 decade, i.e., ln(10) ~= 2.3, a grid + # search is carried out to shrink the search space for the final optimization; + # the grid is spanned with an integer number of steps of half a decade log_low_bound, log_upp_bound = lam.log_auto_bounds bound_log_diff = log_upp_bound - log_low_bound - if bound_log_diff <= _LN_TEN: - return minimize_scalar( + if bound_log_diff > _LN_TEN: + target_best = float("inf") + n_steps = 1 + ceil(bound_log_diff / _half_log_decade) + # NOTE: the following ensures that the upper bound is not exceeded + step_size = bound_log_diff / (n_steps - 1) + + # all the trial values are evaluated and the best one is stored + for trial in range(0, n_steps): + log_lam_curr = log_low_bound + trial * step_size + target_curr = fun(log_lam_curr, *args) + + if target_curr < target_best: + log_lam_best = log_lam_curr + target_best = target_curr + + # then, the bounds for the final optimization are shrunk to plus/minus half + # a decade around the best trial value + # NOTE: the following ensures that the bounds are not violated + log_low_bound = max(log_lam_best - _half_log_decade, log_low_bound) + log_upp_bound = min(log_lam_best + _half_log_decade, log_upp_bound) + + # finally, a scalar optimization is performed + # NOTE: since the optimization is carried out over the log of lambda, the + # exponential of the result is returned + return exp( + minimize_scalar( fun=fun, bounds=(log_low_bound, log_upp_bound), args=args, method="bounded", options={"xatol": _X_ABS_LOG_TOL}, ).x - - # otherwise, the number of steps is computed ... 
- n_steps = 1 + ceil(bound_log_diff / _half_log_decade) - - # ...and the brute force optimization with final polish is performed - # NOTE: ``brute`` can work with floats internally and this is exploited here - # NOTE: since the optimization is carried out over the log of lambda, the - # exponential of the result is returned - return exp( - brute( # type: ignore - func=fun, - ranges=(lam.log_auto_bounds,), - Ns=n_steps, - args=args, - finish=finish_lambda_optimization, - full_output=False, - ) ) diff --git a/chemotools/utils/whittaker_base/main.py b/chemotools/utils/whittaker_base/main.py index 0c0f34b1..eb205bdc 100644 --- a/chemotools/utils/whittaker_base/main.py +++ b/chemotools/utils/whittaker_base/main.py @@ -206,7 +206,7 @@ def _solve( def _marginal_likelihood_objective( self, - log_lam: Union[np.ndarray, float], + log_lam: float, b: np.ndarray, w: Union[float, np.ndarray], w_plus_penalty_plus_n_samples_term: float, @@ -221,9 +221,6 @@ def _marginal_likelihood_objective( # first, the linear system of equations is solved with the given penalty weight # lambda - if isinstance(log_lam, np.ndarray): - log_lam = log_lam[0] - lam = exp(log_lam) # Case 1: no weights are provided From 435d6b8bf213a09918f4caf705c745c835825ab9 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 19 May 2024 21:53:06 +0200 Subject: [PATCH 079/118] refactor: replaced subtraction by not-equals-comparison --- chemotools/utils/banded_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index f72d6bb8..c2b51b8e 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -395,7 +395,7 @@ def slogdet_lu_banded( step=1, dtype=lub_factorization.ipiv.dtype, ) - num_row_exchanges = np.count_nonzero(lub_factorization.ipiv - unchanged_row_idxs) + num_row_exchanges = np.count_nonzero(lub_factorization.ipiv != unchanged_row_idxs) # the sign-prefactor of the determinant is either +1 or 
-1 depending on whether the # number of row exchanges is even or odd From 160d3c39f02f5ac640a8bd6ee93a08c51dc98ddd Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 19 May 2024 23:45:08 +0200 Subject: [PATCH 080/118] tests/refactor: split up `utils` in functions and models --- tests/test_for_utils/test_banded_linalg.py | 2 +- .../test_for_utils/test_finite_differences.py | 2 +- tests/test_for_utils/test_models.py | 2 +- .../{utils.py => utils_funcs.py} | 523 ++++++++++++++++-- tests/test_for_utils/utils_models.py | 60 ++ 5 files changed, 542 insertions(+), 47 deletions(-) rename tests/test_for_utils/{utils.py => utils_funcs.py} (58%) create mode 100644 tests/test_for_utils/utils_models.py diff --git a/tests/test_for_utils/test_banded_linalg.py b/tests/test_for_utils/test_banded_linalg.py index 7fe84ed2..d3ece0ab 100644 --- a/tests/test_for_utils/test_banded_linalg.py +++ b/tests/test_for_utils/test_banded_linalg.py @@ -16,7 +16,7 @@ lu_solve_banded, slogdet_lu_banded, ) -from tests.test_for_utils.utils import get_banded_slogdet +from tests.test_for_utils.utils_funcs import get_banded_slogdet ### Test Suite ### diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py index 4d74886b..43e34628 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -16,7 +16,7 @@ gen_squ_fw_fin_diff_mat_cho_banded, ) from tests.fixtures import reference_finite_differences # noqa: F401 -from tests.test_for_utils.utils import ( +from tests.test_for_utils.utils_funcs import ( conv_upper_cho_banded_storage_to_sparse, multiply_vect_with_squ_fw_fin_diff_orig_first, multiply_vect_with_squ_fw_fin_diff_transpose_first, diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index c373623f..28b4d232 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -11,7 +11,7 @@ import pytest from chemotools.utils import 
models -from tests.test_for_utils.utils import ExpectedWhittakerSmoothLambda +from tests.test_for_utils.utils_models import ExpectedWhittakerSmoothLambda ### Type aliases ### diff --git a/tests/test_for_utils/utils.py b/tests/test_for_utils/utils_funcs.py similarity index 58% rename from tests/test_for_utils/utils.py rename to tests/test_for_utils/utils_funcs.py index 10ea645a..0e91bada 100644 --- a/tests/test_for_utils/utils.py +++ b/tests/test_for_utils/utils_funcs.py @@ -10,16 +10,18 @@ ### Imports ### -from dataclasses import dataclass -from math import isnan -from typing import Tuple +from math import exp, isnan +from typing import Tuple, Union import numpy as np from scipy.linalg import eigvals_banded -from scipy.sparse import csr_matrix +from scipy.optimize import brute, minimize_scalar +from scipy.sparse import csc_matrix, csr_matrix from scipy.sparse import diags as sp_diags +from scipy.sparse import linalg as spla -from chemotools.utils import models +from chemotools.utils.finite_differences import calc_forward_diff_kernel +from chemotools.utils.whittaker_base import WhittakerLikeSolver ### Utility Functions ### @@ -32,7 +34,7 @@ def float_is_bit_equal(value: float, reference: float) -> bool: Doctests -------- >>> # Imports - >>> from tests.test_for_utils.utils import float_is_bit_equal + >>> from tests.test_for_utils.utils_funcs import float_is_bit_equal >>> # Test 1 >>> float_is_bit_equal(value=1.0, reference=1.0) @@ -76,7 +78,9 @@ def conv_upper_cho_banded_storage_to_sparse( >>> # Imports >>> import numpy as np >>> from numpy import nan - >>> from tests.test_for_utils.utils import conv_upper_cho_banded_storage_to_sparse + >>> from tests.test_for_utils.utils_funcs import ( + ... conv_upper_cho_banded_storage_to_sparse, + ... 
) >>> # Generating a set of test matrices >>> # Matrix 1 @@ -216,7 +220,9 @@ def conv_lu_banded_storage_to_sparse( >>> # Imports >>> import numpy as np >>> from numpy import nan - >>> from tests.test_for_utils.utils import conv_lu_banded_storage_to_sparse + >>> from tests.test_for_utils.utils_funcs import ( + ... conv_lu_banded_storage_to_sparse, + ... ) >>> # Generating a set of test matrices >>> # Matrix 1 @@ -373,7 +379,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( -------- >>> # Imports >>> import numpy as np - >>> from tests.test_for_utils.utils import ( + >>> from tests.test_for_utils.utils_funcs import ( ... multiply_vect_with_squ_fw_fin_diff_orig_first, ... ) @@ -513,7 +519,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( -------- >>> # Imports >>> import numpy as np - >>> from tests.test_for_utils.utils import ( + >>> from tests.test_for_utils.utils_funcs import ( ... multiply_vect_with_squ_fw_fin_diff_transpose_first, ... ) @@ -642,7 +648,7 @@ def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: -------- >>> # Imports >>> import numpy as np - >>> from tests.test_for_utils.utils import ( + >>> from tests.test_for_utils.utils_funcs import ( ... conv_upper_cho_banded_storage_to_sparse, ... get_banded_slogdet, ... ) @@ -755,57 +761,486 @@ def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: return sign, logabsdet -### Dataclasses ### +def get_dense_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: + """ + Creates a dense forward finite difference matrix ``D`` of a given difference order. 
+ + Doctests + -------- + >>> # Imports + >>> from tests.test_for_utils.utils_funcs import get_dense_fw_fin_diff_mat + + >>> # Matrix 1 + >>> n_data, differences = 5, 1 + >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + array([[-1., 1., 0., 0., 0.], + [ 0., -1., 1., 0., 0.], + [ 0., 0., -1., 1., 0.], + [ 0., 0., 0., -1., 1.]]) + + >>> # Matrix 2 + >>> n_data, differences = 10, 1 + >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + array([[-1., 1., 0., 0., 0., 0., 0., 0., 0., 0.], + [ 0., -1., 1., 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., -1., 1., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., -1., 1., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., -1., 1., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., -1., 1., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0., -1., 1., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0., -1., 1., 0.], + [ 0., 0., 0., 0., 0., 0., 0., 0., -1., 1.]]) + + >>> # Matrix 3 + >>> n_data, differences = 5, 2 + >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + array([[ 1., -2., 1., 0., 0.], + [ 0., 1., -2., 1., 0.], + [ 0., 0., 1., -2., 1.]]) + + >>> # Matrix 4 + >>> n_data, differences = 10, 2 + >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + array([[ 1., -2., 1., 0., 0., 0., 0., 0., 0., 0.], + [ 0., 1., -2., 1., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 1., -2., 1., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 1., -2., 1., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 1., -2., 1., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 1., -2., 1., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 1., -2., 1., 0.], + [ 0., 0., 0., 0., 0., 0., 0., 1., -2., 1.]]) + + >>> # Matrix 4 + >>> n_data, differences = 5, 3 + >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + array([[-1., 3., -3., 1., 0.], + [ 0., -1., 3., -3., 1.]]) + + >>> # Matrix 5 + >>> n_data, differences = 10, 3 + >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + array([[-1., 3., 
-3., 1., 0., 0., 0., 0., 0., 0.], + [ 0., -1., 3., -3., 1., 0., 0., 0., 0., 0.], + [ 0., 0., -1., 3., -3., 1., 0., 0., 0., 0.], + [ 0., 0., 0., -1., 3., -3., 1., 0., 0., 0.], + [ 0., 0., 0., 0., -1., 3., -3., 1., 0., 0.], + [ 0., 0., 0., 0., 0., -1., 3., -3., 1., 0.], + [ 0., 0., 0., 0., 0., 0., -1., 3., -3., 1.]]) + + """ + + # first, the required constants are obtained from the ``WhittakerLikeSolver``-class + dtype = WhittakerLikeSolver._WhittakerLikeSolver__dtype # type: ignore + # then, the dense finite difference matrix D is created from the forward difference + # kernel + diff_kernel = calc_forward_diff_kernel(differences=differences) + offsets = np.arange(start=0, stop=diff_kernel.size, step=1, dtype=np.int64) + return sp_diags( + diagonals=diff_kernel, + offsets=offsets, # type: ignore + shape=(n_data - diff_kernel.size + 1, n_data), + dtype=dtype, + format="csc", + ) -@dataclass -class ExpectedWhittakerSmoothLambda: + +def sparse_slogdet_from_superlu(splu: spla.SuperLU) -> Tuple[float, float]: """ - Dataclass for checking the expected results for the class :class:`WhittakerSmoothLambda` - from the module :mod:`chemotools.utils.models`. + Computes the sign and the logarithm of the determinant of a sparse matrix from its + SuperLU decomposition. + + References + ---------- + This function is based on the following GIST and its discussion: + https://gist.github.com/luizfelippesr/5965a536d202b913beda9878a2f8ef3e + + Doctests + -------- + >>> # Imports + >>> import numpy as np + >>> import scipy.sparse as sprs + + >>> from tests.test_for_utils.utils_funcs import ( + ... sparse_slogdet_from_superlu, + ... 
) + + >>> # Setup of a test with random matrices + >>> np.random.seed(42) + >>> n_rows = np.random.randint(low=10, high=1_001, size=20) + >>> density = 0.5 # chosen to have a high probability of a solvable system + >>> n_rows + array([112, 445, 870, 280, 116, 81, 710, 30, 624, 131, 476, 224, 340, + 468, 97, 382, 109, 881, 673, 140]) + + >>> # Running the tests in a loop + >>> for m in n_rows: + ... iter_i = 0 + ... attempts = 10 + ... failed = False + ... while iter_i < 10: + ... # a random matrix is generated and if the LU decomposition fails, the + ... # test is repeated (this test is not there to test the LU decomposition) + ... attempts += 1 + ... matrix = sprs.random(m=m, n=m, density=density, format="csc") + ... try: + ... splu = sprs.linalg.splu(matrix) + ... except RuntimeError: + ... continue + ... + ... # first, the utility function is used to compute the sign and the log + ... # determinant of the matrix + ... sign, logabsdet = sparse_slogdet_from_superlu(splu=splu) + ... + ... # then, the sign and the log determinant are computed by NumPy's dense + ... # log determinant function for comparison + ... sign_ref, logabsdet_ref = np.linalg.slogdet(matrix.toarray()) + ... + ... # the results are compared and if they differ, the test is stopped + ... # with a diagnostic message + ... if not ( + ... np.isclose(sign, sign_ref) + ... and np.isclose(logabsdet, logabsdet_ref) + ... ): + ... print( + ... f"Failed for matrix with shape {m}x{m}: " + ... f"sign: {sign} vs. {sign_ref} and " + ... f"logabsdet: {logabsdet} vs. {logabsdet_ref}" + ... ) + ... failed = True + ... break + ... + ... # if the test is successful, the loop is continued if the number of + ... # attempts is less than 100 + ... del splu + ... iter_i += 1 + ... if attempts >= 100: + ... print( + ... f"Could not generate a solvable system for matrix with shape " + ... f"{m}x{m}" + ... ) + ... + ... if failed: + ... 
break + + """ + + ### Auxiliary Function ### + + def find_min_num_swaps(arr: np.ndarray): + """ + Minimum number of swaps needed to order a permutation array. + + """ + # from https://www.thepoorcoder.com/hackerrank-minimum-swaps-2-solution/ + a = dict(enumerate(arr)) + b = {v: k for k, v in a.items()} + count = 0 + for i in a: + x = a[i] + if x != i: + y = b[i] + a[y] = x + b[x] = y + count += 1 + + return count + + ### Main Part ### + + # the logarithm of the determinant is the sum of the logarithms of the diagonal + # elements of the LU decomposition, but since L is unit lower triangular, only the + # diagonal elements of U are considered + diagU = splu.U.diagonal() + logabsdet = np.log(np.abs(diagU)).sum() + + # then, the sign is determined from the diagonal elements of U as well as the row + # and column permutations + # NOTE: odd number of negative elements/swaps leads to a negative sign + fact_sign = -1 if np.count_nonzero(diagU < 0.0) % 2 == 1 else 1 + row_sign = -1 if find_min_num_swaps(splu.perm_r) % 2 == 1 else 1 + col_sign = -1 if find_min_num_swaps(splu.perm_c) % 2 == 1 else 1 + sign = -1.0 if fact_sign * row_sign * col_sign < 0 else 1.0 + + return sign, logabsdet + + +def calc_whittaker_smooth_log_marginal_likelihood_const_term( + differences: int, + diff_mat: csr_matrix, + weight_vect: np.ndarray, +) -> float: + """ + Calculates the constant term of the log marginal likelihood of a Whittaker smoother + with a given set of parameters. + + It is given by + + ``(n^ - d) * ln(2 * pi) - ln(pseudo_det(W)) - ln(pseudo_det(D.T @ D))`` + + or better + + ``(n^ - d) * ln(2 * pi) - ln(pseudo_det(W)) - ln(det(D @ D.T))`` + + For further details, please see the documentation of the function :func:`get_log_marginal_likelihood_constant_term` + from the module :mod:`chemotools.utils.whittaker_base.logml`. + + Doctest + ------- + >>> # Imports + >>> import numpy as np + >>> from tests.test_for_utils.utils_funcs import ( + ... 
calc_whittaker_smooth_log_marginal_likelihood_const_term, + ... get_dense_fw_fin_diff_mat, + ... ) + + >>> # Generation of the weight matrix W and the finite difference matrix D + >>> weights = np.array([0.5, 1.0, 0.5, 1.0, 0.5]) + >>> n_data, differences = weights.size, 1 + >>> diff_mat = get_dense_fw_fin_diff_mat( + ... n_data=n_data, + ... differences=differences, + ... ) + >>> diff_mat_dense = diff_mat.toarray() + + >>> # Test 1 with all weights being non-zero + + >>> # Calculation of the log pseudo-determinant of the weight matrix W + >>> # since it is diagonal, the log-determinant is the sum of the logarithms of the + >>> # diagonal elements + >>> log_pseudo_det_w = np.log(weights).sum() + >>> log_pseudo_det_w + -2.0794415416798357 + + >>> # Calculation of the log pseudo-determinant via the Cholesky decomposition of + >>> # the product D @ D.T + >>> squ_diff_mat_chol = np.linalg.cholesky(diff_mat_dense @ diff_mat_dense.T) + >>> squ_diff_mat_chol + array([[ 1.41421356, 0. , 0. , 0. ], + [-0.70710678, 1.22474487, 0. , 0. ], + [ 0. , -0.81649658, 1.15470054, 0. ], + [ 0. , 0. , -0.8660254 , 1.11803399]]) + >>> # the sum of the doubled logarithms of the main diagonal elements is the log + >>> # pseudo-determinant of the matrix D.T @ D + >>> log_pseudo_det_dtd = 2.0 * np.log(np.diag(squ_diff_mat_chol)).sum() + >>> log_pseudo_det_dtd + 1.6094379124341003 + + >>> # Calculation of the theoretical constant term + >>> logml_theoretical = ( + ... (n_data - differences) * np.log(2.0 * np.pi) + ... - log_pseudo_det_w + ... - log_pseudo_det_dtd + ... ) + + >>> # Calculation of the constant term via the utility function + >>> logml_via_function = calc_whittaker_smooth_log_marginal_likelihood_const_term( + ... differences=differences, + ... diff_mat=diff_mat, + ... weight_vect=weights, + ... 
) + >>> logml_via_function + 7.821511894883117 + >>> np.isclose(logml_via_function, logml_theoretical) + True + + >>> # Test 2 with 2 weights being zero + >>> weights[1] = 0.0 + >>> weights[3] = 0.0 + >>> nonzero_weights_flags = weights > 0.0 + >>> log_pseudo_det_w = np.log(weights[nonzero_weights_flags]).sum() + + >>> # Calculation of the theoretical constant term + >>> logml_theoretical = ( + ... (nonzero_weights_flags.sum() - differences) * np.log(2.0 * np.pi) + ... - log_pseudo_det_w + ... - log_pseudo_det_dtd + ... ) + + >>> # Calculation of the constant term via the utility function + >>> logml_via_function = calc_whittaker_smooth_log_marginal_likelihood_const_term( + ... differences=differences, + ... diff_mat=diff_mat, + ... weight_vect=weights, + ... ) + >>> logml_via_function + 4.145757762064426 + >>> np.isclose(logml_via_function, logml_theoretical) + True """ # noqa: E501 - fixed_lambda: float - auto_bounds: Tuple[float, float] - fit_auto: bool - method_used: models.WhittakerSmoothMethods - log_auto_bounds: Tuple[float, float] = (0.0, 0.0) + ### Pre-computation of the constant term ### + + # first, the required constants are obtained from the ``WhittakerLikeSolver``-class + zero_weight_tol = WhittakerLikeSolver._WhittakerLikeSolver__zero_weight_tol # type: ignore + + # for W, the log pseudo-determinant is calculated ... + w_nonzero_idxs = weight_vect > weight_vect.max() * zero_weight_tol + nnz_w = w_nonzero_idxs.sum() + w_log_pseudo_det = np.log(weight_vect[w_nonzero_idxs]).sum() + + # ... 
followed by the log pseudo-determinant of the penalty matrix D.T @ D which is + # equivalent to the determinant of the flipped matrix D @ D.T which is not + # rank-deficient + _, penalty_log_pseudo_det = sparse_slogdet_from_superlu( + splu=spla.splu(A=diff_mat @ diff_mat.T) + ) + + # from all of this, the constant term is computed + return ( + (nnz_w - differences) * np.log(2.0 * np.pi) + - w_log_pseudo_det + - penalty_log_pseudo_det + ) + + +def find_whittaker_smooth_opt_lambda_log_marginal_likelihood( + b_vect: np.ndarray, + weight_vect: np.ndarray, + differences: int, + log_lambda_bounds: Tuple[float, float], + n_opts: int, +) -> Tuple[float, float, np.ndarray]: + """ + Finds the optimal lambda value for a Whittaker smoother by maximising the log + marginal likelihood via a nested brute-force optimisation followed by a bounded + scalar minimisation. + + Since it relies purely on dense linear algebra for highly sparse matrices, this + utility function is only suitable for small to medium-sized datasets (n < 500 ... + 1000). - def assert_is_equal_to(self, other: models.WhittakerSmoothLambda) -> None: + """ + + ### Definition of the target function ### + + def get_smooth_solution( + log_lam: Union[np.ndarray, float] + ) -> Tuple[np.ndarray, np.ndarray, float, float]: """ - Checks if the current instance is equal to another instance of the same class. + Computes the smooth solution for the Whittaker smoother. 
""" - assert other.fit_auto is self.fit_auto - assert other.method_used == self.method_used - # NOTE: since NAN-values are used, the comparison is split into two parts for - # the fixed lambda value and each of the bounds - assert float_is_bit_equal( - value=other.fixed_lambda, - reference=self.fixed_lambda, - ) - assert float_is_bit_equal( - value=other.auto_bounds[0], reference=self.auto_bounds[0] - ) - assert float_is_bit_equal( - value=other.auto_bounds[1], - reference=self.auto_bounds[1], - ) - assert float_is_bit_equal( - value=other.log_auto_bounds[0], - reference=self.log_auto_bounds[0], + # first, the linear system (left hand side) has to be set up for calculating the + # smooth solution + if isinstance(log_lam, np.ndarray): + log_lam = log_lam[0] + + lam = exp(log_lam) + + lhs_matrix = lam * penalty_mat_dense + np.fill_diagonal(a=lhs_matrix, val=np.diag(lhs_matrix) + weight_vect) + + # then, the solution is obtained + smooth_solution = np.linalg.solve(lhs_matrix, weight_vect * b_vect) + + return ( + smooth_solution, + lhs_matrix, + lam, + log_lam, # type: ignore ) - assert float_is_bit_equal( - value=other.log_auto_bounds[1], - reference=self.log_auto_bounds[1], + + def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: + """ + The target function to minimize for maximizing the log marginal likelihood. 
+ + """ + + # first, the smooth solution is calculated together with the left-hand side + # matrix and the lambda value + smooth_solution, lhs_matrix, lam, log_lam = get_smooth_solution(log_lam=log_lam) + + # the log-determinant of the lhs matrix is calculated + _, logdet_lhs = np.linalg.slogdet(lhs_matrix) + + # finally, the log marginal likelihood is computed from: + # 1) the weighted residual sum of squares + wrss = (weight_vect * np.square(b_vect - smooth_solution)).sum() + + # 2) the sum of squared penalties + # NOTE: the order of multiplications for the following term is important because + # the last multiplication is a matrix-vector resulting in another vector; + # the other way around would result in another matrix followed by + # a matrix-vector multiplication + pss = lam * (smooth_solution @ (penalty_mat_dense @ smooth_solution)) + + # 3) the log-determinant of the lhs matrix and the constant term + # NOTE: the sign is positive because the log marginal likelihood is maximised + # and not minimised + return 0.5 * ( + wrss + + pss + - (b_vect.size - differences) * log_lam + + logdet_lhs + + logml_constant_term ) + ### Pre-computations ### + + # then, some pre-computations are made + n_data = b_vect.size + log_lambda_min, log_lambda_max = log_lambda_bounds + diff_mat_dense = get_dense_fw_fin_diff_mat( + n_data=n_data, + differences=differences, + ) + penalty_mat_dense = diff_mat_dense.transpose() @ diff_mat_dense + logml_constant_term = calc_whittaker_smooth_log_marginal_likelihood_const_term( + differences=differences, + diff_mat_dense=diff_mat_dense, + weight_vect=weight_vect, + ) + + ### Running the optimisation ### + + # the first optimisation is run with the target function to narrow down the + # search space + opt_log_lam = brute( + func=logml_target_func, + ranges=((log_lambda_min, log_lambda_max),), + Ns=n_opts, + finish=None, + full_output=False, + ) + + # the search space is narrowed down for the second optimisation to roughly one + # decade in 
the natural log space + log_lambda_min = opt_log_lam - 1.2 # type: ignore + log_lambda_max = opt_log_lam + 1.2 # type: ignore + + # the second optimisation is run with the target function to find the optimal lambda + opt_log_lam = brute( + func=logml_target_func, + ranges=((log_lambda_min, log_lambda_max),), + Ns=n_opts, + finish=None, + full_output=False, + ) + + # one more optimisation is run to ensure that the optimal lambda is found + log_lambda_min = opt_log_lam - 0.1 # type: ignore + log_lambda_max = opt_log_lam + 0.1 # type: ignore + opt_log_lam = minimize_scalar( + fun=logml_target_func, + bounds=(log_lambda_min, log_lambda_max), + method="bounded", + ).x + + # finally, the solutions for the optimal lambda are returned + return ( + exp(opt_log_lam), + (-1.0) * logml_target_func(log_lam=opt_log_lam), + get_smooth_solution(log_lam=opt_log_lam)[0], + ) + ### Doctests ### if __name__ == "__main__": # pragma: no cover + import doctest doctest.testmod() diff --git a/tests/test_for_utils/utils_models.py b/tests/test_for_utils/utils_models.py new file mode 100644 index 00000000..55723dc2 --- /dev/null +++ b/tests/test_for_utils/utils_models.py @@ -0,0 +1,60 @@ +""" +This script implements utility models required for testing the +:mod:`chemotools.utils` module. + +""" + +### Imports ### + +from dataclasses import dataclass +from typing import Tuple + +from chemotools.utils import models +from tests.test_for_utils.utils_funcs import float_is_bit_equal + +### Dataclasses ### + + +@dataclass +class ExpectedWhittakerSmoothLambda: + """ + Dataclass for checking the expected results for the class :class:`WhittakerSmoothLambda` + from the module :mod:`chemotools.utils.models`. 
+ + """ # noqa: E501 + + fixed_lambda: float + auto_bounds: Tuple[float, float] + fit_auto: bool + method_used: models.WhittakerSmoothMethods + log_auto_bounds: Tuple[float, float] = (0.0, 0.0) + + def assert_is_equal_to(self, other: models.WhittakerSmoothLambda) -> None: + """ + Checks if the current instance is equal to another instance of the same class. + + """ + + assert other.fit_auto is self.fit_auto + assert other.method_used == self.method_used + # NOTE: since NAN-values are used, the comparison is split into two parts for + # the fixed lambda value and each of the bounds + assert float_is_bit_equal( + value=other.fixed_lambda, + reference=self.fixed_lambda, + ) + assert float_is_bit_equal( + value=other.auto_bounds[0], reference=self.auto_bounds[0] + ) + assert float_is_bit_equal( + value=other.auto_bounds[1], + reference=self.auto_bounds[1], + ) + assert float_is_bit_equal( + value=other.log_auto_bounds[0], + reference=self.log_auto_bounds[0], + ) + assert float_is_bit_equal( + value=other.log_auto_bounds[1], + reference=self.log_auto_bounds[1], + ) From 48da8b5a3ddb31ea869962dcb8db353a8f693268 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 19 May 2024 23:47:44 +0200 Subject: [PATCH 081/118] test/refactor/feat: implemented auto log lambda utility functions based on sparse matrices to fix the ultra long tests with dense matrices --- tests/test_for_utils/utils_funcs.py | 59 ++++++++++++++++------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/tests/test_for_utils/utils_funcs.py b/tests/test_for_utils/utils_funcs.py index 0e91bada..5c1c1bbe 100644 --- a/tests/test_for_utils/utils_funcs.py +++ b/tests/test_for_utils/utils_funcs.py @@ -64,9 +64,7 @@ def float_is_bit_equal(value: float, reference: float) -> bool: return value == reference -def conv_upper_cho_banded_storage_to_sparse( - ab: np.ndarray, -) -> csr_matrix: +def conv_upper_cho_banded_storage_to_sparse(ab: np.ndarray) -> csr_matrix: """ Converts a banded matrix stored in 
the upper banded storage used for LAPACK's banded Cholesky decomposition to a sparse ``CSR`` matrix. @@ -207,7 +205,8 @@ def conv_upper_cho_banded_storage_to_sparse( def conv_lu_banded_storage_to_sparse( - ab: np.ndarray, l_and_u: Tuple[int, int] + ab: np.ndarray, + l_and_u: Tuple[int, int], ) -> csr_matrix: """ Converts a banded matrix stored in the banded storage used for LAPACK's banded LU @@ -761,18 +760,18 @@ def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: return sign, logabsdet -def get_dense_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: +def get_sparse_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: """ Creates a dense forward finite difference matrix ``D`` of a given difference order. Doctests -------- >>> # Imports - >>> from tests.test_for_utils.utils_funcs import get_dense_fw_fin_diff_mat + >>> from tests.test_for_utils.utils_funcs import get_sparse_fw_fin_diff_mat >>> # Matrix 1 >>> n_data, differences = 5, 1 - >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() array([[-1., 1., 0., 0., 0.], [ 0., -1., 1., 0., 0.], [ 0., 0., -1., 1., 0.], @@ -780,7 +779,7 @@ def get_dense_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: >>> # Matrix 2 >>> n_data, differences = 10, 1 - >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() array([[-1., 1., 0., 0., 0., 0., 0., 0., 0., 0.], [ 0., -1., 1., 0., 0., 0., 0., 0., 0., 0.], [ 0., 0., -1., 1., 0., 0., 0., 0., 0., 0.], @@ -793,14 +792,14 @@ def get_dense_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: >>> # Matrix 3 >>> n_data, differences = 5, 2 - >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() array([[ 1., -2., 1., 
0., 0.], [ 0., 1., -2., 1., 0.], [ 0., 0., 1., -2., 1.]]) >>> # Matrix 4 >>> n_data, differences = 10, 2 - >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() array([[ 1., -2., 1., 0., 0., 0., 0., 0., 0., 0.], [ 0., 1., -2., 1., 0., 0., 0., 0., 0., 0.], [ 0., 0., 1., -2., 1., 0., 0., 0., 0., 0.], @@ -812,13 +811,13 @@ def get_dense_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: >>> # Matrix 4 >>> n_data, differences = 5, 3 - >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() array([[-1., 3., -3., 1., 0.], [ 0., -1., 3., -3., 1.]]) >>> # Matrix 5 >>> n_data, differences = 10, 3 - >>> get_dense_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() array([[-1., 3., -3., 1., 0., 0., 0., 0., 0., 0.], [ 0., -1., 3., -3., 1., 0., 0., 0., 0., 0.], [ 0., 0., -1., 3., -3., 1., 0., 0., 0., 0.], @@ -967,7 +966,7 @@ def find_min_num_swaps(arr: np.ndarray): def calc_whittaker_smooth_log_marginal_likelihood_const_term( differences: int, - diff_mat: csr_matrix, + diff_mat: csc_matrix, weight_vect: np.ndarray, ) -> float: """ @@ -991,13 +990,13 @@ def calc_whittaker_smooth_log_marginal_likelihood_const_term( >>> import numpy as np >>> from tests.test_for_utils.utils_funcs import ( ... calc_whittaker_smooth_log_marginal_likelihood_const_term, - ... get_dense_fw_fin_diff_mat, + ... get_sparse_fw_fin_diff_mat, ... ) >>> # Generation of the weight matrix W and the finite difference matrix D >>> weights = np.array([0.5, 1.0, 0.5, 1.0, 0.5]) >>> n_data, differences = weights.size, 1 - >>> diff_mat = get_dense_fw_fin_diff_mat( + >>> diff_mat = get_sparse_fw_fin_diff_mat( ... n_data=n_data, ... differences=differences, ... 
) @@ -1084,7 +1083,7 @@ def calc_whittaker_smooth_log_marginal_likelihood_const_term( # equivalent to the determinant of the flipped matrix D @ D.T which is not # rank-deficient _, penalty_log_pseudo_det = sparse_slogdet_from_superlu( - splu=spla.splu(A=diff_mat @ diff_mat.T) + splu=spla.splu(A=diff_mat @ diff_mat.transpose()) ) # from all of this, the constant term is computed @@ -1117,7 +1116,7 @@ def find_whittaker_smooth_opt_lambda_log_marginal_likelihood( def get_smooth_solution( log_lam: Union[np.ndarray, float] - ) -> Tuple[np.ndarray, np.ndarray, float, float]: + ) -> Tuple[np.ndarray, spla.SuperLU, float, float]: """ Computes the smooth solution for the Whittaker smoother. @@ -1130,15 +1129,21 @@ def get_smooth_solution( lam = exp(log_lam) - lhs_matrix = lam * penalty_mat_dense - np.fill_diagonal(a=lhs_matrix, val=np.diag(lhs_matrix) + weight_vect) + lhs_mat = lam * penalty_mat + lhs_mat += sp_diags( + diagonals=weight_vect, + offsets=0, + shape=(b_vect.size, b_vect.size), + format="csc", + ) # then, the solution is obtained - smooth_solution = np.linalg.solve(lhs_matrix, weight_vect * b_vect) + lhs_splu = spla.splu(A=lhs_mat) + smooth_solution = lhs_splu.solve(rhs=weight_vect * b_vect) return ( smooth_solution, - lhs_matrix, + lhs_splu, lam, log_lam, # type: ignore ) @@ -1151,10 +1156,10 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: # first, the smooth solution is calculated together with the left-hand side # matrix and the lambda value - smooth_solution, lhs_matrix, lam, log_lam = get_smooth_solution(log_lam=log_lam) + smooth_solution, lhs_splu, lam, log_lam = get_smooth_solution(log_lam=log_lam) # the log-determinant of the lhs matrix is calculated - _, logdet_lhs = np.linalg.slogdet(lhs_matrix) + _, logdet_lhs = sparse_slogdet_from_superlu(splu=lhs_splu) # finally, the log marginal likelihood is computed from: # 1) the weighted residual sum of squares @@ -1165,7 +1170,7 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) 
-> float: # the last multiplication is a matrix-vector resulting in another vector; # the other way around would result in another matrix followed by # a matrix-vector multiplication - pss = lam * (smooth_solution @ (penalty_mat_dense @ smooth_solution)) + pss = lam * (smooth_solution @ (penalty_mat @ smooth_solution)) # 3) the log-determinant of the lhs matrix and the constant term # NOTE: the sign is positive because the log marginal likelihood is maximised @@ -1183,14 +1188,14 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: # then, some pre-computations are made n_data = b_vect.size log_lambda_min, log_lambda_max = log_lambda_bounds - diff_mat_dense = get_dense_fw_fin_diff_mat( + diff_mat = get_sparse_fw_fin_diff_mat( n_data=n_data, differences=differences, ) - penalty_mat_dense = diff_mat_dense.transpose() @ diff_mat_dense + penalty_mat = (diff_mat.transpose() @ diff_mat).tocsc() # type: ignore logml_constant_term = calc_whittaker_smooth_log_marginal_likelihood_const_term( differences=differences, - diff_mat_dense=diff_mat_dense, + diff_mat=diff_mat, weight_vect=weight_vect, ) From b924a7dcda2109e7969ee18dd3a8927d88055372 Mon Sep 17 00:00:00 2001 From: MothNik Date: Sun, 19 May 2024 23:48:44 +0200 Subject: [PATCH 082/118] test/feat: implemented full tests for whittaker base --- tests/test_for_utils/test_whittaker_base.py | 558 ++++++++++++++++++++ 1 file changed, 558 insertions(+) create mode 100644 tests/test_for_utils/test_whittaker_base.py diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py new file mode 100644 index 00000000..562ec707 --- /dev/null +++ b/tests/test_for_utils/test_whittaker_base.py @@ -0,0 +1,558 @@ +""" +Test suite for the utility functions in the :mod:`chemotools.utils.whittaker_base` +module. 
+ +""" + +### Imports ### + +from math import log +from typing import Any, Tuple, Type, Union + +import numpy as np +import pytest + +from chemotools.utils import models +from chemotools.utils.whittaker_base.auto_lambda.shared import get_smooth_wrss +from chemotools.utils.whittaker_base.initialisation import ( + get_checked_lambda, + get_penalty_log_pseudo_det, +) +from chemotools.utils.whittaker_base.main import WhittakerLikeSolver +from chemotools.utils.whittaker_base.misc import get_weight_generator +from chemotools.utils.whittaker_base.solvers import solve_normal_equations +from tests.fixtures import noise_level_whittaker_auto_lambda # noqa: F401 +from tests.fixtures import spectrum_whittaker_auto_lambda # noqa: F401 +from tests.test_for_utils.utils_funcs import ( + find_whittaker_smooth_opt_lambda_log_marginal_likelihood, +) +from tests.test_for_utils.utils_models import ExpectedWhittakerSmoothLambda + +### Type Aliases ### + +_RealNumeric = Union[float, int] +_WhittakerMethod = Union[str, models.WhittakerSmoothMethods] +_LambdaSpecs = Union[_RealNumeric, Tuple[_RealNumeric, _RealNumeric, _WhittakerMethod]] +_LambdaSpecsOrFlawed = Union[_LambdaSpecs, str] + +### Constants ### + +_NAN: float = float("nan") + +### Test Suite ### + + +@pytest.mark.parametrize( + "combination", + [ + ( # Number 0 (fixed float) + 100.0, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 1 (fixed integer) + 100, + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 2 (float search space, log marginal likelihood method enum) + (100.0, 10_000.0, models.WhittakerSmoothMethods.LOGML), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + 
fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 3 (float search space, log marginal likelihood method string) + (100.0, 10_000.0, "logml"), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 4 (integer search space, log marginal likelihood method enum) + (100, 10_000, models.WhittakerSmoothMethods.LOGML), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 5 (integer search space, log marginal likelihood method string) + (100, 10_000, "logml"), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 6 (dataclass float specification; fixed method) + models.WhittakerSmoothLambda( + bounds=100.0, + method=models.WhittakerSmoothMethods.FIXED, + ), + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 7 (dataclass integer specification; fixed method) + models.WhittakerSmoothLambda( + bounds=100, + method=models.WhittakerSmoothMethods.FIXED, + ), + ExpectedWhittakerSmoothLambda( + fixed_lambda=100.0, + auto_bounds=(_NAN, _NAN), + fit_auto=False, + method_used=models.WhittakerSmoothMethods.FIXED, + log_auto_bounds=(_NAN, _NAN), + ), + ), + ( # Number 8 (dataclass float specification; log marginal likelihood method) + models.WhittakerSmoothLambda( + bounds=(100.0, 10_000.0), + method=models.WhittakerSmoothMethods.LOGML, + ), + 
ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 9 (dataclass integer specification; log marginal likelihood method) + models.WhittakerSmoothLambda( + bounds=(100, 10_000), + method=models.WhittakerSmoothMethods.LOGML, + ), + ExpectedWhittakerSmoothLambda( + fixed_lambda=_NAN, + auto_bounds=(100.0, 10_000.0), + fit_auto=True, + method_used=models.WhittakerSmoothMethods.LOGML, + log_auto_bounds=(log(100.0), log(10_000.0)), + ), + ), + ( # Number 10 (wrong length tuple) + (100.0, 10_000.0), + ValueError, + ), + ( # Number 11 (wrong type) + "error", + TypeError, + ), + ], +) +def test_get_checked_lambda( + combination: Tuple[ + _LambdaSpecsOrFlawed, Union[ExpectedWhittakerSmoothLambda, Type[Exception]] + ] +) -> None: + """ + Tests the function that casts a penalty weight lambda to the respective dataclass. + + The ``combination`` parameter defines + + - the lambda specification to be used and + - the expected result (will be an exception if the input should be considered + invalid by the function). + + """ + + # the input parameters are unpacked + lam, expected_result = combination + + # if the expected output is an exception, the test is run in a context manager + if not isinstance(expected_result, ExpectedWhittakerSmoothLambda): + with pytest.raises(expected_result): + get_checked_lambda(lam=lam) + + return + + # otherwise, the output dataclass is compared to the expected output + lambda_model = get_checked_lambda(lam=lam) + if isinstance(lambda_model, models.WhittakerSmoothLambda): + expected_result.assert_is_equal_to(other=lambda_model) + + return + + raise AssertionError( + "The lambda value could not be checked correctly since the returned value is " + "not an instance of the class 'WhittakerSmoothLambda'." 
+ ) + + +@pytest.mark.parametrize( + "combination", + [ + (None, 1.0), # Number 0 + ( # Number 1 + np.ones(shape=(10, 1_000), dtype=np.float64), + np.ones(shape=(1_000), dtype=np.float64), + ), + ( # Number 2 + np.ones(shape=(5, 1_000), dtype=np.float64), + IndexError, + ), + ( # Number 3 + np.ones(shape=(1_000), dtype=np.float64), + ValueError, + ), + ( # Number 4 + np.ones(shape=(1, 5, 1_000), dtype=np.float64), + ValueError, + ), + ("error", TypeError), # Number 5 + ], +) +def test_weight_generator( + combination: Tuple[Any, Union[np.ndarray, float, Type[Exception]]] +) -> None: + """ + Tests the weight generator. + + The ``combination`` parameter defines + + - the weights to be used and + - the expected output at each iteration (will be an exception if the input should + be considered invalid by the function). + + """ + + # the input parameters are unpacked + weights, expected_output = combination + + # the number of series is defined + n_series = 10 + + # if the expected output is an exception, the test is run in a context manager + if not isinstance(expected_output, (np.ndarray, float, int)): + with pytest.raises(expected_output): + for _ in get_weight_generator(w=weights, n_series=n_series): + pass + + return + + # otherwise, the output is compared to the expected output + # Case 1: the expected output is a scalar + if isinstance(expected_output, (float, int)): + for w in get_weight_generator(w=weights, n_series=n_series): + assert isinstance(w, (float, int)) + assert w == expected_output + + return + + # Case 2: the expected output is an array + for w in get_weight_generator(w=weights, n_series=n_series): + assert isinstance(w, np.ndarray) + assert np.array_equal(w, expected_output) + + +@pytest.mark.parametrize("combination", [(True, 244_9755_000.0), (False, 490_000.0)]) +def test_smooth_wrss(combination: Tuple[bool, float]) -> None: + """ + Tests the weighted residual sum of squares calculation. 
+ + The ``combination`` parameter defines + + - whether weights are used (``True``) or not (``False``) and + - the expected weighted residual sum of squares. + + """ + + # the input parameters are unpacked + with_weights, wrss_expected = combination + + # two series are generated where the difference between the elements is 7.0 + np.random.seed(42) + n_data = 10_000 + a_signs = np.random.choice([-1.0, 1.0], size=(n_data,), replace=True) + a_series = a_signs * 4.5 + b_series = (-1.0) * a_signs * 2.5 + + # the weights are generated + weights = ( + np.arange(start=0, stop=n_data, step=1.0, dtype=np.float64) + if with_weights + else 1.0 + ) + + # the wrss is calculated ... + wrss = get_smooth_wrss(b=a_series, b_smooth=b_series, w=weights) + + # ... and compared to the expected value with a very strict tolerance + assert np.isclose(wrss, wrss_expected, atol=1e-13, rtol=0.0) + + +# TODO: due to ill-conditioning, this is highly limited in the differences and number +# of data points; in the future, this should be tackled by QR-decomposition for +# extra numerical stability +@pytest.mark.parametrize( + "differences_and_n_data_from_to", + [ + (1, 0, 2_000), + (1, 2_001, 4_000), + (1, 4_001, 6_000), + (1, 6_001, 8_000), + (1, 8_001, 10_000), + (2, 0, 2_000), + (2, 2_001, 4_000), + (2, 4_001, 6_000), + (2, 6_001, 8_000), + (2, 8_001, 10_000), + ], +) +def test_penalty_log_pseudo_det_can_compute( + differences_and_n_data_from_to: Tuple[int, int, int] +) -> None: + """ + Tests the log pseudo-determinant of the penalty matrix for all the difference orders + and number of data points. 
+ + """ + + differences, n_data_from, n_data_to = differences_and_n_data_from_to + for nd in range(max(differences + 1, n_data_from), n_data_to + 1): + get_penalty_log_pseudo_det(n_data=nd, differences=differences, dtype=np.float64) + + +# TODO: this test will not 100% reflect reality as intended; in the future this should +# be tested with the LAPACK function ``dgbcon`` to check the condition number; +# right now, it is set to a number of data points that causes the intended +# failure, but in the future, the condition number has to be used to detect +# ill-conditioning +def test_penalty_log_pseudo_det_breaks_ill_conditioned() -> None: + """ + Tests that the log pseudo-determinant of the penalty matrix breaks when the matrix + is ill-conditioned. + + """ + + # the difference order and number of data points are set so high that the matrix + # becomes ill-conditioned + n_data = 1_000 + differences = 10 + + # the function is tested for breaking + with pytest.raises(RuntimeError): + get_penalty_log_pseudo_det( + n_data=n_data, differences=differences, dtype=np.float64 + ) + + return + + +# TODO: this test will not 100% reflect reality as intended; in the future this should +# be tested with the LAPACK function ``dgbcon`` to check the condition number; +# right now, the matrix is heavily altered to cause the intended failure, but in +# the future, the condition number has to be used to detect ill-conditioning +@pytest.mark.parametrize("with_pentapy", [True, False]) +def test_normal_condition_solve_breaks_ill_conditioned(with_pentapy: bool) -> None: + """ + Tests that the normal condition solver breaks when the matrix is ill-conditioned. + + Note that the conditions for the solver to break will never be met in practice. 
+ + """ + + # if pentapy is not installed but required, the test is skipped + if with_pentapy: + try: + import pentapy # noqa: F401 + except ImportError: + pytest.skip("Pentapy is not installed.") + + # a banded ill-conditioned matrix is created that has zeros on the diagonal + n_data = 10_000 + differences = 2 + a_banded = np.ones(shape=(2 * differences + 1, n_data), dtype=np.float64) + a_banded[differences, :] = 0.0 + + # some further required variables are initialised + lam = 1e100 + b_vect = np.ones(shape=(n_data,), dtype=np.float64) + weights = 0.0 + + # Test that the solver breaks + with pytest.raises(RuntimeError): + solve_normal_equations( + lam=lam, + differences=differences, + l_and_u=(differences, differences), + penalty_mat_banded=a_banded, + b_weighted=b_vect, + w=weights, + pentapy_enabled=with_pentapy, + ) + + +def test_whittakerlike_issues_warning_difference_order_too_high() -> None: + """ + Tests that the class :class:`WhittakerLikeSolver` issues a warning when the + difference order is greater than 2. + + """ + + with pytest.warns(UserWarning): + whitt_base = WhittakerLikeSolver() + whitt_base._setup_for_fit( + n_data=500, + differences=3, + lam=models.WhittakerSmoothLambda( + bounds=(100.0, 10_000.0), + method=models.WhittakerSmoothMethods.LOGML, + ), + child_class_name="pytest_run", + ) + + return + + +# TODO: this can only go to differences 2 due to ill-conditioning; in the future, this +# should be tackled by QR-decomposition for extra numerical stability +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("differences", [1, 2]) +def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( + differences: int, + same_weights_for_all: bool, +) -> None: + """ + Tests that the automatic lambda calculation using the log marginal likelihood method + refuses to work with no weights. + + """ + + # the smoother is initialised ... 
+ n_data = 500 + whitt_base = WhittakerLikeSolver() + whitt_base._setup_for_fit( + n_data=n_data, + differences=differences, + lam=models.WhittakerSmoothLambda( + bounds=(100.0, 10_000.0), + method=models.WhittakerSmoothMethods.LOGML, + ), + child_class_name="pytest_run", + ) + + # ... and the log marginal likelihood method is called without weights + np.random.seed(42) + X = np.random.rand(n_data) + with pytest.raises(ValueError): + whitt_base._whittaker_solve( + X=X, + w=None, + use_same_w_for_all=same_weights_for_all, + ) + + +@pytest.mark.parametrize("with_zero_weights", [True, False]) +@pytest.mark.parametrize("same_weights_for_all", [True, False]) +@pytest.mark.parametrize("differences", [1, 2]) +@pytest.mark.parametrize("n_series", [1, 5]) +def test_auto_lambda_log_marginal_likelihood( + spectrum_whittaker_auto_lambda: np.ndarray, # noqa: F811 + noise_level_whittaker_auto_lambda: np.ndarray, # noqa: F811 + n_series: int, + differences: int, + same_weights_for_all: bool, + with_zero_weights: bool, +) -> None: + """ + Tests the automatic lambda calculation using the log marginal likelihood method. + + Some of the noise standard deviations in the respective fixture are set to NaN which + allows for two different ways of handling them: + + - with zero weights, which will set the weights of the NaN values to zero, or + - interpolated weights, which will replace the NaN values with linearly interpolated + values which cannot be zero. + + This has slightly different effects on the log marginal likelihood calculation. + + Everything is tested against a from-scratch implementation based on SciPy to ensure + that the test is decoupled from the actual implementation used in Chemotools. 
+ + """ + + # first of all, the NaN values in the noise level are handled + noise_level = noise_level_whittaker_auto_lambda.copy() + + # Case 1: Zero weights + if with_zero_weights: + # this can be achieved by replacing the NaN-values with +inf + noise_level = np.where(np.isnan(noise_level), np.inf, noise_level) + + # Case 2: Interpolated weights + else: + # the NaN-values are replaced by linearly interpolated values + nan_flags = np.isnan(noise_level) + noise_level[nan_flags] = np.interp( + x=np.where(nan_flags)[0], + xp=np.where(~nan_flags)[0], + fp=noise_level[~nan_flags], + ) + + # then, the weights are computed as the square of the inverse noise level ... + weights = (1.0 / np.square(noise_level))[np.newaxis, ::] + # ... and stacked as many times as required + weights = np.tile(weights, reps=(n_series, 1)) + + # then, the spectrum is repeated as many times as required + X = np.tile(spectrum_whittaker_auto_lambda[np.newaxis, ::], reps=(n_series, 1)) + + # the smoothing is performed using the chemotools implementation + lambda_bounds = (1e-15, 1e10) + whitt_base = WhittakerLikeSolver() + whitt_base._setup_for_fit( + n_data=X.shape[1], + differences=differences, + lam=models.WhittakerSmoothLambda( + bounds=lambda_bounds, + method=models.WhittakerSmoothMethods.LOGML, + ), + child_class_name="pytest_run", + ) + _, lambda_opts = whitt_base._whittaker_solve( + X=X, + w=weights, + use_same_w_for_all=same_weights_for_all, + ) + + # the reference optimum lambda is found by a from-scratch implementation that relies + # on sparse matrices + lambda_opt_ref, _, _ = find_whittaker_smooth_opt_lambda_log_marginal_likelihood( + b_vect=X[0, ::], + weight_vect=weights[0, ::], + differences=differences, + log_lambda_bounds=(log(lambda_bounds[0]), log(lambda_bounds[1])), + n_opts=100, + ) + + # the results are compared with 1% relative tolerance + for lam_opts in lambda_opts: + assert np.isclose(lam_opts, lambda_opt_ref, rtol=1e-2) From a7f07e7934dee40702080f4cf0a0dc5ec0d2c2dd Mon
Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 00:32:28 +0200 Subject: [PATCH 083/118] doc: fixed type in docstring --- chemotools/utils/banded_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/banded_linalg.py index c2b51b8e..f59e6a86 100644 --- a/chemotools/utils/banded_linalg.py +++ b/chemotools/utils/banded_linalg.py @@ -30,7 +30,7 @@ def _datacopied(arr, original): """ - Strictly check for ``arr`` not sharing any data wit ``original``, under the + Strictly check for ``arr`` not sharing any data with ``original``, under the assumption that ``arr = asarray(original)`` Was copied from Scipy to be consistent in the LAPACK-wrappers implemented here. From fc8f6983fec9e0c6d37c5eed2c58b8396434af6f Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 00:33:17 +0200 Subject: [PATCH 084/118] test/cov: improved test coverage in Whittaker base by excluding edge case and removing dead branch of `if else` --- .../utils/whittaker_base/auto_lambda/logml.py | 4 +++- chemotools/utils/whittaker_base/main.py | 23 ++++++------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/chemotools/utils/whittaker_base/auto_lambda/logml.py b/chemotools/utils/whittaker_base/auto_lambda/logml.py index 4c3a1dac..8777400d 100644 --- a/chemotools/utils/whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/whittaker_base/auto_lambda/logml.py @@ -181,7 +181,9 @@ def get_log_marginal_likelihood( # otherwise, if the determinant is negative, the system is extremely # ill-conditioned and the log marginal likelihood cannot be computed - raise RuntimeError( + # NOTE: since it is very hard to trigger this exception, it is not covered by the + # tests + raise RuntimeError( # pragma: no cover "\nThe determinant of the combined left hand side matrix " "W + lambda * D.T @ D is negative, indicating that the system is extremely " "ill-conditioned.\n" diff --git 
a/chemotools/utils/whittaker_base/main.py b/chemotools/utils/whittaker_base/main.py index eb205bdc..a0ed2a15 100644 --- a/chemotools/utils/whittaker_base/main.py +++ b/chemotools/utils/whittaker_base/main.py @@ -208,7 +208,7 @@ def _marginal_likelihood_objective( self, log_lam: float, b: np.ndarray, - w: Union[float, np.ndarray], + w: np.ndarray, w_plus_penalty_plus_n_samples_term: float, ) -> float: """ @@ -223,21 +223,12 @@ def _marginal_likelihood_objective( # lambda lam = exp(log_lam) - # Case 1: no weights are provided - if isinstance(w, float): - b_smooth, _, factorization = self._solve( - lam=lam, - b_weighted=b, - w=w, - ) - - # Case 2: weights are provided - else: - b_smooth, _, factorization = self._solve( - lam=lam, - b_weighted=b * w, - w=w, - ) + # the solution of the linear system of equations is computed + b_smooth, _, factorization = self._solve( + lam=lam, + b_weighted=b * w, + w=w, + ) # finally, the log marginal likelihood is computed and returned (negative since # the objective function is minimized, but the log marginal likelihood is From dce0117ed8b2f7a324c319432a1a32771033aaff Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 00:35:29 +0200 Subject: [PATCH 085/118] test: added test for `_datacopied` --- tests/test_for_utils/test_banded_linalg.py | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_for_utils/test_banded_linalg.py b/tests/test_for_utils/test_banded_linalg.py index d3ece0ab..ba404984 100644 --- a/tests/test_for_utils/test_banded_linalg.py +++ b/tests/test_for_utils/test_banded_linalg.py @@ -6,11 +6,14 @@ ### Imports ### +from typing import List, Union + import numpy as np import pytest from scipy.linalg import solve_banded as scipy_solve_banded from chemotools.utils.banded_linalg import ( + _datacopied, conv_upper_chol_banded_to_lu_banded_storage, lu_banded, lu_solve_banded, @@ -18,9 +21,63 @@ ) from tests.test_for_utils.utils_funcs import get_banded_slogdet +### Constants ### + 
+_ARRAY_TO_VIEW: np.ndarray = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) +_VIEW = _ARRAY_TO_VIEW[::] + ### Test Suite ### +@pytest.mark.parametrize( + "arr, original, expected", + [ + ( # Number 0 Different arrays + np.array([1, 2, 3]), + np.array([1, 2, 3]), + True, + ), + ( # Number 1 Array and list + np.array([1, 2, 3]), + [1, 2, 3], + True, + ), + ( # Number 2 Different data types + np.array([1, 2, 3]), + np.array([1, 2, 3], dtype=np.float64), + True, + ), + ( # Number 3 Different view and array + _ARRAY_TO_VIEW[0:3], + np.array([1, 2, 3]), + False, + ), + ( # Number 4 Same array + _ARRAY_TO_VIEW, + _ARRAY_TO_VIEW, + False, + ), + ( # Number 5 Same view and array + _VIEW, + _ARRAY_TO_VIEW, + False, + ), + ], +) +def test_datacopied( + arr: np.ndarray, + original: Union[np.ndarray, List], + expected: bool, +) -> None: + """ + Tests the function that checks if a NumPy array has been copied from another array + or list. + + """ + + assert _datacopied(arr, original) == expected + + @pytest.mark.parametrize("with_finite_check", [True, False]) @pytest.mark.parametrize("overwrite_b", [True, False]) @pytest.mark.parametrize("n_rhs", [0, 1, 2]) From 01c032f45a7da2aaf77ac2d707f914fd847e6fab Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 12:59:15 +0200 Subject: [PATCH 086/118] feat: added type utility module --- chemotools/utils/types.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 chemotools/utils/types.py diff --git a/chemotools/utils/types.py b/chemotools/utils/types.py new file mode 100644 index 00000000..7ac4f2d3 --- /dev/null +++ b/chemotools/utils/types.py @@ -0,0 +1,12 @@ +""" +This utility submodule provides type hints for ``chemotools``. 
+ +""" + +### Imports ### + +from typing import Union + +### Types ### + +RealNumeric = Union[int, float] From d6e5d0234ad8291dcf898b34fe773e886b793679 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 13:40:15 +0200 Subject: [PATCH 087/118] refactor: added defaults to auto-lambda-specs-model --- chemotools/utils/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chemotools/utils/models.py b/chemotools/utils/models.py index d5b79dbb..b126279f 100644 --- a/chemotools/utils/models.py +++ b/chemotools/utils/models.py @@ -73,7 +73,7 @@ class WhittakerSmoothLambda: Attributes ---------- - bounds: int or float or (int or float, int or float) + bounds: int or float or (int or float, int or float), default=100.0 The bounds for the search space of the penalty weight lambda. The specification can be either @@ -90,7 +90,7 @@ class WhittakerSmoothLambda: ``1e-5 * upp_bound`` for any method other than ``WhittakerSmoothMethods.FIXED``. Otherwise, the method is set to ``WhittakerSmoothMethods.FIXED`` and the ``fixed_lambda`` is set to the upper bound. - method: WhittakerSmoothMethods or {"fixed", "logml"} + method: WhittakerSmoothMethods or {"fixed", "logml"}, default="fixed" The method to use for the selection of the penalty weight. If the bounds are too close to each other, this will be set to ``WhittakerSmoothMethods.FIXED``. 
@@ -106,8 +106,8 @@ class WhittakerSmoothLambda: """ - bounds: Union[int, float, tuple[Union[int, float], Union[int, float]]] - method: _WhittakerSmoothMethodsAll + bounds: Union[int, float, tuple[Union[int, float], Union[int, float]]] = 100.0 + method: _WhittakerSmoothMethodsAll = field(default=WhittakerSmoothMethods.FIXED) fixed_lambda: float = field(default=float("nan"), init=False) auto_bounds: tuple[float, float] = field( From b34eca1e88e1288d5041889e79952801a49824e7 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 13:46:46 +0200 Subject: [PATCH 088/118] refactor: made types consistent for whittaker base and estimator --- .../utils/whittaker_base/initialisation.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/chemotools/utils/whittaker_base/initialisation.py b/chemotools/utils/whittaker_base/initialisation.py index 5535a649..8f16784f 100644 --- a/chemotools/utils/whittaker_base/initialisation.py +++ b/chemotools/utils/whittaker_base/initialisation.py @@ -6,31 +6,35 @@ ### Imports ### -from typing import Any, Tuple, Type, Union +from typing import Any, Literal, Tuple, Type, Union import numpy as np from chemotools.utils import banded_linalg as bla from chemotools.utils import finite_differences as fdiff from chemotools.utils import models +from chemotools.utils.types import RealNumeric ### Type Aliases ### -_RealNumeric = Union[int, float] +_StrWhittakerSmoothMethods = Literal["fixed", "logml"] +_AllWhittakerSmoothMethods = Union[ + models.WhittakerSmoothMethods, _StrWhittakerSmoothMethods +] _WhittakerSmoothLambdaPlain = Tuple[ - _RealNumeric, - _RealNumeric, - models.WhittakerSmoothMethods, + RealNumeric, + RealNumeric, + _AllWhittakerSmoothMethods, ] _LambdaSpecs = Union[ - _RealNumeric, + RealNumeric, _WhittakerSmoothLambdaPlain, models.WhittakerSmoothLambda, ] ### Constants ### -_RealNumericTypes = (int, float) +RealNumericTypes = (int, float) ### Functions ### @@ -49,7 +53,7 @@ def 
get_checked_lambda(lam: Any) -> models.WhittakerSmoothLambda: # now, there are other cases to check # Case 1: lambda is a single number - if isinstance(lam, _RealNumericTypes): + if isinstance(lam, RealNumericTypes): return models.WhittakerSmoothLambda( bounds=lam, method=models.WhittakerSmoothMethods.FIXED ) From 2bea38b87dc464b8c64edeb2d761441c2e8440ba Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 13:59:58 +0200 Subject: [PATCH 089/118] feat: added author to `whittaker_base` --- chemotools/utils/whittaker_base/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/chemotools/utils/whittaker_base/__init__.py b/chemotools/utils/whittaker_base/__init__.py index fd4eefd8..69c23aab 100644 --- a/chemotools/utils/whittaker_base/__init__.py +++ b/chemotools/utils/whittaker_base/__init__.py @@ -9,12 +9,13 @@ """ +# Authors: +# Niklas Zell + ### Imports ### from chemotools.utils.models import ( # noqa: F401 WhittakerSmoothLambda, WhittakerSmoothMethods, ) -from chemotools.utils.whittaker_base.main import ( # noqa: F401 - WhittakerLikeSolver, -) +from chemotools.utils.whittaker_base.main import WhittakerLikeSolver # noqa: F401 From 9394025fe37982f92406f35969c2265314b676f1 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 14:08:26 +0200 Subject: [PATCH 090/118] feat: enriched `__init__` of `smooth` by docstring; improved linting --- chemotools/smooth/__init__.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/chemotools/smooth/__init__.py b/chemotools/smooth/__init__.py index b6befa63..6ac49119 100644 --- a/chemotools/smooth/__init__.py +++ b/chemotools/smooth/__init__.py @@ -1,4 +1,27 @@ -from ._mean_filter import MeanFilter -from ._median_filter import MedianFilter -from ._savitzky_golay_filter import SavitzkyGolayFilter -from ._whittaker_smooth import WhittakerSmooth \ No newline at end of file +""" +The ``chemotools`` module for smoothing data. 
+It defines the estimator classes for smoothing data with an Sklearn-like API: + +- :class:`MeanFilter` +- :class:`MedianFilter` +- :class:`SavitzkyGolayFilter` +- :class:`WhittakerSmooth` + +as well as auxiliary models to allow for convenient usage of the them: + +- :class:`WhittakerSmoothMethods` and :class:`WhittakerSmoothLambda` for the + :class:`WhittakerSmooth` class. + +""" + +### Imports ### + +from chemotools.utils.models import ( # noqa: F401 + WhittakerSmoothLambda, + WhittakerSmoothMethods, +) + +from ._mean_filter import MeanFilter # noqa: F401 +from ._median_filter import MedianFilter # noqa: F401 +from ._savitzky_golay_filter import SavitzkyGolayFilter # noqa: F401 +from ._whittaker_smooth import WhittakerSmooth # noqa: F401 From c4080406d0e28c25134e65124516bf62d1ce7b57 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 14:13:55 +0200 Subject: [PATCH 091/118] refactor: adapted docstrings of WhittakerSmooth; unified type hints; added more detailed code examples --- chemotools/smooth/_whittaker_smooth.py | 144 +++++++++++++++++++++---- 1 file changed, 126 insertions(+), 18 deletions(-) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 0442f09b..211207d5 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -14,15 +14,27 @@ """ -from typing import Optional, Union +# Authors: +# Pau Cabaneros +# Niklas Zell + +### Imports ### + +from typing import Literal, Optional, Tuple, Union from numpy import ndarray from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted from chemotools.utils.check_inputs import check_input, check_weights -from chemotools.utils.models import WhittakerSmoothLambda -from chemotools.utils.whittaker_base import WhittakerLikeSolver +from chemotools.utils.types import RealNumeric +from chemotools.utils.whittaker_base import ( + WhittakerLikeSolver, + 
WhittakerSmoothLambda, + WhittakerSmoothMethods, +) + +### Main Class ### class WhittakerSmooth( @@ -37,15 +49,20 @@ class WhittakerSmooth( Parameters ---------- - lam : float or int, default=1e2 - The lambda parameter to use for the Whittaker smooth. + lam : float or int or (float or int, float or int, {"fixed", "logml"} or WhittakerSmoothMethods) or WhittakerSmoothLambda, default=1e2 + The lambda parameter, a.k.a. the penalty weight, for the Whittaker smooth. In + general, higher values lead to smoother results, but changes take effect in a + logarithmic rather than linear manner. + It may thus not be zero or negative (``< 1e-25``). Also high values combined + with high ``differences`` will lead to numerical instability. + Please refer to the Notes section for further details. differences : int, default=1 The number of differences to use for the Whittaker smooth. If the aim is to obtain a smooth estimate of the ``m``-th order derivative, this should be set to at least ``m + 2``. - Currently, values >= 6 are highly discouraged and might lead to obscured - smoothing. + Currently, values ``>= 3`` are highly discouraged due to numerical instability + that might obscure the smoothing effect. Attributes ---------- @@ -78,23 +95,113 @@ class WhittakerSmooth( (2023) .. [3] https://math.stackexchange.com/q/4819039/1261538 - """ + Notes + ----- + For a more convenient usage of the following, it is recommended to import + ``WhittakerSmoothLambda`` and ``WhittakerSmoothMethods`` from ``chemotools.smooth``. + + The specification of ``lam`` controls how the lambda parameter is applied/chosen. + It may not be zero or negative (``< 1e-25``), but aside from that, it can be + specified in three different ways: + + - a scalar: A fixed lambda is used for all signals, which is a good starting + point. However, it is important to notice that even similar signals might + require quite different lambdas. 
+ + ```python + # fixed lambda of 100 + smoother = WhittakerSmooth(lam=1e2) + ``` + + Internally, it is represented by the method ``"fixed"`` or ``WhittakerSmoothMethods.FIXED``, + but this never has to be specified explicitly. + + - a tuple of two scalars and a string: The scalars serve as the lower and upper + bound for searching a lambda according to the method provided by the + string. + Each signal will then have its own optimized lambda. + Currently available methods for automated selection are: + + - ``logml`` or ``WhittakerSmoothMethods.LOGML``: The lambda is chosen by + maximizing the log marginal likelihood similar to the optimization used + by the ``sklearn.gaussian_process.GaussianProcessRegressor``. + It can only be used when ``sample_weight`` can be provided for the methods + :meth:`transform` and :meth:`fit_transform`. + + ```python + # will search the optimized lambda for each signal between 1e-5 and 1e10 + smoother = WhittakerSmooth(lam=(1e-5, 1e10, "logml")) + + # which is equivalent to + smoother = WhittakerSmooth(lam=(1e-5, 1e10, WhittakerSmoothMethods.LOGML)) + ``` + + - a ``WhittakerSmoothLambda`` object: This object serves as a convenient way for + specifying the ``bounds`` for the search space and the ``method`` for the lambda + selection. It covers both the fixed lambda and its automated selection. 
+ + ```python + # 1) fixed lambda of 100 + smoother = WhittakerSmooth(lam=WhittakerSmoothLambda(bounds=1e2)) + + + # which is equivalent to + smoother = WhittakerSmooth(lam=WhittakerSmoothLambda(bounds=(1e2, 1e2))) + + # 2) will search the optimized lambda for each signal between 1e-5 and 1e10 + smoother = WhittakerSmooth( + lam=WhittakerSmoothLambda( + bounds=(1e-5, 1e10), + method="logml", + ) + ) + + # which is equivalent to + smoother = WhittakerSmooth( + lam=WhittakerSmoothLambda( + bounds=(1e-5, 1e10), + method=WhittakerSmoothMethods.LOGML, + ) + ) + ``` + + If bounds are provided by either the tuple or the ``WhittakerSmoothLambda`` object, + the class will fall back to a fixed lambda in case the bounds are apart by less than + a factor of ``1e-5``, i.e., ``abs(upper - lower) < 1e-5 * upper``. + + """ # noqa: E501 def __init__( self, - lam: Union[float, int, WhittakerSmoothLambda] = 1e2, + lam: Union[ + RealNumeric, + Tuple[ + RealNumeric, + RealNumeric, + Union[Literal["fixed", "logml"], WhittakerSmoothMethods], + ], + WhittakerSmoothLambda, + ] = 1e2, differences: int = 1, ): - self.lam: Union[float, int, WhittakerSmoothLambda] = lam + self.lam: Union[ + RealNumeric, + Tuple[ + RealNumeric, + RealNumeric, + Union[Literal["fixed", "logml"], WhittakerSmoothMethods], + ], + WhittakerSmoothLambda, + ] = lam self.differences: int = differences - def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": + def fit(self, X: ndarray, y: None = None) -> "WhittakerSmooth": """ Fit the transformer to the input data. Parameters ---------- - X : np.ndarray of shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) The input data to fit the transformer to. It is internally promoted to ``np.float64`` to avoid loss of precision. 
@@ -120,6 +227,7 @@ def fit(self, X: ndarray, y=None) -> "WhittakerSmooth": n_data=self.n_features_in_, lam=self.lam, differences=self.differences, + child_class_name=self.__class__.__name__, ) # Set the fitted attribute to True @@ -138,21 +246,21 @@ def transform( Parameters ---------- - X : np.ndarray of shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) The input data to transform. It is internally promoted to ``np.float64`` to avoid loss of precision. y : None Ignored. - sample_weight : np.ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None + sample_weight : ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None Individual weights for each of the input data. If only 1 weight vector is provided, it is assumed to be the same for the features all samples. If ``None``, all features are assumed to have the same weight. Returns ------- - X_smoothed : np.ndarray of shape (n_samples, n_features) + X_smoothed : ndarray of shape (n_samples, n_features) The transformed data. """ # noqa: E501 @@ -193,14 +301,14 @@ def fit_transform( Parameters ---------- - X : np.ndarray of shape (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) The input data to fit and transform. It is internally promoted to ``np.float64`` to avoid loss of precision. y : None Ignored. - sample_weight : np.ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None + sample_weight : ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None Individual weights for each of the input data. If only 1 weight vector is provided, it is assumed to be the same for the features all samples. 
No weights may be negative (< 0.0) and at least one weight needs to be @@ -209,7 +317,7 @@ def fit_transform( Returns ------- - X_smoothed : np.ndarray of shape (n_samples, n_features) + X_smoothed : ndarray of shape (n_samples, n_features) The transformed data. """ # noqa: E501 From 0699a7cf4dbf582fb271fe72e7de91ba01e9f773 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 14:19:54 +0200 Subject: [PATCH 092/118] refactor: made `whittaker_base` a private utility --- chemotools/baseline/_air_pls.py | 2 +- chemotools/baseline/_ar_pls.py | 2 +- chemotools/smooth/_whittaker_smooth.py | 2 +- .../{whittaker_base => _whittaker_base}/__init__.py | 2 +- .../auto_lambda/__init__.py | 6 +++--- .../auto_lambda/logml.py | 2 +- .../auto_lambda/optimization.py | 0 .../auto_lambda/shared.py | 0 .../initialisation.py | 0 .../utils/{whittaker_base => _whittaker_base}/main.py | 8 ++++---- .../utils/{whittaker_base => _whittaker_base}/misc.py | 0 .../{whittaker_base => _whittaker_base}/solvers.py | 0 tests/test_for_utils/test_whittaker_base.py | 10 +++++----- tests/test_for_utils/utils_funcs.py | 2 +- 14 files changed, 18 insertions(+), 18 deletions(-) rename chemotools/utils/{whittaker_base => _whittaker_base}/__init__.py (88%) rename chemotools/utils/{whittaker_base => _whittaker_base}/auto_lambda/__init__.py (61%) rename chemotools/utils/{whittaker_base => _whittaker_base}/auto_lambda/logml.py (98%) rename chemotools/utils/{whittaker_base => _whittaker_base}/auto_lambda/optimization.py (100%) rename chemotools/utils/{whittaker_base => _whittaker_base}/auto_lambda/shared.py (100%) rename chemotools/utils/{whittaker_base => _whittaker_base}/initialisation.py (100%) rename chemotools/utils/{whittaker_base => _whittaker_base}/main.py (98%) rename chemotools/utils/{whittaker_base => _whittaker_base}/misc.py (100%) rename chemotools/utils/{whittaker_base => _whittaker_base}/solvers.py (100%) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 
cef6326b..a064270a 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -24,7 +24,7 @@ from sklearn.utils.validation import check_is_fitted from chemotools.utils.check_inputs import check_input -from chemotools.utils.whittaker_base import WhittakerLikeSolver +from chemotools.utils._whittaker_base import WhittakerLikeSolver logger = logging.getLogger(__name__) diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index c8ad49cc..3bf21d09 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -27,7 +27,7 @@ from sklearn.utils.validation import check_is_fitted, check_scalar from chemotools.utils.check_inputs import check_input -from chemotools.utils.whittaker_base import WhittakerLikeSolver +from chemotools.utils._whittaker_base import WhittakerLikeSolver logger = logging.getLogger(__name__) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 211207d5..d270091b 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -28,7 +28,7 @@ from chemotools.utils.check_inputs import check_input, check_weights from chemotools.utils.types import RealNumeric -from chemotools.utils.whittaker_base import ( +from chemotools.utils._whittaker_base import ( WhittakerLikeSolver, WhittakerSmoothLambda, WhittakerSmoothMethods, diff --git a/chemotools/utils/whittaker_base/__init__.py b/chemotools/utils/_whittaker_base/__init__.py similarity index 88% rename from chemotools/utils/whittaker_base/__init__.py rename to chemotools/utils/_whittaker_base/__init__.py index 69c23aab..add3d300 100644 --- a/chemotools/utils/whittaker_base/__init__.py +++ b/chemotools/utils/_whittaker_base/__init__.py @@ -18,4 +18,4 @@ WhittakerSmoothLambda, WhittakerSmoothMethods, ) -from chemotools.utils.whittaker_base.main import WhittakerLikeSolver # noqa: F401 +from chemotools.utils._whittaker_base.main import WhittakerLikeSolver # noqa: F401 
diff --git a/chemotools/utils/whittaker_base/auto_lambda/__init__.py b/chemotools/utils/_whittaker_base/auto_lambda/__init__.py similarity index 61% rename from chemotools/utils/whittaker_base/auto_lambda/__init__.py rename to chemotools/utils/_whittaker_base/auto_lambda/__init__.py index bba327c5..eb50bf8b 100644 --- a/chemotools/utils/whittaker_base/auto_lambda/__init__.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/__init__.py @@ -7,13 +7,13 @@ class implementation. ### Imports ### -from chemotools.utils.whittaker_base.auto_lambda.logml import ( # noqa: F401 +from chemotools.utils._whittaker_base.auto_lambda.logml import ( # noqa: F401 get_log_marginal_likelihood, get_log_marginal_likelihood_constant_term, ) -from chemotools.utils.whittaker_base.auto_lambda.optimization import ( # noqa: F401 +from chemotools.utils._whittaker_base.auto_lambda.optimization import ( # noqa: F401 get_optimized_lambda, ) -from chemotools.utils.whittaker_base.auto_lambda.shared import ( # noqa: F401 +from chemotools.utils._whittaker_base.auto_lambda.shared import ( # noqa: F401 _Factorization, ) diff --git a/chemotools/utils/whittaker_base/auto_lambda/logml.py b/chemotools/utils/_whittaker_base/auto_lambda/logml.py similarity index 98% rename from chemotools/utils/whittaker_base/auto_lambda/logml.py rename to chemotools/utils/_whittaker_base/auto_lambda/logml.py index 8777400d..4376daa9 100644 --- a/chemotools/utils/whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/logml.py @@ -13,7 +13,7 @@ from chemotools.utils import banded_linalg as bla from chemotools.utils import models -from chemotools.utils.whittaker_base.auto_lambda.shared import get_smooth_wrss +from chemotools.utils._whittaker_base.auto_lambda.shared import get_smooth_wrss ### Constants ### diff --git a/chemotools/utils/whittaker_base/auto_lambda/optimization.py b/chemotools/utils/_whittaker_base/auto_lambda/optimization.py similarity index 100% rename from 
chemotools/utils/whittaker_base/auto_lambda/optimization.py rename to chemotools/utils/_whittaker_base/auto_lambda/optimization.py diff --git a/chemotools/utils/whittaker_base/auto_lambda/shared.py b/chemotools/utils/_whittaker_base/auto_lambda/shared.py similarity index 100% rename from chemotools/utils/whittaker_base/auto_lambda/shared.py rename to chemotools/utils/_whittaker_base/auto_lambda/shared.py diff --git a/chemotools/utils/whittaker_base/initialisation.py b/chemotools/utils/_whittaker_base/initialisation.py similarity index 100% rename from chemotools/utils/whittaker_base/initialisation.py rename to chemotools/utils/_whittaker_base/initialisation.py diff --git a/chemotools/utils/whittaker_base/main.py b/chemotools/utils/_whittaker_base/main.py similarity index 98% rename from chemotools/utils/whittaker_base/main.py rename to chemotools/utils/_whittaker_base/main.py index a0ed2a15..874cd842 100644 --- a/chemotools/utils/whittaker_base/main.py +++ b/chemotools/utils/_whittaker_base/main.py @@ -17,10 +17,10 @@ from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.utils import models from chemotools.utils.banded_linalg import LAndUBandCounts -from chemotools.utils.whittaker_base import auto_lambda as auto -from chemotools.utils.whittaker_base import initialisation as init -from chemotools.utils.whittaker_base import solvers -from chemotools.utils.whittaker_base.misc import get_weight_generator +from chemotools.utils._whittaker_base import auto_lambda as auto +from chemotools.utils._whittaker_base import initialisation as init +from chemotools.utils._whittaker_base import solvers +from chemotools.utils._whittaker_base.misc import get_weight_generator ### Class Implementation ### diff --git a/chemotools/utils/whittaker_base/misc.py b/chemotools/utils/_whittaker_base/misc.py similarity index 100% rename from chemotools/utils/whittaker_base/misc.py rename to chemotools/utils/_whittaker_base/misc.py diff --git 
a/chemotools/utils/whittaker_base/solvers.py b/chemotools/utils/_whittaker_base/solvers.py similarity index 100% rename from chemotools/utils/whittaker_base/solvers.py rename to chemotools/utils/_whittaker_base/solvers.py diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py index 562ec707..d4c524a0 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/test_for_utils/test_whittaker_base.py @@ -13,14 +13,14 @@ import pytest from chemotools.utils import models -from chemotools.utils.whittaker_base.auto_lambda.shared import get_smooth_wrss -from chemotools.utils.whittaker_base.initialisation import ( +from chemotools.utils._whittaker_base.auto_lambda.shared import get_smooth_wrss +from chemotools.utils._whittaker_base.initialisation import ( get_checked_lambda, get_penalty_log_pseudo_det, ) -from chemotools.utils.whittaker_base.main import WhittakerLikeSolver -from chemotools.utils.whittaker_base.misc import get_weight_generator -from chemotools.utils.whittaker_base.solvers import solve_normal_equations +from chemotools.utils._whittaker_base.main import WhittakerLikeSolver +from chemotools.utils._whittaker_base.misc import get_weight_generator +from chemotools.utils._whittaker_base.solvers import solve_normal_equations from tests.fixtures import noise_level_whittaker_auto_lambda # noqa: F401 from tests.fixtures import spectrum_whittaker_auto_lambda # noqa: F401 from tests.test_for_utils.utils_funcs import ( diff --git a/tests/test_for_utils/utils_funcs.py b/tests/test_for_utils/utils_funcs.py index 5c1c1bbe..e16f5852 100644 --- a/tests/test_for_utils/utils_funcs.py +++ b/tests/test_for_utils/utils_funcs.py @@ -21,7 +21,7 @@ from scipy.sparse import linalg as spla from chemotools.utils.finite_differences import calc_forward_diff_kernel -from chemotools.utils.whittaker_base import WhittakerLikeSolver +from chemotools.utils._whittaker_base import WhittakerLikeSolver ### Utility Functions ### From 
6ef864aa8955b11673d7aa3af152a9010c74dc60 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 14:22:29 +0200 Subject: [PATCH 093/118] refactor: made `banded_linalg` private for now --- chemotools/utils/{banded_linalg.py => _banded_linalg.py} | 0 chemotools/utils/_whittaker_base/auto_lambda/logml.py | 2 +- chemotools/utils/_whittaker_base/initialisation.py | 2 +- chemotools/utils/_whittaker_base/main.py | 2 +- chemotools/utils/_whittaker_base/solvers.py | 2 +- tests/test_for_utils/test_banded_linalg.py | 2 +- 6 files changed, 5 insertions(+), 5 deletions(-) rename chemotools/utils/{banded_linalg.py => _banded_linalg.py} (100%) diff --git a/chemotools/utils/banded_linalg.py b/chemotools/utils/_banded_linalg.py similarity index 100% rename from chemotools/utils/banded_linalg.py rename to chemotools/utils/_banded_linalg.py diff --git a/chemotools/utils/_whittaker_base/auto_lambda/logml.py b/chemotools/utils/_whittaker_base/auto_lambda/logml.py index 4376daa9..0c2b8947 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/logml.py @@ -11,7 +11,7 @@ import numpy as np -from chemotools.utils import banded_linalg as bla +from chemotools.utils import _banded_linalg as bla from chemotools.utils import models from chemotools.utils._whittaker_base.auto_lambda.shared import get_smooth_wrss diff --git a/chemotools/utils/_whittaker_base/initialisation.py b/chemotools/utils/_whittaker_base/initialisation.py index 8f16784f..a2e3588b 100644 --- a/chemotools/utils/_whittaker_base/initialisation.py +++ b/chemotools/utils/_whittaker_base/initialisation.py @@ -10,7 +10,7 @@ import numpy as np -from chemotools.utils import banded_linalg as bla +from chemotools.utils import _banded_linalg as bla from chemotools.utils import finite_differences as fdiff from chemotools.utils import models from chemotools.utils.types import RealNumeric diff --git a/chemotools/utils/_whittaker_base/main.py 
b/chemotools/utils/_whittaker_base/main.py index 874cd842..91ef2005 100644 --- a/chemotools/utils/_whittaker_base/main.py +++ b/chemotools/utils/_whittaker_base/main.py @@ -16,7 +16,7 @@ from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.utils import models -from chemotools.utils.banded_linalg import LAndUBandCounts +from chemotools.utils._banded_linalg import LAndUBandCounts from chemotools.utils._whittaker_base import auto_lambda as auto from chemotools.utils._whittaker_base import initialisation as init from chemotools.utils._whittaker_base import solvers diff --git a/chemotools/utils/_whittaker_base/solvers.py b/chemotools/utils/_whittaker_base/solvers.py index 01987a1d..e84d26fb 100644 --- a/chemotools/utils/_whittaker_base/solvers.py +++ b/chemotools/utils/_whittaker_base/solvers.py @@ -12,7 +12,7 @@ import numpy as np from chemotools._runtime import PENTAPY_AVAILABLE -from chemotools.utils import banded_linalg as bla +from chemotools.utils import _banded_linalg as bla from chemotools.utils import models if PENTAPY_AVAILABLE: diff --git a/tests/test_for_utils/test_banded_linalg.py b/tests/test_for_utils/test_banded_linalg.py index ba404984..670c2b5f 100644 --- a/tests/test_for_utils/test_banded_linalg.py +++ b/tests/test_for_utils/test_banded_linalg.py @@ -12,7 +12,7 @@ import pytest from scipy.linalg import solve_banded as scipy_solve_banded -from chemotools.utils.banded_linalg import ( +from chemotools.utils._banded_linalg import ( _datacopied, conv_upper_chol_banded_to_lu_banded_storage, lu_banded, From 907baa473299505f8751b2259419e2dfb31d6596 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 14:28:08 +0200 Subject: [PATCH 094/118] refactor: made utility `models` and `types` private --- chemotools/smooth/__init__.py | 2 +- chemotools/smooth/_whittaker_smooth.py | 2 +- chemotools/utils/_banded_linalg.py | 2 +- chemotools/utils/{models.py => _models.py} | 0 chemotools/utils/{types.py => _types.py} | 0 
chemotools/utils/_whittaker_base/__init__.py | 2 +- .../_whittaker_base/auto_lambda/logml.py | 4 +- .../auto_lambda/optimization.py | 2 +- .../_whittaker_base/auto_lambda/shared.py | 6 +- .../utils/_whittaker_base/initialisation.py | 18 +++--- chemotools/utils/_whittaker_base/main.py | 12 ++-- chemotools/utils/_whittaker_base/solvers.py | 20 ++++--- tests/test_for_utils/test_models.py | 36 ++++++------ tests/test_for_utils/test_whittaker_base.py | 58 +++++++++---------- tests/test_for_utils/utils_models.py | 6 +- tests/test_functionality.py | 2 +- 16 files changed, 88 insertions(+), 84 deletions(-) rename chemotools/utils/{models.py => _models.py} (100%) rename chemotools/utils/{types.py => _types.py} (100%) diff --git a/chemotools/smooth/__init__.py b/chemotools/smooth/__init__.py index 6ac49119..ed6cdba2 100644 --- a/chemotools/smooth/__init__.py +++ b/chemotools/smooth/__init__.py @@ -16,7 +16,7 @@ ### Imports ### -from chemotools.utils.models import ( # noqa: F401 +from chemotools.utils._models import ( # noqa: F401 WhittakerSmoothLambda, WhittakerSmoothMethods, ) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index d270091b..5c9bc16a 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -27,7 +27,7 @@ from sklearn.utils.validation import check_is_fitted from chemotools.utils.check_inputs import check_input, check_weights -from chemotools.utils.types import RealNumeric +from chemotools.utils._types import RealNumeric from chemotools.utils._whittaker_base import ( WhittakerLikeSolver, WhittakerSmoothLambda, diff --git a/chemotools/utils/_banded_linalg.py b/chemotools/utils/_banded_linalg.py index f59e6a86..7ceb2e6d 100644 --- a/chemotools/utils/_banded_linalg.py +++ b/chemotools/utils/_banded_linalg.py @@ -18,7 +18,7 @@ from numpy.typing import ArrayLike from scipy.linalg import lapack -from chemotools.utils.models import BandedLUFactorization +from 
chemotools.utils._models import BandedLUFactorization ### Type Aliases ### diff --git a/chemotools/utils/models.py b/chemotools/utils/_models.py similarity index 100% rename from chemotools/utils/models.py rename to chemotools/utils/_models.py diff --git a/chemotools/utils/types.py b/chemotools/utils/_types.py similarity index 100% rename from chemotools/utils/types.py rename to chemotools/utils/_types.py diff --git a/chemotools/utils/_whittaker_base/__init__.py b/chemotools/utils/_whittaker_base/__init__.py index add3d300..35f1249d 100644 --- a/chemotools/utils/_whittaker_base/__init__.py +++ b/chemotools/utils/_whittaker_base/__init__.py @@ -14,7 +14,7 @@ ### Imports ### -from chemotools.utils.models import ( # noqa: F401 +from chemotools.utils._models import ( # noqa: F401 WhittakerSmoothLambda, WhittakerSmoothMethods, ) diff --git a/chemotools/utils/_whittaker_base/auto_lambda/logml.py b/chemotools/utils/_whittaker_base/auto_lambda/logml.py index 0c2b8947..10cbe632 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/logml.py @@ -12,7 +12,7 @@ import numpy as np from chemotools.utils import _banded_linalg as bla -from chemotools.utils import models +from chemotools.utils import _models from chemotools.utils._whittaker_base.auto_lambda.shared import get_smooth_wrss ### Constants ### @@ -22,7 +22,7 @@ ### Type Aliases ### # TODO: add QR factorization -_FactorizationForLogMarginalLikelihood = models.BandedLUFactorization +_FactorizationForLogMarginalLikelihood = _models.BandedLUFactorization ### Functions ### diff --git a/chemotools/utils/_whittaker_base/auto_lambda/optimization.py b/chemotools/utils/_whittaker_base/auto_lambda/optimization.py index 1b44f71b..baf7b116 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/optimization.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/optimization.py @@ -12,7 +12,7 @@ from scipy.optimize import minimize_scalar -from chemotools.utils.models 
import WhittakerSmoothLambda +from chemotools.utils._models import WhittakerSmoothLambda ### Constants ### diff --git a/chemotools/utils/_whittaker_base/auto_lambda/shared.py b/chemotools/utils/_whittaker_base/auto_lambda/shared.py index db1cf0ac..362b6e07 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/shared.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/shared.py @@ -11,11 +11,13 @@ import numpy as np -from chemotools.utils import models +from chemotools.utils import _models ### Type Aliases ### -_Factorization = Union[models.BandedLUFactorization, models.BandedPentapyFactorization] +_Factorization = Union[ + _models.BandedLUFactorization, _models.BandedPentapyFactorization +] ### Functions ### diff --git a/chemotools/utils/_whittaker_base/initialisation.py b/chemotools/utils/_whittaker_base/initialisation.py index a2e3588b..740e8ad6 100644 --- a/chemotools/utils/_whittaker_base/initialisation.py +++ b/chemotools/utils/_whittaker_base/initialisation.py @@ -12,14 +12,14 @@ from chemotools.utils import _banded_linalg as bla from chemotools.utils import finite_differences as fdiff -from chemotools.utils import models -from chemotools.utils.types import RealNumeric +from chemotools.utils import _models +from chemotools.utils._types import RealNumeric ### Type Aliases ### _StrWhittakerSmoothMethods = Literal["fixed", "logml"] _AllWhittakerSmoothMethods = Union[ - models.WhittakerSmoothMethods, _StrWhittakerSmoothMethods + _models.WhittakerSmoothMethods, _StrWhittakerSmoothMethods ] _WhittakerSmoothLambdaPlain = Tuple[ RealNumeric, @@ -29,7 +29,7 @@ _LambdaSpecs = Union[ RealNumeric, _WhittakerSmoothLambdaPlain, - models.WhittakerSmoothLambda, + _models.WhittakerSmoothLambda, ] ### Constants ### @@ -39,7 +39,7 @@ ### Functions ### -def get_checked_lambda(lam: Any) -> models.WhittakerSmoothLambda: +def get_checked_lambda(lam: Any) -> _models.WhittakerSmoothLambda: """ Checks the penalty weights lambda and casts it to the respective dataclass used 
inside the ``WhittakerLikeSolver`` class. @@ -48,14 +48,14 @@ def get_checked_lambda(lam: Any) -> models.WhittakerSmoothLambda: # if lambda is already the correct dataclass, it can be returned directly since all # the checks have already been performed - if isinstance(lam, models.WhittakerSmoothLambda): + if isinstance(lam, _models.WhittakerSmoothLambda): return lam # now, there are other cases to check # Case 1: lambda is a single number if isinstance(lam, RealNumericTypes): - return models.WhittakerSmoothLambda( - bounds=lam, method=models.WhittakerSmoothMethods.FIXED + return _models.WhittakerSmoothLambda( + bounds=lam, method=_models.WhittakerSmoothMethods.FIXED ) # Case 2: lambda is a tuple @@ -69,7 +69,7 @@ def get_checked_lambda(lam: Any) -> models.WhittakerSmoothLambda: ) # otherwise, the tuple is unpacked and the dataclass is created - return models.WhittakerSmoothLambda( + return _models.WhittakerSmoothLambda( bounds=(lam[0], lam[1]), method=lam[2], ) diff --git a/chemotools/utils/_whittaker_base/main.py b/chemotools/utils/_whittaker_base/main.py index 91ef2005..c7aad6f2 100644 --- a/chemotools/utils/_whittaker_base/main.py +++ b/chemotools/utils/_whittaker_base/main.py @@ -15,7 +15,7 @@ import numpy as np from chemotools._runtime import PENTAPY_AVAILABLE -from chemotools.utils import models +from chemotools.utils import _models from chemotools.utils._banded_linalg import LAndUBandCounts from chemotools.utils._whittaker_base import auto_lambda as auto from chemotools.utils._whittaker_base import initialisation as init @@ -113,7 +113,7 @@ def _setup_for_fit( # the input arguments are stored and validated self.n_data_: int = n_data self.differences_: int = differences - self._lam_inter_: models.WhittakerSmoothLambda = init.get_checked_lambda( + self._lam_inter_: _models.WhittakerSmoothLambda = init.get_checked_lambda( lam=lam ) self.__child_class_name: str = child_class_name @@ -147,7 +147,7 @@ def _setup_for_fit( self._diff_kernel_flipped_: np.ndarray = 
np.ndarray([], dtype=self.__dtype) self._penalty_mat_log_pseudo_det_: float = float("nan") if self._lam_inter_.fit_auto and self._lam_inter_.method_used in { - models.WhittakerSmoothMethods.LOGML, + _models.WhittakerSmoothMethods.LOGML, }: # NOTE: the kernel is also returned with integer entries because integer # computations can be carried out at maximum precision @@ -178,7 +178,7 @@ def _solve( lam: float, b_weighted: np.ndarray, w: Union[float, np.ndarray], - ) -> tuple[np.ndarray, models.BandedSolvers, auto._Factorization]: + ) -> tuple[np.ndarray, _models.BandedSolvers, auto._Factorization]: """ Internal wrapper for the solver methods to solve the linear system of equations for the Whittaker-like smoother. @@ -422,8 +422,8 @@ def _whittaker_solve( # first, the smoothing method is specified depending on whether the penalty # weight lambda is fitted automatically or not smooth_method_assignment = { - models.WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam, - models.WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_logml, + _models.WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam, + _models.WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_logml, } smooth_method = smooth_method_assignment[self._lam_inter_.method_used] diff --git a/chemotools/utils/_whittaker_base/solvers.py b/chemotools/utils/_whittaker_base/solvers.py index e84d26fb..816a899a 100644 --- a/chemotools/utils/_whittaker_base/solvers.py +++ b/chemotools/utils/_whittaker_base/solvers.py @@ -13,14 +13,16 @@ from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.utils import _banded_linalg as bla -from chemotools.utils import models +from chemotools.utils import _models if PENTAPY_AVAILABLE: import pentapy as pp ### Type Aliases ### -_Factorization = Union[models.BandedLUFactorization, models.BandedPentapyFactorization] +_Factorization = Union[ + _models.BandedLUFactorization, _models.BandedPentapyFactorization +] ### Functions ### @@ -71,7 +73,7 @@ 
def solve_ppivoted_lu( l_and_u: bla.LAndUBandCounts, a_banded: np.ndarray, b_weighted: np.ndarray, -) -> tuple[np.ndarray, models.BandedLUFactorization]: +) -> tuple[np.ndarray, _models.BandedLUFactorization]: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with a partially pivoted LU decomposition. This is the same as solving the linear system @@ -107,7 +109,7 @@ def solve_normal_equations( b_weighted: np.ndarray, w: Union[float, np.ndarray], pentapy_enabled: bool, -) -> tuple[np.ndarray, models.BandedSolvers, _Factorization]: +) -> tuple[np.ndarray, _models.BandedSolvers, _Factorization]: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` where ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and ``D`` is @@ -185,8 +187,8 @@ def solve_normal_equations( if np.isfinite(x).all(): return ( x, - models.BandedSolvers.PENTAPY, - models.BandedPentapyFactorization(), + _models.BandedSolvers.PENTAPY, + _models.BandedPentapyFactorization(), ) # Case 2: LU decomposition (final fallback for pentapy) @@ -198,14 +200,14 @@ def solve_normal_equations( ) return ( x, - models.BandedSolvers.PIVOTED_LU, + _models.BandedSolvers.PIVOTED_LU, lub_factorization, ) except np.linalg.LinAlgError: - available_solvers = f"{models.BandedSolvers.PIVOTED_LU}" + available_solvers = f"{_models.BandedSolvers.PIVOTED_LU}" if pentapy_enabled: - available_solvers = f"{models.BandedSolvers.PENTAPY}, {available_solvers}" + available_solvers = f"{_models.BandedSolvers.PENTAPY}, {available_solvers}" raise RuntimeError( f"\nAll available solvers ({available_solvers}) failed to solve the " diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index 28b4d232..cfe67ad6 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -10,7 +10,7 @@ import pytest -from chemotools.utils import models +from chemotools.utils import _models from 
tests.test_for_utils.utils_models import ExpectedWhittakerSmoothLambda ### Type aliases ### @@ -18,7 +18,7 @@ _RealNumeric = Union[float, int] _LambdaValueNumeric = Union[_RealNumeric, Tuple[_RealNumeric, _RealNumeric]] _LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] -_WhittakerMethod = Union[str, models.WhittakerSmoothMethods] +_WhittakerMethod = Union[str, _models.WhittakerSmoothMethods] _WhittakerMethodSequence = List[_WhittakerMethod] _LambdaTestCombination = Tuple[ _LambdaValueNumericOrFlawed, @@ -31,11 +31,11 @@ _NAN: float = float("nan") _FIXED_WHITTAKER_METHODS: _WhittakerMethodSequence = [ "fixed", - models.WhittakerSmoothMethods.FIXED, + _models.WhittakerSmoothMethods.FIXED, ] _LOGML_WHITTAKER_METHODS: _WhittakerMethodSequence = [ "logml", - models.WhittakerSmoothMethods.LOGML, + _models.WhittakerSmoothMethods.LOGML, ] # NOTE: "aauto" is not a typo, but helps to not confuse it with "all" _aauto_whittaker_methods: _WhittakerMethodSequence = _LOGML_WHITTAKER_METHODS + [] @@ -57,7 +57,7 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -68,7 +68,7 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -79,7 +79,7 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -90,7 +90,7 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -101,7 +101,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - 
method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -112,7 +112,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -123,7 +123,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -134,7 +134,7 @@ fixed_lambda=100.000001, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -145,7 +145,7 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), @@ -156,7 +156,7 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), @@ -167,7 +167,7 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), @@ -178,7 +178,7 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), @@ -283,7 +283,7 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N if not isinstance(expected_result, ExpectedWhittakerSmoothLambda): for meth in methods: with pytest.raises(expected_result): - 
models.WhittakerSmoothLambda( + _models.WhittakerSmoothLambda( bounds=lambda_value, # type: ignore method=meth, # type: ignore ) @@ -293,7 +293,7 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N # if the expected result is a valid result, the class is instantiated and the # generated object is compared to the expected result for meth in methods: - lambda_model = models.WhittakerSmoothLambda( + lambda_model = _models.WhittakerSmoothLambda( bounds=lambda_value, # type: ignore method=meth, # type: ignore ) diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py index d4c524a0..c4d1da2d 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/test_for_utils/test_whittaker_base.py @@ -12,7 +12,7 @@ import numpy as np import pytest -from chemotools.utils import models +from chemotools.utils import _models from chemotools.utils._whittaker_base.auto_lambda.shared import get_smooth_wrss from chemotools.utils._whittaker_base.initialisation import ( get_checked_lambda, @@ -31,7 +31,7 @@ ### Type Aliases ### _RealNumeric = Union[float, int] -_WhittakerMethod = Union[str, models.WhittakerSmoothMethods] +_WhittakerMethod = Union[str, _models.WhittakerSmoothMethods] _LambdaSpecs = Union[_RealNumeric, Tuple[_RealNumeric, _RealNumeric, _WhittakerMethod]] _LambdaSpecsOrFlawed = Union[_LambdaSpecs, str] @@ -51,7 +51,7 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), @@ -61,17 +61,17 @@ fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), ( # Number 2 (float search space, log marginal likelihood method enum) - (100.0, 10_000.0, models.WhittakerSmoothMethods.LOGML), + (100.0, 10_000.0, 
_models.WhittakerSmoothMethods.LOGML), ExpectedWhittakerSmoothLambda( fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), @@ -81,17 +81,17 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), ( # Number 4 (integer search space, log marginal likelihood method enum) - (100, 10_000, models.WhittakerSmoothMethods.LOGML), + (100, 10_000, _models.WhittakerSmoothMethods.LOGML), ExpectedWhittakerSmoothLambda( fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), @@ -101,59 +101,59 @@ fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), ( # Number 6 (dataclass float specification; fixed method) - models.WhittakerSmoothLambda( + _models.WhittakerSmoothLambda( bounds=100.0, - method=models.WhittakerSmoothMethods.FIXED, + method=_models.WhittakerSmoothMethods.FIXED, ), ExpectedWhittakerSmoothLambda( fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), ( # Number 7 (dataclass integer specification; fixed method) - models.WhittakerSmoothLambda( + _models.WhittakerSmoothLambda( bounds=100, - method=models.WhittakerSmoothMethods.FIXED, + method=_models.WhittakerSmoothMethods.FIXED, ), ExpectedWhittakerSmoothLambda( fixed_lambda=100.0, auto_bounds=(_NAN, _NAN), fit_auto=False, - 
method_used=models.WhittakerSmoothMethods.FIXED, + method_used=_models.WhittakerSmoothMethods.FIXED, log_auto_bounds=(_NAN, _NAN), ), ), ( # Number 8 (dataclass float specification; log marginal likelihood method) - models.WhittakerSmoothLambda( + _models.WhittakerSmoothLambda( bounds=(100.0, 10_000.0), - method=models.WhittakerSmoothMethods.LOGML, + method=_models.WhittakerSmoothMethods.LOGML, ), ExpectedWhittakerSmoothLambda( fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), ( # Number 9 (dataclass integer specification; log marginal likelihood method) - models.WhittakerSmoothLambda( + _models.WhittakerSmoothLambda( bounds=(100, 10_000), - method=models.WhittakerSmoothMethods.LOGML, + method=_models.WhittakerSmoothMethods.LOGML, ), ExpectedWhittakerSmoothLambda( fixed_lambda=_NAN, auto_bounds=(100.0, 10_000.0), fit_auto=True, - method_used=models.WhittakerSmoothMethods.LOGML, + method_used=_models.WhittakerSmoothMethods.LOGML, log_auto_bounds=(log(100.0), log(10_000.0)), ), ), @@ -195,7 +195,7 @@ def test_get_checked_lambda( # otherwise, the output dataclass is compared to the expected output lambda_model = get_checked_lambda(lam=lam) - if isinstance(lambda_model, models.WhittakerSmoothLambda): + if isinstance(lambda_model, _models.WhittakerSmoothLambda): expected_result.assert_is_equal_to(other=lambda_model) return @@ -422,9 +422,9 @@ def test_whittakerlike_issues_warning_difference_order_too_high() -> None: whitt_base._setup_for_fit( n_data=500, differences=3, - lam=models.WhittakerSmoothLambda( + lam=_models.WhittakerSmoothLambda( bounds=(100.0, 10_000.0), - method=models.WhittakerSmoothMethods.LOGML, + method=_models.WhittakerSmoothMethods.LOGML, ), child_class_name="pytest_run", ) @@ -452,9 +452,9 @@ def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( whitt_base._setup_for_fit( 
n_data=n_data, differences=differences, - lam=models.WhittakerSmoothLambda( + lam=_models.WhittakerSmoothLambda( bounds=(100.0, 10_000.0), - method=models.WhittakerSmoothMethods.LOGML, + method=_models.WhittakerSmoothMethods.LOGML, ), child_class_name="pytest_run", ) @@ -531,9 +531,9 @@ def test_auto_lambda_log_marginal_likelihood( whitt_base._setup_for_fit( n_data=X.shape[1], differences=differences, - lam=models.WhittakerSmoothLambda( + lam=_models.WhittakerSmoothLambda( bounds=lambda_bounds, - method=models.WhittakerSmoothMethods.LOGML, + method=_models.WhittakerSmoothMethods.LOGML, ), child_class_name="pytest_run", ) diff --git a/tests/test_for_utils/utils_models.py b/tests/test_for_utils/utils_models.py index 55723dc2..30b8a28b 100644 --- a/tests/test_for_utils/utils_models.py +++ b/tests/test_for_utils/utils_models.py @@ -9,7 +9,7 @@ from dataclasses import dataclass from typing import Tuple -from chemotools.utils import models +from chemotools.utils import _models from tests.test_for_utils.utils_funcs import float_is_bit_equal ### Dataclasses ### @@ -26,10 +26,10 @@ class ExpectedWhittakerSmoothLambda: fixed_lambda: float auto_bounds: Tuple[float, float] fit_auto: bool - method_used: models.WhittakerSmoothMethods + method_used: _models.WhittakerSmoothMethods log_auto_bounds: Tuple[float, float] = (0.0, 0.0) - def assert_is_equal_to(self, other: models.WhittakerSmoothLambda) -> None: + def assert_is_equal_to(self, other: _models.WhittakerSmoothLambda) -> None: """ Checks if the current instance is equal to another instance of the same class. 
diff --git a/tests/test_functionality.py b/tests/test_functionality.py index f5ea8ca0..35234436 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -29,7 +29,7 @@ StandardNormalVariate, ) from chemotools.smooth import MeanFilter, MedianFilter, WhittakerSmooth -from chemotools.utils.models import BandedSolvers +from chemotools.utils._models import BandedSolvers from tests.fixtures import reference_airpls # noqa: F401 from tests.fixtures import reference_arpls # noqa: F401 from tests.fixtures import reference_msc_mean # noqa: F401 From face1850615a58beafb85da04478f62900701f8e Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 14:36:17 +0200 Subject: [PATCH 095/118] refactor: made `finite_differences` a private utility module for now --- .../utils/{finite_differences.py => _finite_differences.py} | 0 chemotools/utils/_whittaker_base/initialisation.py | 2 +- tests/test_for_utils/test_finite_differences.py | 2 +- tests/test_for_utils/utils_funcs.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename chemotools/utils/{finite_differences.py => _finite_differences.py} (100%) diff --git a/chemotools/utils/finite_differences.py b/chemotools/utils/_finite_differences.py similarity index 100% rename from chemotools/utils/finite_differences.py rename to chemotools/utils/_finite_differences.py diff --git a/chemotools/utils/_whittaker_base/initialisation.py b/chemotools/utils/_whittaker_base/initialisation.py index 740e8ad6..090f4b86 100644 --- a/chemotools/utils/_whittaker_base/initialisation.py +++ b/chemotools/utils/_whittaker_base/initialisation.py @@ -11,7 +11,7 @@ import numpy as np from chemotools.utils import _banded_linalg as bla -from chemotools.utils import finite_differences as fdiff +from chemotools.utils import _finite_differences as fdiff from chemotools.utils import _models from chemotools.utils._types import RealNumeric diff --git a/tests/test_for_utils/test_finite_differences.py 
b/tests/test_for_utils/test_finite_differences.py index 43e34628..5818548d 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -11,7 +11,7 @@ import numpy as np import pytest -from chemotools.utils.finite_differences import ( +from chemotools.utils._finite_differences import ( calc_forward_diff_kernel, gen_squ_fw_fin_diff_mat_cho_banded, ) diff --git a/tests/test_for_utils/utils_funcs.py b/tests/test_for_utils/utils_funcs.py index e16f5852..5adf91cf 100644 --- a/tests/test_for_utils/utils_funcs.py +++ b/tests/test_for_utils/utils_funcs.py @@ -20,7 +20,7 @@ from scipy.sparse import diags as sp_diags from scipy.sparse import linalg as spla -from chemotools.utils.finite_differences import calc_forward_diff_kernel +from chemotools.utils._finite_differences import calc_forward_diff_kernel from chemotools.utils._whittaker_base import WhittakerLikeSolver ### Utility Functions ### From 6885f7c0f53f18fb772901db2e69cc31a28c14a3 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 15:11:16 +0200 Subject: [PATCH 096/118] style: made variable names of Whittaker base more concise with special focus on single character names; unified variable names globally --- chemotools/smooth/_whittaker_smooth.py | 4 +- .../_whittaker_base/auto_lambda/logml.py | 25 +++--- .../_whittaker_base/auto_lambda/shared.py | 12 +-- chemotools/utils/_whittaker_base/main.py | 79 ++++++++++--------- chemotools/utils/_whittaker_base/misc.py | 16 ++-- chemotools/utils/_whittaker_base/solvers.py | 44 ++++++----- tests/test_for_utils/test_whittaker_base.py | 16 ++-- tests/test_functionality.py | 8 +- 8 files changed, 111 insertions(+), 93 deletions(-) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 5c9bc16a..025fea6b 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -26,13 +26,13 @@ from sklearn.base import BaseEstimator, 
OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted -from chemotools.utils.check_inputs import check_input, check_weights from chemotools.utils._types import RealNumeric from chemotools.utils._whittaker_base import ( WhittakerLikeSolver, WhittakerSmoothLambda, WhittakerSmoothMethods, ) +from chemotools.utils.check_inputs import check_input, check_weights ### Main Class ### @@ -288,7 +288,7 @@ def transform( # Calculate the whittaker smooth return self._whittaker_solve( - X=X_, w=sample_weight_checked, use_same_w_for_all=use_same_w_for_all + X=X_, weights=sample_weight_checked, use_same_w_for_all=use_same_w_for_all )[0] def fit_transform( diff --git a/chemotools/utils/_whittaker_base/auto_lambda/logml.py b/chemotools/utils/_whittaker_base/auto_lambda/logml.py index 10cbe632..b8d1d250 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/logml.py @@ -30,7 +30,7 @@ def get_log_marginal_likelihood_constant_term( differences: int, penalty_mat_log_pseudo_det: float, - w: np.ndarray, + weights: np.ndarray, zero_weight_tol: float, ) -> float: """ @@ -64,9 +64,9 @@ def get_log_marginal_likelihood_constant_term( # first, the constant terms of the log marginal likelihood are computed starting # from the log pseudo-determinant of the weight matrix, i.e., the product of the # non-zero elements of the weight vector - nonzero_w_flags = w > w.max() * zero_weight_tol + nonzero_w_flags = weights > weights.max() * zero_weight_tol nnz_w = nonzero_w_flags.sum() - log_pseudo_det_w = np.log(w[nonzero_w_flags]).sum() + log_pseudo_det_w = np.log(weights[nonzero_w_flags]).sum() # the constant term of the log marginal likelihood is computed return ( @@ -82,9 +82,9 @@ def get_log_marginal_likelihood( lam: float, differences: int, diff_kernel_flipped: np.ndarray, - b: np.ndarray, - b_smooth: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + rhs_b_smooth: np.ndarray, + 
weights: Union[float, np.ndarray], w_plus_penalty_plus_n_samples_term: float, ) -> float: """ @@ -153,13 +153,18 @@ def get_log_marginal_likelihood( """ # noqa: E501 # first, the weighted Sum of Squared Residuals is computed ... - wrss = get_smooth_wrss(b=b, b_smooth=b_smooth, w=w) + wrss = get_smooth_wrss( + rhs_b=rhs_b, + rhs_b_smooth=rhs_b_smooth, + weights=weights, + ) # ... followed by the Penalty Sum of Squares which requires the squared forward # finite differences of the smoothed series # NOTE: ``np.convolve`` is used to compute the forward finite differences and # since it flips the provided kernel, an already flipped kernel is used pss = ( - lam * np.square(np.convolve(b_smooth, diff_kernel_flipped, mode="valid")).sum() + lam + * np.square(np.convolve(rhs_b_smooth, diff_kernel_flipped, mode="valid")).sum() ) # besides the determinant of the combined left hand side matrix has to be @@ -174,7 +179,7 @@ def get_log_marginal_likelihood( return -0.5 * ( wrss + pss - - (b.size - differences) * log_lam + - (rhs_b.size - differences) * log_lam + lhs_logabsdet + w_plus_penalty_plus_n_samples_term ) @@ -183,7 +188,7 @@ def get_log_marginal_likelihood( # ill-conditioned and the log marginal likelihood cannot be computed # NOTE: since it is very hard to trigger this exception, it is not covered by the # tests - raise RuntimeError( # pragma: no cover + raise RuntimeError( # pragma: no cover "\nThe determinant of the combined left hand side matrix " "W + lambda * D.T @ D is negative, indicating that the system is extremely " "ill-conditioned.\n" diff --git a/chemotools/utils/_whittaker_base/auto_lambda/shared.py b/chemotools/utils/_whittaker_base/auto_lambda/shared.py index 362b6e07..0f95c8eb 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/shared.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/shared.py @@ -23,9 +23,9 @@ def get_smooth_wrss( - b: np.ndarray, - b_smooth: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + rhs_b_smooth: 
np.ndarray, + weights: Union[float, np.ndarray], ) -> float: """ Computes the (weighted) Sum of Squared Residuals (w)RSS between the original and @@ -34,8 +34,8 @@ def get_smooth_wrss( """ # Case 1: no weights are provided - if isinstance(w, float): - return np.square(b - b_smooth).sum() + if isinstance(weights, float): + return np.square(rhs_b - rhs_b_smooth).sum() # Case 2: weights are provided - return (w * np.square(b - b_smooth)).sum() + return (weights * np.square(rhs_b - rhs_b_smooth)).sum() diff --git a/chemotools/utils/_whittaker_base/main.py b/chemotools/utils/_whittaker_base/main.py index c7aad6f2..5aae7e70 100644 --- a/chemotools/utils/_whittaker_base/main.py +++ b/chemotools/utils/_whittaker_base/main.py @@ -176,8 +176,8 @@ def _setup_for_fit( def _solve( self, lam: float, - b_weighted: np.ndarray, - w: Union[float, np.ndarray], + rhs_b_weighted: np.ndarray, + weights: Union[float, np.ndarray], ) -> tuple[np.ndarray, _models.BandedSolvers, auto._Factorization]: """ Internal wrapper for the solver methods to solve the linear system of equations @@ -197,8 +197,8 @@ def _solve( differences=self.differences_, l_and_u=self._l_and_u_, penalty_mat_banded=self._penalty_mat_banded_, - b_weighted=b_weighted, - w=w, + rhs_b_weighted=rhs_b_weighted, + weights=weights, pentapy_enabled=self._pentapy_enabled_, ) @@ -207,8 +207,8 @@ def _solve( def _marginal_likelihood_objective( self, log_lam: float, - b: np.ndarray, - w: np.ndarray, + rhs_b: np.ndarray, + weights: np.ndarray, w_plus_penalty_plus_n_samples_term: float, ) -> float: """ @@ -226,8 +226,8 @@ def _marginal_likelihood_objective( # the solution of the linear system of equations is computed b_smooth, _, factorization = self._solve( lam=lam, - b_weighted=b * w, - w=w, + rhs_b_weighted=rhs_b * weights, + weights=weights, ) # finally, the log marginal likelihood is computed and returned (negative since @@ -239,9 +239,9 @@ def _marginal_likelihood_objective( lam=lam, differences=self.differences_, 
diff_kernel_flipped=self._diff_kernel_flipped_, - b=b, - b_smooth=b_smooth, - w=w, + rhs_b=rhs_b, + rhs_b_smooth=b_smooth, + weights=weights, w_plus_penalty_plus_n_samples_term=w_plus_penalty_plus_n_samples_term, ) @@ -249,8 +249,8 @@ def _marginal_likelihood_objective( def _solve_single_b_fixed_lam( self, - b: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + weights: Union[float, np.ndarray], lam: Optional[float] = None, ) -> tuple[np.ndarray, float]: """ @@ -271,12 +271,12 @@ def _solve_single_b_fixed_lam( # the most efficient way around going into this method in the first place; # in the future this might change and thus, this case is kept for now, but # ignored for coverage - if isinstance(w, float): # pragma: no cover + if isinstance(weights, float): # pragma: no cover return ( self._solve( lam=lam, - b_weighted=b, - w=w, + rhs_b_weighted=rhs_b, + weights=weights, )[0], lam, ) @@ -285,16 +285,16 @@ def _solve_single_b_fixed_lam( return ( self._solve( lam=lam, - b_weighted=b * w, - w=w, + rhs_b_weighted=rhs_b * weights, + weights=weights, )[0], lam, ) def _solve_single_b_auto_lam_logml( self, - b: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + weights: Union[float, np.ndarray], ) -> tuple[np.ndarray, float]: """ Solves for the Whittaker-like smoother solution for a single series with an @@ -305,7 +305,7 @@ def _solve_single_b_auto_lam_logml( # if the weights are not provided, the log marginal likelihood cannot be # computed - at least not in a meaningful way - if isinstance(w, (float, int)): + if isinstance(weights, (float, int)): raise ValueError( "\nAutomatic fitting of the penalty weight lambda by maximizing the " "log marginal likelihood is only possible if weights are provided.\n" @@ -316,7 +316,7 @@ def _solve_single_b_auto_lam_logml( w_plus_n_samples_term = auto.get_log_marginal_likelihood_constant_term( differences=self.differences_, penalty_mat_log_pseudo_det=self._penalty_mat_log_pseudo_det_, - w=w, + 
weights=weights, zero_weight_tol=self.__zero_weight_tol, ) @@ -324,17 +324,21 @@ def _solve_single_b_auto_lam_logml( opt_lambda = auto.get_optimized_lambda( fun=self._marginal_likelihood_objective, lam=self._lam_inter_, - args=(b, w, w_plus_n_samples_term), + args=(rhs_b, weights, w_plus_n_samples_term), ) # the optimal penalty weight lambda is returned together with the smoothed # series - return self._solve_single_b_fixed_lam(b=b, w=w, lam=opt_lambda) + return self._solve_single_b_fixed_lam( + rhs_b=rhs_b, + weights=weights, + lam=opt_lambda, + ) def _solve_multiple_b( self, X: np.ndarray, - w: Optional[np.ndarray], + weights: Optional[np.ndarray], ) -> tuple[np.ndarray, np.ndarray]: """ Solves for the Whittaker-like smoother solution for multiple series when the @@ -349,19 +353,19 @@ def _solve_multiple_b( # then, the solution of the linear system of equations is computed for the # transposed series matrix (expected right-hand side format for the solvers) # Case 1: no weights are provided - if w is None: + if weights is None: X_smooth, _, _ = self._solve( lam=self._lam_inter_.fixed_lambda, - b_weighted=X.transpose(), - w=1.0, + rhs_b_weighted=X.transpose(), + weights=1.0, ) # Case 2: weights are provided else: X_smooth, _, _ = self._solve( lam=self._lam_inter_.fixed_lambda, - b_weighted=(X * w).transpose(), - w=w[0, ::], + rhs_b_weighted=(X * weights).transpose(), + weights=weights[0, ::], ) return ( @@ -375,7 +379,7 @@ def _whittaker_solve( self, X: np.ndarray, *, - w: Optional[np.ndarray] = None, + weights: Optional[np.ndarray] = None, use_same_w_for_all: bool = False, ) -> tuple[np.ndarray, np.ndarray]: """ @@ -389,7 +393,7 @@ def _whittaker_solve( ---------- X : ndarray of shape (m, n) The series to be smoothed stored as individual rows. - w : ndarray of shape(1, n) or shape(m, n) or None + weights : ndarray of shape(1, n) or shape(m, n) or None The weights to be applied for smoothing. 
If only a single row is provided and ``use_same_w_for_all`` is ``True``, the same weights can be applied for all series in ``X``, which enhances the smoothing a lot for fixed @@ -415,7 +419,7 @@ def _whittaker_solve( # can be done more efficiently by leveraging LAPACK'S (not pentapy's) ability to # perform multiple solves from the same inversion at once if use_same_w_for_all and not self._lam_inter_.fit_auto: - return self._solve_multiple_b(X=X, w=w) + return self._solve_multiple_b(X=X, weights=weights) # otherwise, the solution of the linear system of equations is computed for # each series @@ -430,8 +434,11 @@ def _whittaker_solve( # then, the solution is computed for each series by means of a loop X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) - w_gen = get_weight_generator(w=w, n_series=X.shape[0]) - for iter_i, (x_vect, w_vect) in enumerate(zip(X, w_gen)): - X_smooth[iter_i], lam[iter_i] = smooth_method(b=x_vect, w=w_vect) + w_gen = get_weight_generator(weights=weights, n_series=X.shape[0]) + for iter_i, (x_vect, wght) in enumerate(zip(X, w_gen)): + X_smooth[iter_i], lam[iter_i] = smooth_method( + rhs_b=x_vect, + weights=wght, + ) return X_smooth, lam diff --git a/chemotools/utils/_whittaker_base/misc.py b/chemotools/utils/_whittaker_base/misc.py index 35f8fd5e..d6a191ce 100644 --- a/chemotools/utils/_whittaker_base/misc.py +++ b/chemotools/utils/_whittaker_base/misc.py @@ -14,7 +14,7 @@ def get_weight_generator( - w: Any, + weights: Any, n_series: int, ) -> Generator[Union[float, np.ndarray], None, None]: """ @@ -24,25 +24,25 @@ def get_weight_generator( """ # if the weights are neither None nor a 2D-Array, an error is raised - if not (w is None or isinstance(w, np.ndarray)): + if not (weights is None or isinstance(weights, np.ndarray)): raise TypeError( f"The weights must either be None or a NumPy-2D-Array, but they are of " - f"type '{type(w)}'." + f"type '{type(weights)}'." 
) # Case 1: No weights - if w is None: + if weights is None: for _ in range(n_series): yield 1.0 # Case 2: 2D weights - elif w.ndim == 2: + elif weights.ndim == 2: for idx in range(0, n_series): - yield w[idx] + yield weights[idx] # Case 3: Invalid weights - elif w.ndim != 2: + elif weights.ndim != 2: raise ValueError( f"If provided as an Array, the weights must be a 2D-Array, but they are " - f"{w.ndim}-dimensional with shape {w.shape}." + f"{weights.ndim}-dimensional with shape {weights.shape}." ) diff --git a/chemotools/utils/_whittaker_base/solvers.py b/chemotools/utils/_whittaker_base/solvers.py index 816a899a..e8bd2f80 100644 --- a/chemotools/utils/_whittaker_base/solvers.py +++ b/chemotools/utils/_whittaker_base/solvers.py @@ -27,7 +27,10 @@ ### Functions ### -def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: +def solve_pentapy( + lhs_a_banded: np.ndarray, + rhs_b_weighted: np.ndarray, +) -> np.ndarray: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with the ``pentapy`` package. 
This is the same as solving the linear system ``A @ x = b`` @@ -41,10 +44,10 @@ def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: """ # for 1-dimensional right-hand side vectors, the solution is computed directly - if b_weighted.ndim == 1: + if rhs_b_weighted.ndim == 1: return pp.solve( - mat=a_banded, - rhs=b_weighted, + mat=lhs_a_banded, + rhs=rhs_b_weighted, is_flat=True, index_row_wise=False, solver=1, @@ -56,11 +59,11 @@ def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: # NOTE: the solutions are first written into the rows of the solution matrix # because row-access is more efficient for C-contiguous arrays; # afterwards, the solution matrix is transposed - solution = np.empty(shape=(b_weighted.shape[1], b_weighted.shape[0])) - for iter_j in range(0, b_weighted.shape[1]): + solution = np.empty(shape=(rhs_b_weighted.shape[1], rhs_b_weighted.shape[0])) + for iter_j in range(0, rhs_b_weighted.shape[1]): solution[iter_j, ::] = pp.solve( - mat=a_banded, - rhs=b_weighted[::, iter_j], + mat=lhs_a_banded, + rhs=rhs_b_weighted[::, iter_j], is_flat=True, index_row_wise=False, solver=1, @@ -71,8 +74,8 @@ def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: def solve_ppivoted_lu( l_and_u: bla.LAndUBandCounts, - a_banded: np.ndarray, - b_weighted: np.ndarray, + lhs_a_banded: np.ndarray, + rhs_b_weighted: np.ndarray, ) -> tuple[np.ndarray, _models.BandedLUFactorization]: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with a @@ -87,13 +90,13 @@ def solve_ppivoted_lu( lub_factorization = bla.lu_banded( l_and_u=l_and_u, - ab=a_banded, + ab=lhs_a_banded, check_finite=False, ) return ( bla.lu_solve_banded( lub_factorization=lub_factorization, - b=b_weighted, + b=rhs_b_weighted, check_finite=False, overwrite_b=True, ), @@ -106,8 +109,8 @@ def solve_normal_equations( differences: int, l_and_u: bla.LAndUBandCounts, penalty_mat_banded: np.ndarray, - b_weighted: 
np.ndarray, - w: Union[float, np.ndarray], + rhs_b_weighted: np.ndarray, + weights: Union[float, np.ndarray], pentapy_enabled: bool, ) -> tuple[np.ndarray, _models.BandedSolvers, _Factorization]: """ @@ -177,13 +180,16 @@ def solve_normal_equations( # the banded storage format for the LAPACK LU decomposition is computed by # scaling the penalty matrix with the penalty weight lambda and then adding the # diagonal matrix with the weights - a_banded = lam * penalty_mat_banded - a_banded[differences, ::] += w + lhs_a_banded = lam * penalty_mat_banded + lhs_a_banded[differences, ::] += weights # the linear system of equations is solved with the most efficient method # Case 1: Pentapy can be used if pentapy_enabled: - x = solve_pentapy(a_banded=a_banded, b_weighted=b_weighted) + x = solve_pentapy( + lhs_a_banded=lhs_a_banded, + rhs_b_weighted=rhs_b_weighted, + ) if np.isfinite(x).all(): return ( x, @@ -195,8 +201,8 @@ def solve_normal_equations( try: x, lub_factorization = solve_ppivoted_lu( l_and_u=l_and_u, - a_banded=a_banded, - b_weighted=b_weighted, + lhs_a_banded=lhs_a_banded, + rhs_b_weighted=rhs_b_weighted, ) return ( x, diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py index c4d1da2d..0ee04120 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/test_for_utils/test_whittaker_base.py @@ -252,7 +252,7 @@ def test_weight_generator( # if the expected output is an exception, the test is run in a context manager if not isinstance(expected_output, (np.ndarray, float, int)): with pytest.raises(expected_output): - for _ in get_weight_generator(w=weights, n_series=n_series): + for _ in get_weight_generator(weights=weights, n_series=n_series): pass return @@ -260,14 +260,14 @@ def test_weight_generator( # otherwise, the output is compared to the expected output # Case 1: the expected output is a scalar if isinstance(expected_output, (float, int)): - for w in get_weight_generator(w=weights, 
n_series=n_series): + for w in get_weight_generator(weights=weights, n_series=n_series): assert isinstance(w, (float, int)) assert w == expected_output return # Case 2: the expected output is an array - for w in get_weight_generator(w=weights, n_series=n_series): + for w in get_weight_generator(weights=weights, n_series=n_series): assert isinstance(w, np.ndarray) assert np.array_equal(w, expected_output) @@ -302,7 +302,7 @@ def test_smooth_wrss(combination: Tuple[bool, float]) -> None: ) # the wrss is calculated ... - wrss = get_smooth_wrss(b=a_series, b_smooth=b_series, w=weights) + wrss = get_smooth_wrss(rhs_b=a_series, rhs_b_smooth=b_series, weights=weights) # ... and compared to the expected value with a very strict tolerance assert np.isclose(wrss, wrss_expected, atol=1e-13, rtol=0.0) @@ -404,8 +404,8 @@ def test_normal_condition_solve_breaks_ill_conditioned(with_pentapy: bool) -> No differences=differences, l_and_u=(differences, differences), penalty_mat_banded=a_banded, - b_weighted=b_vect, - w=weights, + rhs_b_weighted=b_vect, + weights=weights, pentapy_enabled=with_pentapy, ) @@ -465,7 +465,7 @@ def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( with pytest.raises(ValueError): whitt_base._whittaker_solve( X=X, - w=None, + weights=None, use_same_w_for_all=same_weights_for_all, ) @@ -539,7 +539,7 @@ def test_auto_lambda_log_marginal_likelihood( ) _, lambda_opts = whitt_base._whittaker_solve( X=X, - w=weights, + weights=weights, use_same_w_for_all=same_weights_for_all, ) diff --git a/tests/test_functionality.py b/tests/test_functionality.py index 35234436..b3f05d4e 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -862,8 +862,8 @@ def test_whittaker_with_pentapy( # NOTE: the weight is not correct since the test only checks the method solve_method = whittaker_smooth._solve( lam=whittaker_smooth._lam_inter_.fixed_lambda, - b_weighted=spectrum.transpose(), - w=1.0, + rhs_b_weighted=spectrum.transpose(), + weights=1.0, 
)[1] assert solve_method == BandedSolvers.PENTAPY @@ -877,8 +877,8 @@ def test_whittaker_with_pentapy( # NOTE: the weight is not correct since the test only checks the method solve_method = whittaker_smooth._solve( lam=whittaker_smooth._lam_inter_.fixed_lambda, - b_weighted=spectrum.transpose(), - w=1.0, + rhs_b_weighted=spectrum.transpose(), + weights=1.0, )[1] assert solve_method == BandedSolvers.PIVOTED_LU assert np.allclose(spectrum_corr_pentapy[0], spectrum_corr_factorized_solve[0]) From e78823e25feb6f4bb4a3521793d8f76523d8b1b6 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 16:29:01 +0200 Subject: [PATCH 097/118] test/feat: tested weight generator more thoroughly --- tests/test_for_utils/test_whittaker_base.py | 39 ++++++++++++++++----- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py index 0ee04120..f8a0e4e8 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/test_for_utils/test_whittaker_base.py @@ -229,11 +229,12 @@ def test_get_checked_lambda( ("error", TypeError), # Number 5 ], ) -def test_weight_generator( +def test_weight_generator_identical_weights( combination: Tuple[Any, Union[np.ndarray, float, Type[Exception]]] ) -> None: """ - Tests the weight generator. + Tests the weight generator when provided with weights that are identical for all + signals. 
The ``combination`` parameter defines @@ -260,16 +261,38 @@ def test_weight_generator( # otherwise, the output is compared to the expected output # Case 1: the expected output is a scalar if isinstance(expected_output, (float, int)): - for w in get_weight_generator(weights=weights, n_series=n_series): - assert isinstance(w, (float, int)) - assert w == expected_output + for wght in get_weight_generator(weights=weights, n_series=n_series): + assert isinstance(wght, (float, int)) + assert wght == expected_output return # Case 2: the expected output is an array - for w in get_weight_generator(weights=weights, n_series=n_series): - assert isinstance(w, np.ndarray) - assert np.array_equal(w, expected_output) + for wght in get_weight_generator(weights=weights, n_series=n_series): + assert isinstance(wght, np.ndarray) + assert np.array_equal(wght, expected_output) + + +def test_weight_generator_different_weights() -> None: + """ + Tests the weight generator when provided with weights that are different for each + signal. 
+ + """ + + # the weights are defined + weights = np.array( + [ + [1.0, 2.0, 3.0, 4.0, 5.0], + [6.0, 7.0, 8.0, 9.0, 10.0], + [11.0, 12.0, 13.0, 14.0, 15.0], + ] + ) + weights_ref = weights.copy() + + # the generator is tested + for idx, wght in enumerate(get_weight_generator(weights=weights, n_series=3)): + assert np.array_equal(wght, weights_ref[idx, ::]) @pytest.mark.parametrize("combination", [(True, 244_9755_000.0), (False, 490_000.0)]) From 78610f48095fc19aae8ef94c3800a40cf35f59d9 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 16:31:28 +0200 Subject: [PATCH 098/118] feat: added author name; minor adaption in variable names --- chemotools/baseline/_air_pls.py | 14 ++++++++++++-- chemotools/baseline/_ar_pls.py | 13 +++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index a064270a..492687e4 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -16,6 +16,14 @@ """ +# Authors: +# Pau Cabaneros +# Niklas Zell + + +### Imports ### + + import logging from typing import Union @@ -23,11 +31,13 @@ from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted -from chemotools.utils.check_inputs import check_input from chemotools.utils._whittaker_base import WhittakerLikeSolver +from chemotools.utils.check_inputs import check_input logger = logging.getLogger(__name__) +### Main Class ### + # TODO: is polynomial_order actually differences and if so, is the description correct? 
class AirPls( @@ -187,7 +197,7 @@ def _calculate_air_pls(self, x): # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` for i in range(0, self.nr_iterations - 1): # the baseline is fitted using the Whittaker smoother framework - z, _ = self._solve_single_b_fixed_lam(b=x, w=w) + z, _ = self._solve_single_b_fixed_lam(rhs_b=x, weights=w) d = x - z dssn = np.abs(d[d < 0].sum()) diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 3bf21d09..c5106d9b 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -18,6 +18,13 @@ """ +# Authors: +# Pau Cabaneros +# Niklas Zell + + +### Imports ### + import logging from numbers import Integral from typing import Union @@ -26,11 +33,13 @@ from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted, check_scalar -from chemotools.utils.check_inputs import check_input from chemotools.utils._whittaker_base import WhittakerLikeSolver +from chemotools.utils.check_inputs import check_input logger = logging.getLogger(__name__) +### Main Class ### + class ArPls( OneToOneFeatureMixin, @@ -194,7 +203,7 @@ def _calculate_ar_pls(self, x): # FIXME: work on full Arrays and use internal loop of ``whittaker_solve`` for _ in range(self.nr_iterations): # the baseline is fitted using the Whittaker smoother framework - z, _ = self._solve_single_b_fixed_lam(b=x, w=w) + z, _ = self._solve_single_b_fixed_lam(rhs_b=x, weights=w) d = x - z # if there is no data point below the baseline, the baseline is considered From 4f1eb692b29627de9f3a34643aadac9fcbfe1178 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 16:44:57 +0200 Subject: [PATCH 099/118] doc: unified docstrings of Whittaker-like classes --- chemotools/baseline/_air_pls.py | 3 +++ chemotools/baseline/_ar_pls.py | 7 +++++++ chemotools/smooth/_whittaker_smooth.py | 2 +- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git 
a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index 492687e4..ce3a05bb 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -61,6 +61,9 @@ class AirPls( polynomial_order : int, optional default=1 The degree of the polynomial used to fit the baseline. A value of 1 corresponds to a linear fit, while higher values correspond to higher-order polynomials. + Higher values will result in a smoother baseline. + Currently, values ``>= 3`` are highly discouraged due to numerical instability + that might obscure the smoothing effect. nr_iterations : int, optional default=15 The number of iterations used to calculate the baseline. Increasing the number diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index c5106d9b..90c92925 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -59,6 +59,13 @@ class ArPls( The lambda parameter that controls the smoothness of the baseline. Higher values will result in a smoother baseline. + differences : int, default=2 + The order of the differences used for the penalty terms that enforces smoothness + of the baseline. + Higher values will result in a smoother baseline. + Currently, values ``>= 3`` are highly discouraged due to numerical instability + that might obscure the smoothing effect. + ratio : float, default=0.01 The convergence threshold for the weight updating scheme. Lower values will result in a more accurate baseline at the cost of computation time and even diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 025fea6b..71b38163 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -58,7 +58,7 @@ class WhittakerSmooth( Please refer to the Notes section for further details. differences : int, default=1 - The number of differences to use for the Whittaker smooth. 
If the aim is to + The order of differences to use for the Whittaker smooth. If the aim is to obtain a smooth estimate of the ``m``-th order derivative, this should be set to at least ``m + 2``. Currently, values ``>= 3`` are highly discouraged due to numerical instability From bb284993aa5715398f06368b88ded8a0f30b4a9e Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 16:55:06 +0200 Subject: [PATCH 100/118] feat: added `line_profiler` to dev requirements --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 2258cbae..7d04e5dd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,6 @@ black isort +line_profiler matplotlib pentapy pytest From c22491581c49212142937230d35e7dc95d1e046e Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 17:03:43 +0200 Subject: [PATCH 101/118] doc: updated documentation of `WhittakerSmooth` --- docs/smooth.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/smooth.md b/docs/smooth.md index f48ac6da..4bdab865 100644 --- a/docs/smooth.md +++ b/docs/smooth.md @@ -44,7 +44,7 @@ It is an automated smoothing algorithm that uses a penalized least squares appro | Argument | Description | Type | Default | | --- | --- | --- | --- | -| ```lam``` | smoothing factor. | ```float``` | ```1e2``` | +| ```lam``` | smoothing factor. | ```float or tuple[float, float, str] or WhittakerSmoothLambda``` | ```1e2``` | | ```differences``` | The number of differences to use. 
| ```int``` | ```1``` | ### __Usage examples__: From 395c7c063c528b0b98398113544317fbd0ea738e Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 22:25:23 +0200 Subject: [PATCH 102/118] test/doc: clarified doctest origin --- tests/test_for_utils/utils_funcs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_for_utils/utils_funcs.py b/tests/test_for_utils/utils_funcs.py index 5adf91cf..0e72d4f2 100644 --- a/tests/test_for_utils/utils_funcs.py +++ b/tests/test_for_utils/utils_funcs.py @@ -382,6 +382,8 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( ... multiply_vect_with_squ_fw_fin_diff_orig_first, ... ) + >>> # All the following tests were checked using LibreOffice Calc + >>> # Test 1 >>> differences = 1 >>> kernel = np.array([-1, 1]) From dd4cc7a9647aa147014f3c9e9131a02e7b1e87b3 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 21 May 2024 00:35:21 +0200 Subject: [PATCH 103/118] test/feat/refactor: added preliminary global/local noise estimation function with tests; minor refactoring of finite differences; improved finite difference tests model handling --- chemotools/utils/_finite_differences.py | 337 +++++++++++++++++- tests/fixtures.py | 84 ++++- .../noise_estimation_refs.csv | 8 + .../noise_estimation_signal_refcalc.ods | Bin 0 -> 104007 bytes .../test_for_utils/test_finite_differences.py | 217 ++++++++++- tests/test_for_utils/utils_models.py | 44 ++- 6 files changed, 667 insertions(+), 23 deletions(-) create mode 100644 tests/resources/noise_level_estimation/noise_estimation_refs.csv create mode 100644 tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py index 0ba05a03..c3a558b8 100644 --- a/chemotools/utils/_finite_differences.py +++ b/chemotools/utils/_finite_differences.py @@ -1,9 +1,29 @@ -from math import comb -from numbers import Integral +""" +This utility submodule provides functions for the computation of 
forward finite +differences, namely + +- the kernel for forward and central finite differences, +- computation of related kernel matrices +- estimation of the noise standard deviation of a series + +""" + +### Imports ### + +from math import comb, factorial +from numbers import Integral, Real +from typing import Any, Callable, Literal, Optional, Tuple, Union import numpy as np +from scipy.ndimage import median_filter from sklearn.utils import check_scalar +### Constants ### + +_MAD_PREFACTOR = 1.482602 + +### Functions ### + def calc_forward_diff_kernel( *, @@ -36,7 +56,7 @@ def calc_forward_diff_kernel( Raises ------ ValueError - If the difference order is below 1. + If ``differences`` is below 1. """ # the input is validated @@ -58,6 +78,95 @@ def calc_forward_diff_kernel( ) +def calc_central_diff_kernel(*, differences: int, accuracy: int = 2) -> np.ndarray: + """ + Computes the kernel for central finite differences which can be applied to a + series by means of a convolution, e.g., + + ```python + kernel = calc_central_fin_diff_kernel(differences=2, accuracy=2) + differences = np.convolve(series, np.flip(kernel), mode="valid") + # NOTE: NumPy flips the kernel internally due to the definition of convolution + ``` + + Parameters + ---------- + differences : int + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Values below 1 are not allowed. + accuracy : int, default=2 + The accuracy of the finite difference approximation, which has to be an even + integer ``>= 2``. + The higher the accuracy, the better the approximation. + + Returns + ------- + fin_diff_kernel : ndarray of shape (kernel_size,) + A NumPy-1D-vector resembling the kernel from the code example above. Since the + elements are not necessarily integers, the data type is ``np.float64``. 
+ Its size is given by ``2 * floor((differences + 1) / 2) - 1 + accuracy`` where + ``floor`` returns the next lower integer. + + Raises + ------ + ValueError + If ``differences`` is below 1. + ValueError + If ``accuracy`` is not an even integer ``>= 2``. + + References + ---------- + The computation is based on the description in [1]_. + + .. [1] Wikipedia, "Finite difference coefficient - Central finite difference", + URL: https://en.wikipedia.org/wiki/Finite_difference_coefficient#Central_finite_difference + + """ # noqa: E501 + + ### Input Validation ### + + # first, difference order and accuracy are validated + check_scalar( + differences, + name="differences", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + + check_scalar( + accuracy, + name="accuracy", + target_type=Integral, + min_val=2, + include_boundaries="left", + ) + if accuracy % 2 == 1: + raise ValueError("Got accuracy = {accuracy}, expected an even integer.") + + ### Central Difference Kernel Computation ### + + # first, the size of the kernel is computed + kernel_size = 2 * ((differences + 1) // 2) - 1 + accuracy + half_kernel_size = kernel_size // 2 + + # then, the linear system to solve for the coefficients is set up + grid_vect = np.arange( + start=-half_kernel_size, + stop=half_kernel_size + 1, + step=1, + dtype=np.int64, + ) + lhs_mat = np.vander(grid_vect, N=kernel_size, increasing=True).transpose() + rhs_vect = np.zeros(shape=(kernel_size,), dtype=np.int64) + rhs_vect[differences] = factorial(differences) + + # the coefficients are computed by solving the linear system + return np.linalg.solve(lhs_mat, rhs_vect) + + def _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( *, n_data: int, @@ -199,7 +308,7 @@ def gen_squ_fw_fin_diff_mat_cho_banded( If ``n_data`` is below ``differences + 1``, i.e., the kernel does not fit into the data at least once. ValueError - If the difference order is below 1. + If ``differences`` is below 1. 
Notes ----- @@ -262,3 +371,223 @@ def gen_squ_fw_fin_diff_mat_cho_banded( n_data=n_data, differences=differences, ) + + +def estimate_noise_stddev( + series: np.ndarray, + differences: int = 6, + diff_accuracy: int = 2, + window_size: Optional[int] = None, + extrapolator: Callable[..., np.ndarray] = np.pad, + extrapolator_args: Tuple[Any, ...] = ("reflect",), + power: Literal[-2, -1, 1, 2] = 1, + stddev_min: Union[float, int] = 1e-10, +) -> np.ndarray: + """ + EXPERIMENTAL FEATURE + + Estimates the local/global noise standard deviation of a series even in the presence + of trends, like baselines and peaks, as well as outliers by using forward finite + differences. + Please see the Notes section for further details. + + Parameters + ---------- + series : ndarray of shape (n_data,) + The series for which the noise standard deviation is estimated. + differences : int, default=6 + The order of the differences starting from 0 for the original curve, 1 for the + first order, 2 for the second order, ..., and ``m`` for the ``m``-th order + differences. + Empirically, 5-6 was found as a sweet spot, but even numbers work better with + the default ``extrapolator``. + Values below 1 are not allowed. + diff_accuracy : int, default=2 + The accuracy of the finite difference approximation, which has to be an even + integer ``>= 2``. + window_size : int or None, default=None + The odd window size around a datapoint to estimate its local noise standard + deviation. + Higher values will lead to a smoother noise standard deviation estimate by + sacrificing the local resolution. At the same time, edge effects start blurring + in if the ``extrapolator`` does not provide a good extrapolation. + If provided, it has to be at least 1. + If ``None``, the global noise standard deviation is estimated, i.e., it will + be the same for each data point. 
+ extrapolator : callable, default=np.pad + The extrapolator function that is used to pad the series before the finite + differences and the median filter are applied. It will pad the signal with + ``pad_width = (diff_kernel_size // 2) + (window_size // 2)`` elements on each + side where ``diff_kernel_size`` is the size of the central finite differences + kernel (see the Notes for details). + It has to be a callable with the following signature: + + ```python + series_extrap = extrapolator(series, pad_width, *extrapolator_args) + ``` + + If ``window_size`` is ``None``, only the central finite differences kernel is + considered. + By default, the signal is padded by reflecting ``series`` at the edges on either + side, but of course the quality of the noise estimation can be improved by using + a more sophisticated extrapolation method. + extrapolator_args : tuple, default=("reflect",) + Additional arguments that are passed to the extrapolator function as described + for ``extrapolator``. + power : {-2, -1, 1, 2}, default=1 + The power to which the noise standard deviation is raised. + This can be used to compute the: + + - original noise standard deviation (``power=1``), + - the noise variance (``power=2``), + - the inverse noise standard deviation (``power=-1``), or + - the inverse noise variance (``power=-2``; typically used as weights). + + stddev_min : float or int, default=1e-10 + The minimum noise standard deviation that is allowed. + Any estimated noise standard deviation below this value will be set to this + value. + It must be at least ``1e-15``. + + Returns + ------- + noise_stddev : ndarray of shape (n_data,) + The estimated noise standard deviation raised to ``power`` for each data point + in the series. + + Raises + ------ + ValueError + If ``series.size`` is less than the kernel or window size (see Notes for + details). + ValueError + If ``differences`` is below 1. + ValueError + If ``diff_accuracy`` is not an even integer ``>= 2``.
+ ValueError + If ``window_size`` is below 1. + + + References + ---------- + The estimation algorithm is an adaption of the global estimation logic applied for + the "DER SNR" proposed in [1]_ (see the Notes for further details). + + .. [1] Stoehr F., et al., "DER SNR: A Simple & General Spectroscopic Signal-to-Noise + Measurement Algorithm", Astronomical Data Analysis Software and Systems XVII P5.4 + ASP Conference Series, Vol. XXX, 2008 + + Notes + ----- + The "DER SNR" algorithm estimates a global noise level in a robust fashion by + applying a modified version of the Median Absolute Deviation (MAD) to the + derivative/differences of the signal. By using a moving MAD filter, the local noise + level can be estimated as well. + The algorithm does not work well for signals that are perfectly noise-free. + + The kernel size for the central finite difference kernel is given by + ``2 * floor((differences + 1) / 2) - 1 + diff_accuracy``. + + """ + + ### Input Validation ### + + # first, the window size, power, and minimum standard deviation are validated + # NOTE: the difference order and accuracy are validated by the central finite differences + # kernel function + # window size + if window_size is not None: + check_scalar( + window_size, + name="window_size", + target_type=Integral, + min_val=1, + include_boundaries="left", + ) + if window_size % 2 == 0: + raise ValueError( + "Got window_size = {window_size}, expected an odd integer."
+ ) + + # power + if power not in {-2, -1, 1, 2}: + raise ValueError(f"Got power = {power}, expected -2, -1, 1, or 2.") + + # minimum standard deviation + check_scalar( + stddev_min, + name="stddev_min", + target_type=Real, + min_val=1e-15, + include_boundaries="left", + ) + + # for validation of the series, the central finite differences kernel has to be + # computed + diff_kernel = calc_central_diff_kernel( + differences=differences, + accuracy=diff_accuracy, + ) + + # afterwards, the series is validated + if series.size < diff_kernel.size: + raise ValueError( + f"Got series.size = {series.size}, must be >= {diff_kernel.size} (kernel " + f"size)." + ) + + if window_size is not None: + if series.size < window_size: + raise ValueError( + f"Got series.size = {series.size}, must be >= {window_size} (window " + "size)." + ) + + ### Noise Standard Deviation Estimation ### + + # the signal is extrapolated to avoid edge effects + pad_width = diff_kernel.size // 2 + pad_width += 0 if window_size is None else window_size // 2 + series_extrap = extrapolator( + series, + pad_width, + *extrapolator_args, + ) + + # the absolute forward finite differences are computed ... + abs_diff_series = np.abs( + np.convolve(series_extrap, np.flip(diff_kernel), mode="valid") + ) + size_after_diff = abs_diff_series.size + + # ... 
and the median filter is applied to these differences + prefactor = _MAD_PREFACTOR / np.linalg.norm(diff_kernel) + # Case 1: the global noise standard deviation is estimated + if window_size is None: + noise_stddev = np.full_like( + series, + fill_value=prefactor * np.median(abs_diff_series), + ) + + # Case 2: the local noise standard deviation is estimated + else: + half_window_size = window_size // 2 + noise_stddev = ( + prefactor + * median_filter( + abs_diff_series, + size=window_size, + mode="constant", + )[half_window_size : size_after_diff - half_window_size] + ) + + # the minimum-bounded noise standard deviation is raised to the power + noise_stddev = np.maximum(noise_stddev, stddev_min) + + if power in {-2, 2}: + noise_stddev = np.square(noise_stddev) + + if power in {-2, -1}: + noise_stddev = np.reciprocal(noise_stddev) + + return noise_stddev diff --git a/tests/fixtures.py b/tests/fixtures.py index 9176d3b6..29fe6ed3 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,13 +1,24 @@ +### Imports ### + import os from typing import List import numpy as np import pytest +from tests.test_for_utils.utils_models import ( + NoiseEstimationReference, + RefDifferenceKernel, +) + +### Constants ### + test_directory = os.path.dirname(os.path.abspath(__file__)) path_to_resources = os.path.join(test_directory, "resources") +### Fixtures ### + @pytest.fixture def spectrum() -> List[np.ndarray]: @@ -122,12 +133,11 @@ def noise_level_whittaker_auto_lambda() -> np.ndarray: @pytest.fixture -def reference_finite_differences() -> List[tuple[int, int, np.ndarray]]: +def reference_forward_finite_differences() -> List[RefDifferenceKernel]: fin_diff_table = np.genfromtxt( os.path.join(path_to_resources, "reference_finite_differences.csv"), skip_header=2, delimiter=",", - missing_values="#N/A", filling_values=np.nan, dtype=np.float64, ) @@ -138,11 +148,73 @@ def reference_finite_differences() -> List[tuple[int, int, np.ndarray]]: # removed row = fin_diff_table[row_idx,
::] fin_diff_ordered_coeffs.append( - ( - int(row[0]), - int(row[1]), - row[2:][~np.isnan(row[2:])], + RefDifferenceKernel( + differences=round(row[0]), + accuracy=round(row[1]), + kernel=row[2:][~np.isnan(row[2:])], ) ) return fin_diff_ordered_coeffs + + +@pytest.fixture +def noise_level_estimation_signal() -> np.ndarray: + fpath = os.path.join( + path_to_resources, + "noise_level_estimation/noise_estimation_refs.csv", + ) + data = np.genfromtxt( + fpath, + delimiter=",", + skip_header=1, + filling_values=np.nan, + dtype=np.float64, + ) + + # the original signal is indicated by the first 4 columns with metadata being NaN + metadata = data[::, 0:4] + signal_idx = np.where(np.isnan(metadata).all(axis=1))[0][0] + + return data[signal_idx, 4:] + + +@pytest.fixture +def noise_level_estimation_refs() -> List[NoiseEstimationReference]: + fpath = os.path.join( + path_to_resources, + "noise_level_estimation/noise_estimation_refs.csv", + ) + data = np.genfromtxt( + fpath, + delimiter=",", + skip_header=1, + filling_values=np.nan, + dtype=np.float64, + ) + + # the original signal is indicated by the first 4 columns with metadata being NaN + # it has to be excluded from the references + metadata = data[::, 0:4] + signal_idx = np.where(np.isnan(metadata).all(axis=1))[0][0] + data = np.delete(data, obj=signal_idx, axis=0) + + # then, all the references are extracted + noise_level_refs = [] + for row_idx in range(0, data.shape[0]): + row = data[row_idx, ::] + # if the window size is 0, it is set to None because this indicates that the + # global noise level is to be estimated rather than a local one + window_size = int(row[0]) + window_size = window_size if window_size > 0 else None + noise_level_refs.append( + NoiseEstimationReference( + window_size=window_size, + min_noise_level=row[1], + differences=round(row[2]), + accuracy=round(row[3]), + noise_level=row[4:], + ) + ) + + return noise_level_refs diff --git a/tests/resources/noise_level_estimation/noise_estimation_refs.csv 
b/tests/resources/noise_level_estimation/noise_estimation_refs.csv new file mode 100644 index 00000000..65db6a81 --- /dev/null +++ b/tests/resources/noise_level_estimation/noise_estimation_refs.csv @@ -0,0 +1,8 @@ +Window Size,Min Noise Level,Difference Order,Accuracy,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100 +,,,,-0.0590418038387701,0.0943535834281746,0.1996106967074170,0.2076665665445250,0.2565976378911490,0.3130822235037050,0.4272820436955330,0.4336807688473590,0.5529790761949000,0.6205116979795420,0.6367459615029690,0.6402323907080600,0.7612810390503280,0.7352360002255730,0.7677988900730690,0.8308269827744960,0.8643816497026570,0.8600246311281250,0.9150360580810840,0.8493913653842080,0.9063026644138470,0.9989087664664360,1.0102321479428500,0.9726077451254480,1.0229378673527500,1.0037443170690300,0.9506403941093230,0.8659025013506060,1.0425258859932300,1.0269948414240000,1.0210347447476800,0.9869294147835280,0.9694273518552160,0.8751237973313290,0.8520166643388930,0.8161569956410880,0.7678425167804110,0.7700632777094800,0.7446175573498970,0.6553331368003540,0.7449736813899550,0.6302711853278030,0.6799413262506640,0.7227251574958850,0.8517863323804210,0.8181644158567000,0.9213256152994750,1.2810814194972500,0.8946588499365640,1.3736047892705300,1.2260807298006400,1.0611294804157300,0.9219014587530430,0.5680112835139860,0.5377392990469220,0.3298106843898140,-0.1719686918509980,-0.2819466900079930,-0.1992173465552170,-0.3814389372698620,-0.5068395639674160,-0.5440866677749160,-0.7586247980167080,-0.7157705137539000,-0.7071693108577800,-0.7763499947312900,-0.8502087885332560,-0.9487019031392380,-0.8849128283116610,-1.0185413995359800,-0.9063415914629020,-0.8742798194799050,-1.0850527768695800,-0.9846158086303880,-0.9157
723830506820,-0.9821058771097520,-0.9963697347184110,-0.9650347191718880,-1.0039196559098500,-1.0209341512160900,-0.9892918059416240,-0.9840537215786370,-0.8851893179125000,-0.8769338218316060,-0.8940188473463740,-0.8425875126869280,-0.8289408469012870,-0.7532063497603390,-0.7700985647336680,-0.6656819958289950,-0.5902129175975290,-0.5184668194250220,-0.4741418132576440,-0.4164773414535820,-0.3324609887739230,-0.2674577016845110,-0.3231561578354770,-0.1671549465881780,-0.0991688934094777,0.0184878102069366,0.0593230959994008 +0,0.02,5,2,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.05384422298
86795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795,0.0538442229886795 +1,0.02,5,2,0.0200000000000000,0.1451029282433400,0.0772047120710864,0.0200000000000000,0.0200000000000000,0.0489717431487879,0.0274495861433574,0.0357637508810199,0.0684993097853416,0.0200000000000000,0.0700522575202647,0.0612439573762328,0.0260638611495695,0.0812309192143508,0.0461588271225360,0.0222450189854631,0.0531891974716099,0.0513633900333628,0.0200000000000000,0.0733367134797685,0.0775479046939276,0.0217871289061783,0.0717740549983404,0.0200000000000000,0.0422277561726533,0.0200000000000000,0.1153667013297750,0.0589669911798556,0.0820261518839881,0.0926552550358730,0.0200000000000000,0.0257106262844677,0.0200000000000000,0.0254223605983094,0.0476623506657087,0.0364680830983335,0.0200000000000000,0.0679620090761974,0.1080690093938710,0.0765226435820176,0.0200000000000000,0.0788703870443346,0.0320053196940122,0.0459419537572654,0.0262818803884840,0.2703113465235740,0.4519676897481760,0.3204245391241220,0.0830564581735304,0.4104779108823870,0.3857155691067090,0.2175026323541390,0.1222975788331630,0.0636683385614897,0.2978745616108900,0.2054137210245730,0.1715374549314800,0.2692057206749080,0.0200000000000000,0.1740928219363430,0.1602392629955660,0.0694865118001325,0.0731196713058297,0.1363617883550300,0.0551904931965811,0.0200000000000000,0.0733696078640926,0.0964238550462272,0.0360082885289664,0.1034773002961070,0.2403755230857390,0.1714901119416400,0.0772462931068471,0.1921249073584610,0.0700341809602307,0.0647348495205131,0.0649691733350237,0.0200000000000000,0.0501078392212950,0.0421229430929182,0.0327
442257339355,0.0200000000000000,0.0538442229886795,0.0727395594456153,0.0287270678498866,0.0200000000000000,0.0262830354573370,0.0200000000000000,0.0312480515132131,0.0509995579399268,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0796640661166812,0.0931431553133826,0.0200000000000000,0.0595196003015288,0.0510785917754346,0.0443036193627499,0.0200000000000000 +11,0.02,5,2,0.0489717431487881,0.0274495861433573,0.0357637508810200,0.0489717431487881,0.0357637508810200,0.0357637508810200,0.0489717431487881,0.0357637508810200,0.0357637508810200,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0513633900333629,0.0513633900333629,0.0513633900333629,0.0531891974716106,0.0513633900333629,0.0513633900333629,0.0513633900333629,0.0461588271225360,0.0422277561726497,0.0513633900333629,0.0513633900333629,0.0589669911798476,0.0717740549983399,0.0589669911798476,0.0422277561726497,0.0422277561726497,0.0257106262844691,0.0422277561726497,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0364680830983336,0.0257106262844691,0.0364680830983336,0.0364680830983336,0.0459419537572652,0.0459419537572652,0.0459419537572652,0.0679620090761976,0.0765226435820174,0.0788703870443343,0.0788703870443343,0.0830564581735283,0.2175026323541430,0.2175026323541430,0.2175026323541430,0.2703113465235760,0.2703113465235760,0.2175026323541430,0.2175026323541430,0.2054137210245730,0.2054137210245730,0.1740928219363430,0.1715374549314810,0.1602392629955660,0.1602392629955660,0.1602392629955660,0.1363617883550290,0.0733696078640913,0.0733696078640913,0.0731196713058293,0.0733696078640913,0.0733696078640913,0.0733696078640913,0.0772462931068475,0.0964238550462242,0.0772462931068475,0.0772462931068475,0.0772462931068475,0.0772462931068475,0.0700341809602293,0.0700341809602293,0.0649691733350243,0.0647348495205135,0.0538442229886796,0.0538442229886796,0.0501078392212965,0.0421229430929192,0.0327442257339349,0.0287270678498867,0.031248051513213
5,0.0312480515132135,0.0287270678498867,0.0262830354573363,0.0262830354573363,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0200000000000000,0.0312480515132135,0.0443036193627497,0.0200000000000000,0.0443036193627497,0.0443036193627497,0.0510785917754344,0.0510785917754344,0.0510785917754344 +25,0.02,5,2,0.0357637508810199,0.0489717431487883,0.0461588271225360,0.0357637508810199,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0489717431487883,0.0489717431487883,0.0489717431487883,0.0513633900333628,0.0489717431487883,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0461588271225360,0.0489717431487883,0.0513633900333628,0.0513633900333628,0.0513633900333628,0.0513633900333628,0.0461588271225360,0.0476623506657080,0.0461588271225360,0.0422277561726498,0.0461588271225360,0.0461588271225360,0.0476623506657080,0.0476623506657080,0.0476623506657080,0.0422277561726498,0.0459419537572654,0.0422277561726498,0.0422277561726498,0.0459419537572654,0.0459419537572654,0.0476623506657080,0.0589669911798475,0.0679620090761978,0.0679620090761978,0.0765226435820176,0.0679620090761978,0.0679620090761978,0.0765226435820176,0.0788703870443342,0.0830564581735285,0.0830564581735285,0.1080690093938710,0.1222975788331700,0.1222975788331700,0.1222975788331700,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1363617883550290,0.1222975788331700,0.1363617883550290,0.1222975788331700,0.1034773002961070,0.0964238550462243,0.0772462931068477,0.0772462931068477,0.0733696078640913,0.0731196713058293,0.0700341809602294,0.0694865118001325,0.0700341809602294,0.0694865118001325,0.0649691733350244,0.0647348495205134,0.0551904931965811,0.0538442229886798,0.0509995579399262,0.0509995579399262,0.0501078392212966,0.0421229430929192,0.0421229430929192,0.0421229430929192,0.0421229430929192,0.0327442257339349,0.0327442257339349,0.0327442257339349,0.0327442
257339349,0.0312480515132135,0.0312480515132135,0.0327442257339349,0.0327442257339349,0.0312480515132135,0.0312480515132135,0.0443036193627499,0.0312480515132135,0.0287270678498868,0.0262830354573363,0.0262830354573363,0.0312480515132135,0.0312480515132135 +11,0.02,5,6,0.0423226649461898,0.0213382604260963,0.0345832777894842,0.0423226649461898,0.0345832777894842,0.0345832777894842,0.0423226649461898,0.0345832777894842,0.0345832777894842,0.0423226649461898,0.0423226649461898,0.0466425139275356,0.0503726990070914,0.0503726990070914,0.0503726990070914,0.0522689341160636,0.0503726990070914,0.0503726990070914,0.0503726990070914,0.0466425139275356,0.0306469700143294,0.0503726990070914,0.0486302113596101,0.0486302113596101,0.0573742018717883,0.0486302113596101,0.0306469700143294,0.0306469700143294,0.0294598485057315,0.0306469700143294,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0294598485057315,0.0356909784143028,0.0356909784143028,0.0356909784143028,0.0486731792847783,0.0647624501115769,0.0767373878041499,0.0775063075538756,0.0796945243644425,0.0796945243644425,0.0855298076266875,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2870710151500930,0.2717040293160000,0.2268032219320890,0.1736214700293550,0.1736214700293550,0.1713455406124420,0.1574551250434120,0.1317389301256910,0.1317389301256910,0.1317389301256910,0.1279514761457010,0.0821466649254990,0.0821466649254990,0.0796827022609885,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0932762768253730,0.0821466649254990,0.0821466649254990,0.0821466649254990,0.0811093220566633,0.0573316881612044,0.0573316881612044,0.0563031203635952,0.0504775230475304,0.0504775230475304,0.0504775230475304,0.0471399118656961,0.0422763918127036,0.0422763918127036,0.0356466128786073,0.0356466128786073,0.0356466128786073,0.0303400136608894,0.0272555144285884,0.0272555144285884,0.0253196032708275,0.0253196032708275,0.0253196032708275,0
.0253196032708275,0.0272555144285884,0.0303400136608894,0.0447277571118028,0.0272555144285884,0.0447277571118028,0.0447277571118028,0.0538429638790465,0.0538429638790465,0.0538429638790465 +11,0.02,6,2,0.0299189178180329,0.0299189178180329,0.0439481529019891,0.0439481529019891,0.0439481529019891,0.0330467400508825,0.0330467400508825,0.0330467400508825,0.0439481529019891,0.0439481529019891,0.0439481529019891,0.0330467400508825,0.0330467400508825,0.0330467400508825,0.0442014748656228,0.0442014748656228,0.0442014748656228,0.0213260087479265,0.0213260087479265,0.0213260087479265,0.0268493269616991,0.0348197958494532,0.0432342655432602,0.0432342655432602,0.0432342655432602,0.0348197958494532,0.0348197958494532,0.0415996328057206,0.0415996328057206,0.0385091946216850,0.0385091946216850,0.0385091946216850,0.0292728953624526,0.0292728953624526,0.0292728953624526,0.0292728953624526,0.0385091946216850,0.0415996328057206,0.0556937879041626,0.0688174135868027,0.0688174135868027,0.0730194668621024,0.0789472020180212,0.1022476970064910,0.1113502653907550,0.1302253619997060,0.1022476970064910,0.1404897947406030,0.1438409548185280,0.1438409548185280,0.1438409548185280,0.1438409548185280,0.1404897947406030,0.1128033154936850,0.1028090077284430,0.1025892369546820,0.1025892369546820,0.1028090077284430,0.1025892369546820,0.0539406445041053,0.0539406445041053,0.0518727238279349,0.0518727238279349,0.0539406445041053,0.0539406445041053,0.0539406445041053,0.1025742752639520,0.1025742752639520,0.0539406445041053,0.0528933997260198,0.0528933997260198,0.0528933997260198,0.0528933997260198,0.0287996226521029,0.0277483480339819,0.0277483480339819,0.0287996226521029,0.0287996226521029,0.0277483480339819,0.0277483480339819,0.0287996226521029,0.0309753027350675,0.0377844829340734,0.0388385937007103,0.0388385937007103,0.0388385937007103,0.0377844829340734,0.0372670600985638,0.0309753027350675,0.0309753027350675,0.0372670600985638,0.0372670600985638,0.0372670600985638,0.0372670600985638,0.0242780219
496502,0.0238192920369165,0.0242780219496502,0.0242780219496502,0.0530368406787244,0.0530368406787244,0.0759226297230423 diff --git a/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods b/tests/resources/noise_level_estimation/noise_estimation_signal_refcalc.ods new file mode 100644 index 0000000000000000000000000000000000000000..b684017c8ed0ed53492f12c71d77a5b847f93b15 GIT binary patch literal 104007 zcmbTeWmH_v5-yB{;O-tma1ZVd!Civ8!{F}jPH>0d?(PsExVyW%!#8_Fj16qKtov9ryz}nEt0HE(+X=r6nYiDh1sB2(nVrXbD`;Sg2D5!sQ z0@eJV7cQ_Qqi1HQZ!d3XZK11g_)FA|?mtx@7*5jRtrcg`2hL)VV)@z~SqVSk!VyxU zoxO;o;aCno-(j8wWII+vDN5-ktia-Z}LwE05p1DGS?{_nOCH$ zDY)JEi{(WvGUj*%b*-UYS@kCEb>4i7SJedh97LsuPIz~#f1t{JLuNhM7KdO{Mc7YG zPv0N`IWgsafrfuZ#(tW6iHIKhSo3x$6r+I7NWXJhVja+kq-C*4lno zf<>z?WW(-j4$t|OIAj<@P9t)iQj+AC&V`Sm!^j#V$9Z2-5@~W8@wyy*)6=4R4a3N3 zMXg7~iU0O-TNu<5)2I*Lw0$?XZU2wqe+246(k)_t3nbi;{uQ8F7!?04pi+qVk2C*R zR_m3<5ZkO4MoT1ajI55!jcv5}2dO&a4`^kE-xg>ct0jITV&;iU(?Rd~4J^^z`u;yI#z4^F^2Llm#-EE-T!a|>S)XN#{-hYe|ex!w)Dq?+{?d8Rj4>k z6MplEz8Ci^4*tW7;juyTZx2c$(rf=vw~O{qto*0X%dtVnpFZt$e?3_IDb4c70}-^? 
zze;D04eEXiP;~_VMMtAcCh-T|z`~%-A8&p_{Yz<&^zE15w2)%_3u(l$LGEv*<|e#< zLu95~TJ4Y01qkFz-8m%rnTQ5gsEXPo_xx3m73`^Nv*oH-=t0?}{b$IHHdHPCQ=rTX1) z=mq^RbN=aEAODzZj{nQ$h(6iXAGZlB;}HLpS>gT_GV1e#7=Nyk8N+{>`}ehqBk6b2EopxtwGByY{RuK94gSAcGjz$u{)}n2;Nag2 zQO~hj^*6~5;pupBzawt?&ENSKJ_ydQe@GrG z{O~ttNFC?>8B&L&bi6-GEyai+a~dsTPQyy@(rRy6eKox!rffp=}YrT1#nP}B%K-J)?gmWIm0qzzi(!jW##9NHI;M$=NM9vbby#fDa6ilfn!R^ zYKOl;npVk@_S08`bTCGMZ)WCi?%?^d1Q}rRwnOCVDW0SS@J!Nr<4d~!z8(8oufdTa z=VI?(=3?L4_hA1#d^sqi&hwC}z~^~~8U6Oy-m7n?y~&sp?OaCt$@4+D*G>%if?G?d z)mS9>Ed#17%}uZ!?ma6px4CQ@#5m?THw~ zb=klY?EaH!#i_*Fr^l!E>-4Dnfa??YDD=s@LVT(`Kx;8o&Kare_uzKnU0Gz0HBn*z z_GDdU|JiXs$hg^_SLAr}G~~xxU;0}ejU(pVB;}pnK{5?qVc8dmFHHNU#mg&Yx;Cs~(rHUUM;_d=lun$xIA@s8Zflg`_S z#P8Px;?KE(pXEp-?xMjc?F>TD0BE^x;{fuF7hsdIU;O#@2a;B%AQT>N)Ymgf@%u3B z_;bi7+3pKCKj=65bd*SAN=dfW0sk#WNw(7{=z%@`?u#4xjr+$W>**f&)3;pg1L{hvL-R=oG;D>&93?wPugR{2#wuf-Uo#DsTIUZ-&Yd47^w=gZY zVN^HRL01V+*Dyxc;e8j_3>OInmoO-o;WlSjJ7)=UXUrD2L=rd5XxGF?7feH!#4%?~ z4rfBKJ07lMkDpUdd>MBW6la38#@wwUxh^<8CI4_6M%XTdSn(n?^%bsyu_yj~C++;R z^!-)Z`AOP&1N#R2KLv4VXHc?d1nGNi_Kp0sb5^zuLfdY2_6>9PjU*n)dkpD&1zqub ze|nN? 
zBQcO-Ly&z#38=*f$h}iA3eP7YXgUOi?lmC)kbAP-PdoD64Mqyx+QZrYba3+BUMP9~ zb3iFh-}rMc2#Sr0J^5}?;QJ|ve4_&>b7$9mw&8yUrO^F03jM}G_Pm#NPMmhWOHbmC zPv)+fgyM-v#?=YEhW*|DY~BBE%fI6X^jZ%zV8b8MC1KG_OU9nE;<+&r^_7RL z)n~2^XNU@IWWCM+#(5@+xs|5}q&@Z(T>2iszOlr<0c2w&?flHH+v{hhllCyQ_8>If zhQGeo+2uJ-gb~?qwvV{nLkP5%>Q}rF0_Q-;xFYrnB6noF&-NaQ-hj?U832J_Jx|X9 zBMT`bQ+)TK2o?MJeKR5nSND}d1+axYBa>r36~!}ruVDA$Sy7o#{9cfY_l9dshP(et zf$-vanwyKaE_AQp@$S1n;l30!;nT`$y3L zy#bOGar*^}$; z09LoSc$&Q*ld;^9VA5j4>@jCbDPU+dyJvF8X}{m#?C7C7g@ZS_{3!)r$I2{OhRdic zFZgoL;80iPgXTrx{le`4_%h0Z*hSM^2*Q^feoG8>e)e=jv*wKGInrZ&XXdxt-?>K3 zXw%PHur|X6=ufYaIK(v`%Qt53R#;p-U(0Qq62-Y5_iD1)C+-zk_MK-I!osz~RWjk?&DLgidl+*^Cs#jv5^^0@z| zE#L8(Mx<>rN2_QWqOb@1r=*|4EtLytvqTeiAZ>0#%#)_izOimjlw-{6JYP_1tuBXU zQoN*BkK%x3l9^@F)f)Yn;Q-F}?(pC#`s+fLpDtEOkFuq@fx?hxExLi}Bj;uCQ!|0= z8ApJmp+ak2ppeF&(;_@UJ}%=Q2V)c+`>1KZ6_n z-k|WGQ77o5zV+f587y~shiI za`yAe|EK4$gg(3iy3={ZpFWctC|w-Hb2IG2k)#u zklKH{@2ns{kQ&RmI$g6RMS*WMNqgI*dAy znw-(cal7M+`9!(=fZ949p{tLivmmyY8^5oAFHH)6>w@W-*mEQqI43RDUuM{CTyeGF zRU7ASMfM=5TpAb7mMi_y<#j^Zmfs^mr*PrLu}JNRvIIkW9z0~8J=}_K~kSZMV@)jF+1(C@ncAPUa<#% zH0{RYR~GiJMMI4XOrBuiGV2qL$`iz@W_KMEaQW?st?~@}RUf#m4e6ip?9xiKW)2vH zo~7Li_0p9GZeNAyI@wAiE<6C`ccjcr05eP@?z?3*zm$`iPz?KfR;-h!cJD1u^japsiG zWJhKUcLIuk>kV@XI$-@3qEQoY8t8)y*y5&Joxc=2SsFDD%WSE?LRc($1DBP6gd$}X zMHNQ|0kJg(1^K_gk1nmVrA0(61Hg|SDd6rEX4?_I-seN5w?#Uo zgXxS_j6Pv6&HN8GBHj)@0ioQ&Vy^Tvx8^(dQI>-5w~;!WhW3$#GSQG~SVi^;xpxhF zO1><8f<{B~&^nXBg5Zl~IM&O!dCmHJ%8>}lj6Iw)3?sPvNvJ<-vy7qDCoOR7n;1w% z-S*T1FUZ+Y+pEwKphC`@G7c2VuJ01go>tF#Lh?;hHj6lO{wp(1I%}4?Sk&cXPS{Ross)SZd19-sqgB_UzA6B%}F zj6KO4*Bo_vtn~TpKkSr`5f>uGV9*ms*qqSKa^olE?I=sII%l9{LdA?(M(y5pkRmp3O)g7}lu1YsGu zWX*3*#-3K=JN^W`LW#V;f&;@}m}wap96#RwuiyaQj{SQ@>I1Cof8AaMM#qV!x}Ex< zjptZ}>_md7w_QhJt!+a3O@lH8qYB^#Dp~RX6t;f{fimPo6zu_67 z@zYEYv)jSm&EZDDlsT`*Bh$-vi^`(fkwXwc80~I>Z4_a{QTeo!$eT?iT>Bd+S@}vexqR zf+q_SQ{Zp`=D={liCM?;ZRnUbHBV;QUO_i&BBK8EV7kcEh>=?dF0Qn5KnqW2DWo>R z^-VLCeJob-z2w=+i5TjN1rJYFTvl5KWy|w(<8nDvN?A)qo1x4)SDJ@w6~?TFLb3wx 
z+3nrY6^`Gy{>06$%Ji)m!)?ZR#mR_S6AwbFg0LuI&4T2qQyeX5H!StGt=)R~D1KrJ1ch@Kf0oX8f@y_pehI0fR<1#OidEwe!oF z*~M+&$*8?Z+dN~RtZeG9y=WsR9L z?afT@URU_Dxew}k3IGU{VehVcW;#--<)Rrb zU16m)ds=m>qbhebH`_McsdN`9)-x5o!}}_V7qaQPGrLZ=hX;PC3om9b0X#XQJf1s_ zoNldx1w6hr!zZ3~%WXs>exjDFyc?j1sX>N!i>tU=uIX+M4l96{MEr0(Xs64@g|gf9 z^}D606^V|VP=JnoCXdgFCO~K3^R8>kOC@NMwdCee_4c}}X#ews*Vx%7ggb5Lnl9PE zrzZ6m#8s1#t=H+%tpU{i#DyOA=x3js%PZ7pbo@gvrJ!%>DI23%T?TS*@o?XXZ8ZsRN2)-`+au0bxpvVD z>Q=_BP4XH7hFUYvT(ERUbf_(JS|PwaXqM)I6LQv?`2v>TRTfrba7J2qdvG)};b$_v5mWmTr|2}(S35YzE1wo-HbcZ%@LlX1@}f6` zO2i56YqkJq^~0%z<=>~a&LGv!lt1kfSM41wSlvxuj>d6pYLPhoqhplk1vl{g8u9kzBGaq3zm5I%a_ef(t9x&t7%$L+b&<@e zW~Ixif2%%Krl3Bh$f@mnN)b2wd2Ze^6DLLKXQ^povI{E~_4&310#a+2FdlY?9j2!r z8E1A0bd?H#&2xC#adVj$(ei5wtVV4fhnGEleMJ0+M$MJ6^)@d8Ows%N0S|1i<;07D zJ_X2SE0wT`k7DOHp0{hMmaMfIB{uFq5H|X2M2w>b2L_(G)V(xLN$IM4+dN2rGU9R^ z5Vqf=RK4nqMOEqxbLw$hDI-29WZg9SpJV3vT2?p&VQ7xGU!tEp+9<`>ZI}6cMgzZ?pdMeIzb+K=rl}BkmL>yid4ll@HP6ri{jyyg; z8P&Dq`6ba#l#Z|QtZK{Rvs}`Zjox_s>H8VRmnS0K$IdS7)xQkA`sO6xD33s8WA{$s z{v@&3YwrY5j7}UH-bb-xrg|_#7vBzq*z7b7<(`xH`KP+y7)c`S_nZ0B^kYU*;yx`0 zBcI3W_LV*I>ZAIvyx#a-uwv^-n|e!0**2w^nA^{&6S@*xc^^z?OX}0FF5+3hkhVF0+6Iac0BT2_sTO zgu1M36lfD^@+ZBCrmEiKkLD~v_GUWSrna|5{#f$GkYTyR;bj|_b&abx*vxRh~FL25{Hmd`E<5mM+Gq6p0Nj2}L5d+fW1 zWME~QefrK`jtE&vfb8dkxlZ}hDzZsUS_rgQtk+c`&5oxV4T}Zq-L!A8*(ke3MgTYl zzJ>PCPZE^qZ(dlr4TGJaQO@cZLU;i)!B`~`SoX83;HaHFEIy_)pjw4gqwk{RAn#hW zAwYo?q^RR*{A|*T7u+Kz*cma3R&K?E1CuSpLV~v?%~Uwv_P6nVg?F z#5vrn(Kp}!n3(Y=m>GX>qV{7HQ;RATI}r#Yi@MSKcXLTeoYz{5>NdmkKa&$+Ud2Kc-0rIGtUZiQ3&Egu>Gxu&e2@TT-pF2^&rJXh z(o^73UwE~*y%`cY`Q5%U&Jhdpm@C2|R&jr~Y?f_J(P)Zn6LWkL4(#AH-_??dhEypG zm|3>~a|IU>f+LiWIf5`tOVobr+El!x?LyB(B>RN8nAfZ$+N>R%rq3wjdul->+530( z?`F0^bI&GDvfMY~67fvC1?m>6t<2ce{e2>Lp}*F!=P7`*GS?aSXM*y!Wn*J69X-pb zj?qajJRP<s|iB9GZ7c%BXe z24us|A;y|l94FxBTGyBEc)|+R!gqJ;QP$fG# z4t}&wN?9adh zt@ing6bHqw&~;-U;ko zpTC=!2hYwh=4}T|n;DuWL%=X#t9}_^9fy&W`i$iTWvIshRR%kS1!mUurH#4r&}_7p zxHNvVk&5hthxBA%_+c!3@JBuFTykL|Zf-H$w=P%zhwpqOcH9(v+-Jp)W*(vK?{qod 
zP5UR9r4>%-^@^$zKZgO3kfJ%LkkuFDH~M|;pdLlVD2DcJKKsMDh@Ovrh(+RW1i7)m z!Xr)HorcPS?~Ickw%lFOblrCGewB$ zR_`hKNYC7)*lT@$h{{FAHYQ1nXVxv+<~a4f=-33dMxzQjnuU;CqbAnWgY;XxNP(Xa za$7LxocBJ&W5LJh4O9@YDK*{?Q8S#l=PWAolc1t0m2x9mvxn4bM*CS3>rWOCc|S$e z68&y`ct)AJ7zIS+1Q;gEV|2@-vuB>y~KG$w$FVbpyAj!X{N{M?9|}RXT2a{(6+_BvJvJhVVx$0UwaU< z8S-;^GRRi9RHHv- z0nJ?f&!dw{;#EPLa=9z~UH0(lGu*R^X2 zO@$sp42Da|T~`l!upd-%uGtNrzWH^Y6HOE7Gd1PFbq?|(tCBDLM-`E?E7Iqu(K#hB z19eje2KMVqos+Mfu4>U}lG*m35GppsqxoH*!P~$EV(@G!(|kPgpdoK&<>sqDN6E6~ z{op^!Qq^EscXRYp;`TW_=v=0W`Lx(=Dx?MFbKoKZ)rI8Q34Irn$%rJ2ia}Yf$Fdf( z6%keIuPsSSV%Wv+b73MT03i`1q+lAW@hW@hoY^exq2IG% z5WGIFV10mbAOq-o&9e^rHSx z6X@CzuzGjjKyHqaMjq!;bkAUUH$P^PJ}XS|a^X|kb-c?{0yn1qm~LX15BIW(0{Na| zRN_f8IfmF>tGQkXOE;w0CVJ7dO>jKGIaOU3fq5fKul~H=w1&SHhFQ>uU=u#3LW3`~ z#-|=Fb^l%K@J#3n3fOdNR2E#cW~lRGYTq6)*0jHm*_BM$86wDp>1flW5)mj7tBm3( z5jS1s@T=M2K(!AdSja(H$h-!M=AM$IH8mcpEb6OAZkQt5Tl+`~RGO4{!}|D0z*H2m zD+?A~cH0E@EXHRHi5{?Nb@*%pv95lU2W9y8OdA;~Z)=Y%6t-468WGTgLqP5aT=5meTQ<(~Vam}HeaVBqn-<~e;EA`35;V(Q^#@y$RzVWY zkdGxc`!+XOrsy0ZHE3+)h@E3(M^R75G;e{gm(6)No5$I%3Xxhz?Ul%dR7ZH;D0}NhvP_Io z?)MZTb@p@mdFEvBYs(~Aq{j+B-&xa^;_I!MYb@ojozFgOH$M@4U2Q~`wRB`83iiP~ z$i1nn)Fsa%#hX@N;Nomz?QLIM;5J9aFNfYERyEfou>?#0N!5#^JbFEDl+r9DDmC~F6_ zl}os;b~oK6S8tQnSLp;FJP0gIng*Ils1cwpT8#w=RW%jGHtY5IQxIF5LlBxav(gwg0Y3JvzsSN)TCH9@4sl zH+2=VzT35Ml}uYAu}lD63XC!7#We0?hTdy?*`q0fATHcl6#%@u7dhwfz$VB{9ONc~ zHpMyF5it|kKnlGaKy!OJy8$$>zvp|wDni5jHVz{0BT8m$OcKExnC4Nu{3yHPAa5j) zybLn;+xau3shWVYv6t8ZLz7*)&i#+~Rm&JKl4S_&0vnJ*>w(H3E(X+ugzTx$ zI>AL%l`gk_etqx=77c;r&T&Wa;EFN?EClkI9|*QNe2An}3-=H+rQR|2POIL-eD0E^ z9r_}M0M^R0k3*1V{Y3_YR9*bm$H{;}b2uAFU5y~u8G0rDSH!` zO{BC=G-iRz;GXe?jFn_gU{;p})NNb1b`pIb=)Qdb;FKu4;v5HmGhDutu>y76AE`g?5k#d%wnn*hvaq#kJTOio~IL|Ok-UuXud zu@Cb`P6jtyqL3>H#peqLgP3}IB|+ca>?Pvx;BRc-3$Votzg6G5neRA$_NiOfomJ;& zcMEw!QZU%0`cbr|ITFb`kv)iPl^t_eZzo%XVLxLYVn1OKk+EXH>kS-?v5)C(ItIZ& z)pg7CC?;DVk{~-|lnIcdU<$Qzm2WJ{DK*95#C?^(g~YYQsAfG=FH_S!rImUPZowe# z1rnOW$2NzvlC@rflpWN6??1AYgr89D6U?ycqnkbDuY{H+^BY)md 
zMP)I2Aon1=cpK6@Vd;4*t#U;D_E8Zf>ARQzcxCiT1v~H_$W0Zgy8@nRh42#*>FC5$ zZ1yrza3c|sA@oQ>MiMzvFol5)>>P+|MaHUS!NIPz9uhRrXKOh?Bj7bJuZ zu^t?dI1wcZX9f&8USjA!wYW%Dkwxf+USY@32Gq^?I!=mitD=e&F9xX~^E$y&vfCqI zee7XUy~m}>VV#0MB~}hn!HQeP>HpA%wRmsF&+SZJ*Fk3jQn#)zrvY%mXAE1(lvK$N z5@PM{qKvqZLz~#rx6c?g^>vU9lrC+Bl8uTRRZN*C9oC`T`{b8mec7Vz;u5M^Ov5br z6S|FCT3k*fDxsaNxxIO^Exl?;-JY$`q-&oZz)y|2|6X0U3v1RtF_clgLeKKiBt@Yr z;WKBaX;?18PGxjXd)`UUy0(bD@%9XG+Dmg`nlLsdCXpNSO@=e?mh4kXR;hH`GsvM> zLt(OqpBM_hlO$@xCSrre?y?k$*M2Zn1#&mBQv4Ts&J;=U)t)D=c*bm46<_> z`XDj6^q}oTz7EN|E>6m-eZe)6o`e`0BB@^$S5*+7>9MKY-c9|KKRu9gu28jOQPE(+ zKF|)z8$tDrcFh*5PJ+2-=lDX;6%L+RsTmGJGsYbqxe4UPK&T09pAA9S{p&tbB(v^b z0|!;lN)f=(%l!52(qhRPSu8#o6Vnj)aPlO)7?T` zR)!QAfhqmtyPGyHRNDG72N`e zLenynoj~OU?rv$*@<&xz7Ui~?^nP|{F2hxs`29~U!C9FFBSjICI#8E*4Gw&8@t4rHW#@S*@##6{q3OOtz>ArbK4|6mUXG}=di@V<0CtGfPIsjV=jd`QC%4KJnkuts_tt! zz87kp0zZl719W(VcGO>9jhhYq8ugM^(uD zv1Jpg<_GR2^B^DjxRX`ikuMOU+=M8={X*F%*?#;U{4^U%vH-yWrpI^?fmk)bPTc+o zS_e&8pLiv2V&$X$2A+_#h|hsoZ%Z_sW|5#uv-Ow~FfHHXuU+XC*az_Y5G$KFu)xqp zcAIIyvGj{4OO){E3VUis7Ky#n;?Sh@%b+6)je`f;6K-1w-*wV*XrOEsGPSU@2>?SQ zqtlG9)Z&slAdVXjJWY&OLHHso!^;LZczl$+f=X5aP5|?TteF$f7<}>`mFz2UZsIe@ zD`0cs;HpxL9EB^}2YMUfb0N=>Kr+7w)M6p1gsIzHiv5~4<}d9MR}dL!q8Nl5^9M;% zso_4v(A;qMBdbP*IZK>(@vYly_}InhyDl~Syh)XOOVMLf^aI3f zV5AyBO==)WM_P3qdpHXfM$mvw8M~-N7*G5bMn=v7UM~Tzh%U20FH9~U_BoX$)pk{Q zXS)=BFM*+t2MNUv8RsjqXX@HW{Zqq=%=4`x^Tkon$mZ}B;Q#*<Tb{{i*C5o2Tl(6u-GUv&OA2Y&&z*0nOU_#f8(+W_k7>l<1a0>=~Z$Kro` zln4&!q@}Kvsga?bJ*};Q(Ma5g74XQ4{~1?+;pjXc9R!3e`B0NtW+!u?Dg@nS3d(AG zWSp{GUh!e(^j5hD<62~G>WMDikFe^(E=4AN3X-8OCUpl?94)j>`AZy&^Yv7ODfky= ze)*Y{%}!R7?AqS~XjtJa#%glNol~y3WLiIV&9K>gf$Oi0gn}lB5HP*jcE@2K-Gnw$ zk+VO_igvmCz#X>2QIu5;qgyZE7F7Qs#)mHgT5`zW-Vd)?TB48f+m#%9SN6%~fKJ<# zUk;bfIaTD?{Aw`V_Z_YEw#ritzcu#+6W3FHSBI&-i#6hSowU!VGKVdkE^m9Uv}14B zn-#T=LKb|ZxyFytd$pH@N}?6NuAJIClSv5RAp<|~|8>X!21wuEgC{E*>VPFS49_)f z-NyMo+R9KX8$GvFG)7KFPH-mttP`C|8E~~6gPd4$S^Ko>Il7AGFr#J4xN_85<}r3I z!m$WUIggiJqA`rSXhkSYR*AQ6by^+d4J`D7k;{fCUiPD%)TNHrelg#VjS)8XKXm!i 
zLwSrU)#x{J7R5J5F8M-f-lG@SL;TRK%b&nVtQ;3uV!2Z;l&8U3qrQv(rard=7P|!& zHx18qHkq)-?Br|7%&nLlr$g550!_YXh@X#$P02H376m*%%z#GuGGep^Q*0EGn^M6Q zEot2s1Izd$L8;tpqa}qk2yK3ItJU&LcVT_EZxfBw26!F%ZmZ>2d3irbWm+E+(%O1R zP*HWMw8TDn^{P3YgBmc-*WKu$(tSY^~U)^B}5|ze3W~-8MW%13@7qFWJXmlxV z>0zbblbULVptzGKC8*xWKW+#@LvMbHhOV+K2(tR%n~NN9)|6I2i|v+TJVoNCz-%5k z##chAUlPQ!7~gwQ=x9_nZipVz7it%^examEf&A0{`-~v2Qg~II3Ie6-{JQN@tQGhV z&=r|*&y!V#ocpa)ONBx)D_p{~hPXXs{(Of>@zY3i8y;QCHA6gv*1ZIEyTo_O%P5MH zzK$GApXF<+0p_3K6B}H2CFxU{QIYt`0_Buf+P|4Oi~I!Kc|%`?+m1A^eN@iuEyN(?lZ(i@$0fJ9sUXO- zVAAKzDSkvTaU%cN*bS}ujQI>V@u`*|6P}5xrb(e)@x)dxZvd(19p2|U#Yt|ew48c& zq) z!+7kzxR9V*Vv|pTfBC8aCoI@2jzI!PJ)4ks)KD*5OfcGguJFDhk=?%hE+alVZ?PLe< z^9yBm4x=F0aL#4$L)bgM1FQdF%)*a+m7Mk_BCy7oV-*BOsrLhxb*&N#9WQ@={?msN zofG`BJTopH79+cIe9HaG1loow*>VlhLD0i4o$?IZskgepj)USMaRoiO7N}8vU24r2 z4aAJ-BkVj2xpjBnJfv@NTiD%lgOnGZ-V}jhYo6>OT47Q%+EReP&q`l?=_;sU)HK0( ze7PCRpgl%7LOOPn2>2CB`uoov%#>P01|QZT2kX4pllB0@b}3XhvPCZz$xeEKKs??b(d#eCi?8)i1)jnkjZdo<^?}jQXH` zd?-F1zg)g6jVwJ+c8L~%$xTK=y>y3^?^ev1E2>BzkMT{9B=}(=v}XujbkEMdY#0A~ zJe_7xJRGTgY5JFw&~eHe>Rs6Yp&wEs2p%z`$Ln6{H+|0~{S)~=v8fWO#BZnbx96x# za&zfR-5zI-rnNTAG3g>3iUf)ue@r1b>~DlmqW~84)d~UZl)auzhPDji$Yn_g**bYL zu)^&f$?|;|7vzx13Pu+2#M8C>Nk!V|di16dJi+5o3~9(Y0N9I3s0&+>V4u3&5u@w9 zT747v&-8w9M(5@=-+ zXw5**xuLc*^mk?^>>YRg_7$6`+{oXAd-#6Wv_I1(1NvkSyN2k6oyL@RED3xvlkp?4 z!cOmr(a6yRSOc5Ks~-SYwuimOHF0k|#w5dKZIbSfJBsHS=nZg$@4h8NjF5E`!r>j+k~5hG$>75`DOSRnu5JY5i)|jc z+iB9!Y^vhZYT?8-v%rh?Bh3||GiHzGw-jL$EDJapHZ}aCLR1>Xs!Dfz4m{YmQk`+1 z>(;Juo^;han#I-YD3nz0TUU!HOWmPPi=lN~W<#CNJLFYvvu2hUF1cdPl`_%v zem&WoS={2))+iCYsA4^l!J5+3x*GjVW@>k{ODM!Yo3?>S7ANMKt`yqSF2w!W{_^`u zgCRdpLC@luZ;YWg7VPw{;ux~X*J*vw*EAj9+=9WyJVNrUphI;U<+m<29@rL~TlO8w zP=lrvI_2{6d6@BbogQ$KGT|Ae4O`r zO0czh`Mf^8i`yG}ay4cNLq@r%njcM{kMURtJZuT(J@9n7>dJd~wO7Z!&4}IjSyph_ z%bvZ+A(X_ZI7UC+J1UQT*c!2eiB()z?V%e86A=iLxg=Y4pJqwkGSLx~@X&iEJWIfT za^`5HvB}y1VHx@AAMu^lpg{W0-`T6<_8MlfS%%~c@j!wPRi&nxe@z#4=9*{ak`=Rk z;8|_5Tm9YY$7=&O_nOwLuj;$^tu}b1m` 
z1Jv^z8VtBO6%~Nn;wny>Z#K9H+bNs6@|bBwRv(C`(B_H-%jrbs zma>t2nHTvW&WJDkE}rp+{lGkq8OeF>6L9AHM!Z%xR`?R8WGA$u)?Jd?qS1dfr)FZ{ zcX*Mx$VP+kO_2i6e^4ijjyF>4dz17)E$3HpbRtNl72%i7V6$!RWP*r>8lAjOvHOOK zKF;O*N8s5w@+QDqtIO5m32X7UYGeP5ttQNtrTDJmuegXYhiD~C`taA_LTI$ptUN+r zHvt|IWiu?^B~OQJKfua2nR*-`PuX{>?hZYi{d#SV!YucQ< zLW3D~ei{pSpO$%=!gx^54Z!tZQnIc>U@iVgkFI{9j_69=+_Lq=BB13qX9*kZFf5&4@USTL* z1KiJv^1R!=64cfXEle(PJDwvcT^eAQ8EDgOr8GMCY3mdhb(kW55BT+1{$8R^KYS?5IsndkI zoW=)0@szr1n!sWBF2I}4X%kM?B_L$PrX%vjH}Oh@_fGt+{d|i}M4zjsmK=u5m{l

VDxC-jetz9nX=8k z=Ue%Qgey$Kp)2~i;*Qh(0k+q)?`yNP6~XG+_r$K1%o1Ab=I%a-SzRR~vR?8hAlgP( zNhHXrt4?bP+V>`Ryq6Z4Bwgq=yzsl2P(g+%(EL*xyHYV#;k7Pl4y$PPVyS!s@&{4y z*+*z^IJDpz?RrP8&`0r5yN%|H1OmFFi%R6860&@2fGQ@6A2=*0OoQLDpyo*0n?&5t z5{v$`jmIZihpq%}My&tmW&{?v8Tr>i=i_uQJK~0ky|)gPsyZ=l!@W=xoQQrz_SAy3 z-g1fbll$XsT9u+{1&x@=%a0+Hv5R$AhZS!0@gizs7H|x+<5feeGqF>j-h;bOz`Kv% zpLo4myIdT;xYub>SFE-M5??*eJRjP#t81ipxV+4DxU+{;yj?lHoiq&B)bt%Z8`N{P zHKl1=k`@e?z`wnOmQ~D>J*==+0F+h8h7Vk;c$!kQM(4;Qp3Tf1CfgqK9?EYM zb}mPboKi<_1~00xwy7e&m0anYhjWg;t!jDHPkT-w9)%4q#KO9{aIa9WYArua64GL* zytTe8A`9LNG?_WNa&;sPCzeHRKt(ppX^pZfSUzko zTE)z!64Fi2%xf&yt^V|yH;LF;EI&DPz50oowKenX+O)VlJ7D(6|0HEg!}|R+aYUJI zDs}5v3%!k3MP>Zyij3gppyfbZQ)#5{?$G5?`7J&rtYub(H4(5!b#o%aIu%90lbb{UoE*(C*e?E)vzI{a;*zUf3xP+rr9n#5Y~6JJwrWh=s5^7mH^#VEpKWYa7rBx zD?k@qUD0x_I+j~h#^THzr#V^B^Q<~yX@Ky!*8es$p8BtYKBM;y zz=p$=XYZDX)oZmSZi-~t^Q-!`$c9UBdt+0pYg^rMJ6qE(Etj?>#??(p&CcM`jQiu* z;1c_t(JSA>;R#uNqL=DVq3Qi#&ZPrGg;!~t464+Zq9LOw>b~mg4H*@dyc}0`?i)#! zVMIij{tA1Egtb`AwGqs{Gu0nIXvmE1#aUboJS<%A>RqjW=U|P0nOe~E@L+hUqLFzr-4w3g z<7v_uy^o_+jvHC>{r|}N>Y%pXpxq?66f5rTF2UWkXmBa+#amp9OK>e7++7NV;_j}+ zwWY;rDL4In-^{%;_rG)IWH$Rg`>dSJzHe+r+uz*YKMvIvn!nMxCZ2*1*3r=52;%lHOXJ)Bc?dcN(6dJD+<^8rWt@h&vL? 
zj;l{^yHW0g-=}JC=qC)!z_;J%AIDW+iXUumR;K?hm>YeHYUAiNHJkjNfby7tCW^jL zUA~inb8h)p)?Zo~p(vDW^ya4EnC)kWJYgoe=dipeDun@1ZH!&d@gxgkJh zo@f3@vc)mzkM7Ka9bu4fm8`^4!FF@W;|5|nD8xMfZ2yh7q-2x1`7Pgz=g$?zC*1)d z3aEMhM&dDHu&-;n*n}6efUxoQ)qAm;ahJZfjvRQ4bt;+X^yb%6_!mWk#}@NHd>Lag z1^cOM@BaGyVB#}Yo5|ESA$Z5dm|Iqs5Uir^TEBI5C|dU{s3qX%sO)q#a^JyryfZJ> zQ~L)8t70QzA+GS*RxjUy;fN?IxHKu?3ri-@>-jY5rm;>k@G?}8@OFRWo$DdD*K;Q2 zM&JRrb;z#=9dVbSe~aM^PVzcDp3UD_^6-8H;C_BN$nV{~TWifs8@&n&{N+HR@aJ%@ z_oC!hR$cjzznGD!Kta7kj+uS$dv~M4z*_nCny=^AE43LfdGBaqj$qA0Lqk182Z;}4 zWvTBcmiEk5`$wp@cc0JmE4l)fQYM(cxD9;HFYvIr+Oi1?21}Zv^ZyCz4TJ=;8k^Sd z1nJhc?|fa#h`Z5k^**amAxSETh}aO^=CqUC{<*(mGSPh7{TOiFy<^L+b?3Zk!asOx zExv=A(7{0XL*m-GKWJj;=JJna;0D<*0g!3k(eWR&>ajm4{cA|Z!53B7adB&>UINBD zxY>_WM>BU{P5O7#CI2LM-z|4-h#}2vlGg30*B@$l4%Iz8n~c2VxVg(`PM*6M_xuhi z)7fn6@>l=jTlZt;&H5u|VM*FAMC30WFlDQI`M+5FirZ_K7?R+KcYK#~8BULPFYZ=T zJ0Ds%*{6f@+|+kWnti62CYr|_{k$d2cebzRetbLlzM|j$WqrDOc)Ry+RCo1@+xuH8 z=i{a6CG$Vf;-rpEI6I2(Hv6O%frwKKf%`%}SEmeMexIH(meicwJ+o=1Q?5Fvc zqZ})9HOC*#Uz(3^lEPTYn{K!Y$-BPFavb*cj=Nibel>6Rt(f42I@wWA}bSn z!k)wiN;n(O&m+r?KRMf4PQhIm_+tZIwx5G8OA)TO8JZA$!nyBmr^ zuZ8RVnp8u6Q@TpY4eW+3=T8Z}ETT3=<==C52*(#pK2l?jwX{6LQh4D>qq{01T?cWLyB ziPj5BYbi`i&&KH(KU~qen&U_-!=(M5mR6UMb{X-9_|M?!uNx6Qdu0DH;iI zcZ}s~;>_}v`=q7lz0d&{F6jxU9yrK2y#W1~-60yJ@dqOj0LmTcmT zBA&Z=n3*!RRl{ii%CWxD9VDtk^;PM&ibj(H$DQQxPfV}s%IRO~s>;i}BalwTd>Bl& zb{li(k!-B0rTwNMFJrDhXi1N+eiI`nzxeHs8!H}`j(py?lUc?@8_>tn+(V}eR<&Cj z0yHLV-(uP#HF=y_IVTRh-(_cK*44*xe?hFpoo&8oI8|IvNc73@&2xAdL{9r1u6u@zlGsrLk$qV2c;ypFz3X-I;=9n{?9n+x z9D;9@)U?G^GAOT;;jq&JUv*G#%_-FDC&vQSMUux~6DWUV4HFyA-nH@qDwEANES;3e zds#jG6>rqT<$kuI2i=QOkzrz7WaMCD7%6{yA;s{5iP*Kg`!e z>A@CDOBrOKn&`w$Bd3kQo4bUE$IYaE8Iz{Okg-VPzoLBPBSwF_h}z9rLZ{$mQ(w4M zngNr!eCWbij<;qVN%L$vS8?EL~v;Cm&wMkHiB1`^lngbV4x5ZuTAf>VsXD&*E%#B~! 
zAiXuD8K}{eYi?qXuAZV~g*J{ksd7O|UcY9TFnVSC@h^Y%$S!+XR#+TTx)~L9DApu; zk3A%9$^0F{x&_OJBEu#8d8nmFmQZzL@)o9*fzoVwh5jLW)-^;O(f*@aiww>DnXs6w z>uODumU>_G+{vKaF=Ppq!E z--M1yS7`K2su4nk^P1M+Dr@nU*a!1FgC(sAn{@*L`q=VrzXEp+qqXL%!=|N^Wj84z z&I{%bF*!F;#^E->;9nu%*@bQ+V=c~KGt3ql7cp(eN@%Ak2VmXgek8w*{{?mQ$O@?b znQv;>Gy4B5McqJ&3pOYP9T4LhN8yp+}lvk8rHzT@j_JCf#4n)-{wnZ)> z%3J7ffCwrAYc6Y_QFssKMkvMNr`+whdTVHP8-hl`C|7D29wrRfFs zqY$BL{Ox?h^5~wZf75W`N?dHE!eTgl-pSp7`QUBId*c4?!RZX9OZa}U1kR-p4|iY{ zZsROfOoFtB&gccgO7BW>lBJ%hu811@O_=4681(4suxXJS`zhk(L~NYIbWP-d@X8Uy ze#`l(Hk)ogPu8#znIo$r9B4xR2+>$BHDBvl;DDuoh~b>T9x10~u3W7oZEi=u+oGDu z^|rm8{ZK_))hZT^lRs|Ly)MHCpMG&^4@4hCuUv684MN0Z8@nMgEb5!?0*et==g?Av zZ`LkH)-E(mxgVUR-!bQ)GCGc^!67L<$>G+8pC1_v0k7eQ(bGor7~L)+7%#1)VX-;+ zH#V87CyU39`MC`vtI)aWUI6i$_uQ1aWFa=P({gu@i-{`y1P~e{JzU#mTU+?7ZSx$1 zT1Y;(NYB7Hq$DmljZDe8$GQGFdrhuy0H-l^;!9Bi8>bI+HzaQicHTFz; z^L4jZnK%Sn(eJ7oo7*Hsb4g1PJ`d)R`#L&pjXalUQ^l&FJ?qH*wiE zahcGgg~i~V3{=?9!x5lbqrk+{MS^CWP}2G=Y;^ZiXwe$wqs=DrZ85g7US=AWqLaUS zOMA**^nP%fSJJc=xpY3*KZLpVfs_-dWRyl7T&2U_Xec24zp~w1E?f+9x#hSj0yVpt zo%uz?=LohT1NJWF@sLhEzpT^_k=2?lj9jE7pe!IQHT@>dm5t|HFwAPHEJh%DXHT?qn60lji}JW(ILYod^(2K_bCYq<&0e# zomTnfa_V=xR*DFpETWQz#2*aSd{b~JJWv(s45s=K5UmcIOWZ{F4nyS<&uE7ni?GlM z<4EF$XxSmQS~g69Ub|^dG?sV@it8nbFQ|s<3k_1MLoC|~cDO|25!QTI3Mq6H zt5HR~$g5*C9&q%dyClA;NZK3J4Gn@FU zdS8i`rv9;xvFj*gX;%M_ur+iD3q7t!%iw0%69w>>>?USv$d#E#SQj(HTAUq3YH=?| z#j|X`RGWp097hs9H2=A=&k9}8=@(Y~(*boG=@vxtaQGPg@;w{lv+(NAgZ2?Dl(-(@ z)!n@E=&4X2jW6kgx>AA#IH?F9E&2h!eZ=fPDC222Ow^lwHNZzDr}$aK=V7x*i29Rk!#(zn*2)tg>W2kN*ibJHyU;ruwO1bbpPovMc*s=L zuy8`s6a2OxBk*_>i1OA4ntSuDX#cR;{w2V8)Tg|NKfe0(vfRB9@bke|&FkZc<3sg% z(wD?zO{tZBb5rv_decNh7PX%2K2H>m&R}Rt$AFx9Cz;4vYdVNg^EHAcL{&Mju%5G| zzK&b4MyMReIyWP=*|Sx9&nKRVi-w_#GG=L2f2AOhV(RTsU3hO-?@4tWZ@f{17;EK< zy9rX_JsO3u)P?svz1p=2%U-#{;;#XC6lB1?TC%xXA~*`aO=}mOz+6M5)`ynvZ_=jlw!>M@<=^As zu&uXmVH%8;QCj%`?fBlg-s+-IQ+2Kq749uAY#^nCRw8ThZ6TK4;$!^In+g>k1hSfY z>J!rt`$f@{wHoemW)A~U=fS9g+Ayo&bF0r%gnX7kX10O;=-@cL2h?*@2tV%Zb6b$U 
z*+wP~q3Pmj6%V|d^ZJ}NDtFYClNi;}4_bSMp^6ggEnZ~cM9ugHN87_pTxgB-VCnU^ zw%TA*!6AzhEyPPh@-?Zd;21uQ3$faPrM#lF$M>`3HowQzJnAM+x~4jui<_*5RDQe&p=I^c{JgZa=^Hr+??bZMO$kA)3>!z~ zSChxr^V;JhVQ*)s6)-(IqCBP=8d>v>SV7v<3RfP`%vZ|3dAe5m99p5Tb(d{)mkElS z`qwScm^E7a6hD`Qk zzu8%YWR*7%RvjB2*R?eXovBs#qK%^*%T;S4b1iW@P^1`{`znWWrq0wGQTfk#`OjmE z3{mIku=(eN4*LZXmQF_T%bi{kWLUR1%~RzJna#3e0j9dJ_|N@hnj`3=Kt>J(j(TrP z`XQ;RF@|pWq5y-pSXw>ukebN2@D2qARpZ@;$lql_cGw$w3k2WnOpokLc_i6TN<|Bm z&bEr3`9%C6*_|yQt5&`Azc)4Bpz=XtsV3qIJ9?8%Nw=U1Rtmr6+fuupi!Q-oUpfc$ zCAP!24HP4FX`j;L*QXIk*7DO@u1zfkOmGHy6(x*PJBo-~W!Zd|bmfJNGKG!wbJUi@ ztStdeVm_?R8+O%mr?DFNrUHq}x5xH0$ZI3O#38YY8E3eaCRL(J;QU*M7GGo>?v2UR zlKNX#7kR<(OKg(Fr97I5sBX%C#Vl!d(XO@A4l|3tPD}h?rT5NJ9%_`CY@{Et_%%7b zd9FXS<^yZ)dn%$EbK~x!-TKN?sJ52Kt0Imw9u3=%5S=VNC zspKAFN4^>Um_^^71vYIZPEglVLRtS#{ivb~CQi7BEqt#-+^u5rkQlR5mBF~>rOf4G`&Ya^DgiJqIoCMyHob9|et2Ry!} z_mBz2GW}f}%H2LcmkIk6aFBeWJVs-Zf~BD}mL?SYjLxNS z7+h6bhpMWY4TI5UWGJEu57P)MI~Pu6roxK|wl1Es;XJ)+wm_}*v4PXqtu>gu!SkUG zl}u`)U5}~S89vU}OvTuQI?w~6reK9H6{hTlfb zI8RH@VUv=9u59#cZgfmXDTKulg$!OLx10MDo2U59CuwA3hL>wb!C?99mnT>F zthNq{QRRRS!WIK0sFaIj6+=}yb*L(;BrupvTE>Tne(TrWFbTuUh;)Ml4Slbk=Havt z|E*A2GQ)=#Q%#j3DUUgM@{@RF3a-%0^WB#$yQrCT);Y{8FuYPg8eTPb}H=@2B z#)ii%8j)*tY81s8et4}V8+B?!a+jlRY^qG2ER7Q>mRH*>$2i8#j_b5)95!Vcu~|mA zoy-dmcr~N0>`SWX1Qv0bW3l=;z2+GABW_89lF?A>O*3Y)&CvTkqn+0^6s8#j=6O{< z&W4bK<^;PQz9t*PU-Q**x?EeOL~YA*X@o^y53MeK8fN4$i&|Y4`dpiy(HnhPYRpke0q`cz=~ z$jEmMVc;^^JD68(J#Z(~E$|KMduNCHKzEIe%s1U!1bZ?{X`VG7dcJm&{~nvfbV?5pKH)RO+I(3u-;$&3(2+%|rGn z{{$4ReXvU+z82nj>8bY3I?EG1Fsk!bJbUBkC!G`3q7C0Fm3YwXY9?E$AHCl`0X=jHcTaGKN$AomoyrBDo42-etk7cG{iHBdz5H2gkIkfq7` zyimT-Ub(GSquDB<1D9Oo5-FO*sZ6SzxP*>8M!_?nIvT@iT`%R~dT5PZ0~Uz_)2mw7 z%7c2M_GkyI4g%ySLT=-O zn6+==Z=pHMb~($L^DSCAUYI-{S5E0Q{&1En&m~;Uo>hy zOPMcn2z-+R2~j2sWgnSTm`AN5`mpN8tj>bus94Ox+eU}%b1xL3t051sgy`j|jS6Ut z%+cozvkwpcZ7CNBS<(OVK^=^S*qOY^DJyUwG#&df-5 z43e)_?dd@H)~-0sbEh$!w%1F2YYiwuxz2l|bMmn~MmB%DoFV3#BD>BfXwyxc0MWB~ zB-DV0LTvv@%>ZNONIP>g&B3KzLfkQred^r!pqjVcjtfoFA#2dkHBID6kdra}$aiI0 
z`b)yE&H`Y0G!Zkf?!L_cFW` z-k-H&x0vW*Taz;+&% zOa^jOTZYO;#>+-FZeW;bPTNTBp+>c~G_NS`uBiQ#3B@4KPq&TKl4Q+7D3%o)r`p!;`13gVR~tDIC&GGFz$ zuU(WQsv!q9Nw6d}P(pOzV~;l28QgM-lwqw3JSh|c<|6!T^{+d|@%GjT zjlB*@&^~?2I}_OnW98GSN!D@=)=%}fnld8s&L>l&M`N!sM>RwQ^Q|p`5xiNE3&pXrs2@!BkRyfYVnH*eR(X80XX$@{SRZdyR)0eDBGO-Fr@w;LIIHA; zS_t8kVqd);w=Gd^E)|2=DrUUk>8*vQQ!F5=M5VuaHe4IlYotDw*Oa3yO2hfuP~{Qw z**SGy+N0HvQkL|yVgXt)DF<0Z#;fd%S3@l|Omwj+A$l+iEcV>3ot zF~4V<_6<*BC``b2BR08I{QUw&!NnCH8Bp}nKZH!y6gt*S+kcA52$04@Xsd#;YGtiu zWv>|wPpYIPq8ZQ$p=*qrF4Qia$jPq=I3ecOR$PQnCZZ_DP*{WS6m75H^vRuwsbpx` z?^CU{IvLab8LEB0;K zCRLqn4v+h%hOq?6?ssiB*47bukiMhkZ!8jf9Rj`?&Y@V>r`3?R4o%4gGbXOWi~D{% zt6@7U9tpBTi)?#lVb_BvWXgpTd%?qE*^dbAZC{(PB}DT%BpSnKhQ+Vez?i>qQ^=J^ z-Fw?)F{{~8l;*(sQPu~R?r30x63L2`%c@#N#c%djNA^}Min+yn5&a;fyV0fWkX2_d zHtF&7DAJT|)(HvnOM5Wg2$llbeZT#NrjmxOojJSkeAzAu!~hElo63bBj`vnEan(hu zJBZ+(UQyb?O2NU(VSQ~i&npfGkNFMlxI&3)PI1kXK|*PQKeUBTB?nKG-eohY)P;`b zyA$pw?~^&!1!8PAHk8o?uy1;NJ23BxZF#Mt{AQ>rrv+7;j8B_PHQDGJCKQ1W8Y5RS zGm7tH{ptgn`<(aOJOWDrZl*YN{kQV=&8>t~O8cs`GOX2Ln$p|GLFW9WA(HF;hWvq+Q!8-mDs{ImXfwg%+^DNGiBm3IlM{c*Os=; zb2fAE;juQEi-z+~%T}^C?wI?-yVoC*({O7z z$`N4B3=wFX{q%fj$sSa4@LEv^v00s@Ssj+e!C)0iCPO_ITS&<@I6Q24$c_3vkW4T`Ra4+)2F)LI3T=s@xA z6=mDu^$gL|aRpyxK8x@Erx9WUt(s{Ya)=gIYK=fYf?T41o)Z?;a(P;f zU?#1-%ecJ@tGpA1pd$*dX0b)Nt?r<+S4#21HAQZg3cU?>M+Siy;##DPWe45IcA0fyFJLT6e^;7b_4JqjD{Chm}vWoEBoKI!UQIjGCj^d%Kj_ z5R#&e)4cY8FTX;mEdT{y0_QCyp@b*^2OX4B^&Tms-8*L3d*lRpEi7mO!7yd? zYGA7bi7SPp-OSR&YFb3TdUn1#?55_>BHvotAOt1QLm5m~P<&!5<5($drYwG@OgkV$ktp$q;TD*pQx*WeHq8g%e*JnqA9G_B~L5uYK z*9}}f1M|9CTYOz0(4_OF{~kVoz%Ne}fmO|d52(UW28LML zE9dbmXV#B){V3FTpT&nzpL8cZw(m~x|K+vpL?ijP0&B9`>@WOm30V zH}Z3D`5B!j9YWpwml0LbNwIHke2cueMXrHdVs((R(ECZv@8a)rDxuKWZ6*vBT}l99 z^r)c>4P-8utyav&rDfe5(t|~z*FHdu{F+3`pP>+fl3@wY=rwB+UK&&}fBl@~*ksSK)KL$fJ!$zr)&7BSu8!=b6eCA!Rf z&bPnOn*Wt2ITN&||oV|bStInNhzt;0V^=ry8pDE9MyqI#*4S?~b|LBKR z-CGctE4Tg#y%@k|C*7cYT<N`_sK^jcgpU!pKmgiFH4dD^yPV};N_3x%hM%O@KeesHCMV9_uj|&66VL? 
z6!Yw>`huY4z?X!_n#bp6#t>a)}*<+R+( z=U=7H5EH|D|2)MWPm<10GRm5s&hPsG2Fy&j-p@>&BrBcbwio4g7v<&_#Gd5Tb?|{S+7?#RX!`J*t6_2v}f_s?nhVRyozr-WE+n~A& zANAdqOcQ(XyjxtU;Gq?=P3{W)`M3Lir`P{wjH~ze+4FnX3EBEs={NHp)DEXgMx-ej zOv)2)YVEy1+7;9ZZ&ONH_zp{Z>OJEX5DrMZQy0Ta+s6F zl=DJpxyv`~gMZfZs7^r_t6Sz%)3E6e6H7OtX$AC$1?$smXv$j3q)f_Hfgkar3ON{S z)h+Y21eL{=9CJ60sOlq2c@^esp83$lb4H48fmK@dIvpkrjL#Qa-I77~hutqebQSsz zFs2G}R8|UU7)^y38Y>3snGUQGH!ZEuA)}CGsAk2T@AMc>@4RnSXzPiS^&B7lZPo4#eeKR2CG1v_L$!V9XGEp#mv{Lu zpN1=SHg+Re}V&Stj+6P5a`7}zBkYH2rR`4{S z)Tl(KJI$MJJ;bMGe&bsNZ6y8CZwmgisY1^lA2FE@X?-x1|;e>NJk z8CfpD!0{)cZUakgdWCfKp}4VrTU})xhvb>cK|=IUk!l`4fT<{EE68d&?FA^U4Oxu` zc{akROY=O`ClFB1=}M*RQ_~kTrTdNK+`_B$acxnSN?^GD>U3@S3Y&5X9es;#m8Yvv z(s15%+xtx`_AW(yp$y9l_q+J6?6a@B#${xgM%?ad;guyc?wABs#7&PuX{5I*8{ZK$ z11Vmdkt@y8vnaHaELAV2GFGcN+&zu=K%GOC4OS73x)m8~v>`qp87C{#Cf8Q$&vAWp zmZGZKVqE)Swj6pRn?*S-Fg2QpG@OJ_4h340B6_Lrl{w^@RYBeb6oyH^G%qfTR?lSJ zucP)_qz)hoyeWFOMM9_8OCOK-Y1h88AWAv?&6pMu-Lf!6f^1Y~B5n9_7>uM9%fJ@j z7|AdTXGO^;kWN^c!;p8q=q7GiB}6MI4`Hhts*OvTdq-SDNTAd=z_C%?+swV1K72z~ zU%84p-TqB*qry~uqHCEEa`xrW2n=0^+%wAny9Vku^#Kt7Dl{LisLqaoe)T((%xc&F zH09N~a4B0oQ**!0-UAhlRmNM(Rgy1 zYD+X!SAz=UDWS#8`r&md^b9{+gntZ)`$T_R{rjTA4?oxP9e4=h5}Uh$Pc*8oVM9-+ zx8$nQ?4wh~U3;44a|pA)p!gNtm2~u5YUwVWeO1P}NQ65TOJRhrjzBIFos26IMnjRp4| z7kk_f!y(4#jeYwrKR#1l=hH9$NdS3NtdVatT*dp1#r@@@pe}Pef_SZMQ3SRmYQLj^ zEsLn+dZ!T@C~63h(@pD&!eweva~ja<9>{eOOWgQNS@raDc&kWamBXrj`ioKRLrtz#T`cMz}?#1lrThDeZj08W5{GdPE zKNRbGTu}q-!jG0`@AAi8*56(Wlbrcj;3odmSuX9pa0Uyfl zq#+fj>9CAOI|#t$TUb7NZ?Oq*9E;prdy*#IH?5#8wCNCHd{1%U5w}%|X+Y#mU$`xB zR!0#yTc6k#G!M9YWcu>w`}2x<@9)3wgY;9M^jv)`#K*NOGhy0nsxb8o6%1>Z9hup+ z%iAhdn4u}_NfnVtbP5X+cYA}n7gtE&sN<%G*_kQ~*PONYl~xjt0<$)gz163mxCfno zpXqx)c3okf)}K4Rcl%hNGu$9xK-$7bb1QOUIzPaD`c5VbDbhTFp*gKQ`cBr&V9~NU{N%w#OmN&8cMA-SyWH$8v zdEs=TkIva;+2XpoLv!vtsQVwc2eaXK>#pN6DIJUStkA4_ttpTFNff&_az>8U-1+D& z{99zBF<8(%i4G4Jr_*e@{Skz6ZSJCi0A|LfQVU4~*D`a=#^zOVDCAV?r`h%tYZgdH z#}m96O`B|=T?pHFcFi-4%$}P!VB)5{mz%43#C22E#s`v{chyjOk2l(lv=nF$|A!?U$wOtPRx 
zHKax!8{T{%@n*>BJiHOjp`gfF6y>kOIJ-8Z z)1uee0c=@&;-ya8htuEDHwX1#K$_ty*=fA6k3ZNZ9;@X^5wxP`;#LU2K(yo5v7(j2J9bX}o2gjh%)Hj$^)K+`z(O@96K9UmJDjJ=De|Iioi(g#IMHq&rdJ_EbH7 z&n@al(0R}5ajthTRXwWu8GYugk7RPOdUU}Isa%dnfre$RVqC2FdvP}Qlrstn;msIa zuM<6Pe2+c{Fqg-<9GMZ-Qr&vta7$DG@0 ziU(FVK~2n+7of1k?^%?6$u_2H8Y#B?8#W!Wp7@%Mj`14J8IJkX=C4PJsYG6>?=~Tu z;{e}O;I9_UT_Z{%tLz+r?>eHz_$139aYchSq*kjv^KgK7BR)G5{$vtyH;m^{JM&K1 z_S#oIp$li?58e~SL?WZqclD4>_y_plg3xwNsDw(gV*$PpL{uY`8fQY<8j|wIlZ#lH z-2}b~=4SOA-%2#-$WtEo@+tI2$oCV;kdfMQ1>3htj*^{k7ZsobGo;a4bf_Q!b-gCg z1dhCz4gvL>YJhx*LFsfo1LE(|dT595(O15WICJPDoKhD&cr2DvMo?H@jsX=t-=3BN)y>EZfaE6@~SgKOhe?EVOA=)sd^2?+45?}l0e`dsl z!b_8OUa4*Ckk5s2AKzueUB?ljFMud6rNn~4OFcH>>6GuREE{9W*-OaxTR4AsBPStJ zh~->{AvODQi3F)PLtZEBR(yK50zs-CL8=;|uMD9tmXokfFN`m*5*yGgAr)UKHy0S9 z*^J}_s?`#lHgU6^0RGaZ01W3C;8-pPd=!d^U+y*#I8zbwoH%WD;?w!2}R(= z*q{t-D3lF(m;5hUA@#KoLD3fI6mb~$6F@T+WOkfF%^W_2n<6WF&XX9-Ea?%r7|~C` z%Z3s?HGiK}gsxTpLFEUW^#{)S0ULb(M^*w{7AtaJ3o&3_zZ~z?*)dIj*WO~oe}Qi* z2jl5mn;01Zg5}%s&gnQ{{QFDE;ySOZNv_(RpBK!?Qe4PVY{-3#$bBH5zIGKjs|XM* zAec(fb7Sdf25IhYEP=D$(|>t_2wSxWKzs{o855DS=HHnxv#PU3dSJK}&7!CUoYVsB zYXPe@00Rr8hh@}hi!um6!x;EAg?nj5$!M<`BvNC)=dtjQ>5ngAXU?k4{gl8;EcDXo z%0%q_!^xQnwx5$6)o_*Uk)+-sNjV|)*&_9U*!tR){t+GIwu~MPl~JM@(kgHFiT z6qRSx1Sh^%U3e3l+W(@J3gAu!(53=#RRPr5+xVIt2!w3}k+mKbC zQ|?4Cwb}GYF_{Pk+<^gYV1O$az}Wx{h(loFmu&^Q^~wV({misYbCWxsAmWI`Zrm1!~cT{UjH_z8)YSQ0+h(l_Tu@5*`05d;WZ5Rwdx5xZFK=wy8)|R zfR0W;N1R)Kc&&~bLo5~M16KV29lijeOCUUJO&gSv0;U}SrN_JU78K9g*-jFe0ryK@ z#qc)=u2mCP2XGni;7NEy3N!nht7eJl|7I;(z#T21jTYca3pXG3zpOO?Pd5B)po#_j z2q8sIpFD^E2?om({}jT--c|oVrdpGPqil^y+vfO0ccJ~9`^V#nVm6TxaK{K}V+6P| z0yw#W|B{20&g+mebwDMZ#M=Ytlwid9g8zRk_eA(#SqW`M2_QK<8D`6X=(dJ`kk|f~ zvn=XC7S*8LGSDuF;GYji^63HXAxKbsl|W3omCja=sm}+P_X;7{v(D8NCXS%lXtY8M zj!I#rF{uidDQDE}uXTw0?UTJIq!gxrGgE-0DPY|MV6+Q*AVi-IDFXwh)&XhKmXUeH zx8rozQ`8u3)$a-o6|ygH0GzEr`$6Oi>v3=HMVe$)+8(wUbpBV}c(a=xzu}9N0o=*} zT4ew(G5~c9;9po6Uuh+tRI@3FI|`rst-6Q~|H>@?nL-ZA{HRZ4NQMfKpB8D6iTs?u zptYoEd(?81VdraI1b_RVps1D-BB>rCsahhbTq3CjvykAP 
z!L>T`R~l`!qd)Qkb%Xrv^uZ%7CA3mKv{JRSQn|EJ3$Y==Q@imFj%c&a6Con8R-@|@ zEm(oq1RJE@bW=zUZO$PXjv)SGH0E5|4DFRiZx4Pjo-eW~%a0q?pYNqa9YKpuphZVe zivx(i7VSYF6K;btz!dlupnotM6`kn!V*;snLESAdrxssI)f7nTj+CZeB$q;tpx>~T zXX?yw#W?b!{@HrEgj1KKPH*o%I$jqu93z z+xE+AaLgjZfIv4ask=n&%P5MtyI0(J-~g8}dV>9k17+`KGR z6Jk4U7)I~uc|gklg}x)ymc*v5+*$fl8_?w)^J)6o6}C)E zt>uL_eI;?LBg~^w{}jr5j(PGi-7mL;8wvb+0Q<)wz`GX zn@aIoI_HwjP_xn{8C;6Fh}pzQo28<4xyJljyrj-r`VsCy#S{sl^JEAx0pxL8%rsxC zzHSRQWPSqV@fvPuiQAD>3L3p}XqlGatVxKisHSEHI)aTRG0t-STki4>q+N z<}vm`B!A)5vMIb7mvTrCZDK*N`N+F#!Yw45!-`-7A*nzMU+mNaSONBZk`xX_FK?uC zTwXfa^^nI4d8z#>SZt=H^`GuS-8nGE;gqMjJi$!cCSk7S(*)ZU*0>?(VIbQn>DU!c ziXddkN@b)ep%ZsYiWh&s)SxO|GYc?0!%UO5lM-6uic^h|j&+Z6!05)C-{k#6F`qau zbqWieS3$5zMIQgmziax>c|;K4e?nx>SVxCoF&02GO-)P zWZ;$a{MZ?3db+Im1wUBv|F<78+9? zNDBma3KWMTE$+d!5Tsaf4-Q3wyC=X+-{1ekz3YBDYZi+q&+OT=_jAsfIrINX3&)|W z)xur=O(TZy9F^j?AOkV>=VYFK6;xh+8b}dD&F7xQ@KvNPbx<7i5=9<%-qY_f@_Juc z7D)!b{If}_^;N3%IjZ$hs&%rtAkU8VxDVxcruL|!7|6^4L zD%}-^7U7CutXDCve9jK?PCUPr(xMT{}=$f1#Rf;0e#;jztx(qNw{lC;Pz9 zmZT%o0G@Q5dH;qWzxH46UXLSHrUNyN`P*|GS>>j~qnF-A@0yS_z;{q^3-WUdGIk4k z>lT#m`@9o96loW4yXse^6qTNaT4yE!ar))ry{^Tre4|2MuUd3kY!`nB)4#Z?=i)$? 
zw>w-1PSBzzu@x-@?R5NKl$*aO&3{qc{-TV>K3^pZB`K`^f*e2OO9e{_L6fy_wShfM z`>ulP@7@@H4oye?3F;VhP@t`E($)!}GE2f7UA0Rlg{u$f7GhubOoL zlve}(53B3H6t}%R(-03iRD#ddLZtqgKQgj+Cjj!?QZUOg#o^s3F;D z)h*%AGlfjbYxj~9Qv#ZS+2%e|!^Q9~5^^Hrxu4*h#avz zr^dAPCiykfp%}!SJIn4PaQ5f7A>ZJ?$PQu(BeW2?4jykX3*TZE z3S!#4#%^WQjjrM3UQD~ua zwq>5?+bQux)}{jVx*6I;X&)4p!VG3|9hHgfFOL7Y;5O0)KVcU7U=||()f1VM*t*FZ zq~x_d7dXPFcQ54|PDL?{qh1)V`R?#awLXC5|KeEZ?0WswBgsr0*?(y=V%j0X%orbV`j%F`M`isXPze!{f!$~;UAQiNjOsYaI;o(G) zPrj_+&NLeGW@#@39oA}Gh=+qlcpz@>u}x*yu-1ys`|HO((&y9v^^g>+kOHfa49kWD z%O+gve?7!O6W}65lZ_*zE8?Sm>yan@cG4uu4z6rd2BZ&tlp4b>W#u!eWgY5Yw}%!* z?WAp45Xx*=9j@O*wNW@AC9lKRu$*A< zhh$>uj28&|8|4IOPYKpFk@T{Pg0_T%#EOI2{{83@VAoPZ`7Au5IvvJD>1cYbN$QfI z#Dy=8@R!~krLEg{Vx_b!&-PfJIj}riV18!EKqQ=wH}*rGp!B^D07Y43{!%+E=I&FL z{EDilxNX!#o{GGcgvq)$}R_~?g^!EQAI$*LMO4|KJf=CzNX#zooEwG zUDFHuI-z>3dC|h1^y8Cm?-cAydkLr&X45UIGgsI%7;(=_=q+lKL%|OZD*n{piTRX} zB?YMej%V`dsonmx%a^OGn=0F-DBT}`9P#6gcSOO2BaS#`u?{oK&)s63y zQk>8Dr33J9!ayVPr=kX2e{V(-0RwE7B$&i#&-VELvW+``A11-=9P%2{0w4u$?@I?nc*CxfU2H5S> z7Qq@~9IpR4T4T;XArEmr4DS5$G>e$O>!G-W{NK8RrE->`j}hiY=0BD`M{O2a@I7zZ zX1bjkEFgb@r4+|P4e4N-E9cptFecN@Os!~AD19mTJ^t9M6!DtzJQk>L4$Cnc<9)n< zbozTJJ)Q6EhI-Gc+;5)*BEaCh$H#-fz=xxi_=8Z|m|TS`%%dk_$MfKFPFwRRU`i&;c}+YgCG2=M@-1YhQka{(&E7z64dg>9E;X4LKg`5=~$NyFzpLq4xj85rr1isl^Yd#oM$dd zi7E4lVGSbpdavL3E_^qV#60l+z8%mBx*x~`JzSMomaUb#c4p-`&KusY={AGY%rRNi_-ZuthH^o#CsUKYNIC84ds! 
z01NZhd#A1)^tFE978~ChOafm0;D7uJhJH7?^wg=h@%nsueZ0FpJ9+WFpd_{cxM&EC z2T7_PW=S0KseDV}&coO`WqC{Pl?nPYUYi(yWD@XoKc2&`iuw8$`0D+@Zm5;!d;^V` zO*Z6qBNF&UG zIF^~XaYT-1y}s{J9}Q_+%r1X{1gD;@jR1qKT$HN;-PK1mSNSU?z58oCg3eU*wVvRU z(ID_bpqs%OOR*clMTgyq1Zr@3S5qLa@~xC|b^HCj-&3*LPWTH8j#su^%Y2}mtniih z>mD-#`4zuuFO~}Sh1!*0YHP;0ve9|XXWhF6!Z!hrr&sfNb|oT#H&-QVUi6R0bBcya z`)@0s^u_{lHz9A=Ao%`|Kl)WwwE($+qiKO63lVPAt+vL3t|(~)$B0PM#%$f0ef?tr z)^FJ!6D|8h-awLRdiyD_9R>jP7kb5_G)LrCdeQ(7@!hPc@AzJL`8gq$Mxy1@byHJr zP|j)-GK%6Dx7-+74AzJ@u2#xf0K1qfvltfp>(CtL%=eM`E>Qm*{>z(3iJW9DVvQ;# z!x|ZU`^AQL${jMwq49jC_8RB445Y?reb?O6TB?n*IRo7L`Ap}}jlPUt>L*)o|L`7G zR)SdSw|Z2VWaxH&*7c_3c{$U8?YP%lS~puFpv%Zm8Q!WqYT4Udu)WB@AD*{~t$}Yu zN;}h(vG=10}yExz0f>X3Qr#@-I?KZIb5J+*H=eCna`wNKa z6&XT&@BZjaGhO>TX<}NFUlTRX&b?@W=f?@>?p`ac8n(;F*J(OfJ>n=Q2D+Ux>egRZ z@fAEgB_xCfdw*9lW)aVmn&FXqRf=~MHb5*|YLrwW1wHhP?(twVsUP=y-d>FgHTFU$ zGCDOhlmi^uo8$3pC-d`_)1qaf%}QN0(^n)A0g_^GQz~b|%F|f+Q+=!UESr<34E9sn z$MCnUd*A(y!!9#m*o=pFt1lIpy(-EpXx)ST9NMe#9#htUSZ=m^oSA0ac7DF~&S0jR zp~c}WPasa6(_~zFnRp1iPV>lbneTLApI6%7v&@%?E>G-jR{ZF5FLN`jW{s2D9#67u zeIYBO!9v2m#1=Ja)RkX-x+rRJv=(AnfA=jQYnr_WtCcH$w1eOpa5JUE3!TT4L}kivLa_H0!lA?NwR9l&F`@ z2}V+pTHlm(`DA0F->)Jc+sn%dBE95^$`{!jeMF!AZhVp_iN*GL0>JnpLZ1qw%d4LI zk=9{5d=m7X5fr$0@jDEBfCb()Z~wU)oB!i8?l}#7dK@ed=4Fp}N(^DdxL8-vXs6hH zxPQ&%_1M3{STQU|-W>bOly}M!jU-&&ofHhzS5v$!`+v>3aj0_#Qe$V_4vApHJT7UI`d- zH%kzRco6m+|9F&{n(Jzx@y8>gYy^3E)2kyo!uR#)`tWl*oK)K($4WVOQ|7(l`(WbV zxGrscD-sg@m*0lq#Ns^1qAfb{FJ5i)a)=$I2hR$vTg;yly%B&sq(FjL(#jBDZR;^# z-x_fm;qYSXaEQ;qG&eu_7yuoL+LxkB>au+~FQQ8X-ixQ=lcw$X7BEvdm% zwj0zl)!``)izS~ju3yf`V#j18z*FWv=GlzX+emA(kaBLkBi>)nzNwDM$-Uvo4EL;c zi$3(3#Od#GwT~fnHb}3&klY^pm4K~P{zBvR(sg>srz@siHBO<*vl^}1qp0_W@z9jJ zsP~!j&dL{2qotDa8o&K}iiOPQtZ^x^3iZz33!12#`7X$c1RAS1owG7Pn=~21l~_A} zM)Y|PdKN9TQ+Ydg$@3ExmM139S)d}~+LEL2xR&)9n`m7)h9}bm04^NUA5`}E76qCJ zYD%mm7qz~FM^M>yh*MQfQhJX3mqNhFwRzh`A}meepXk?NJzk%(&;`j3 zfmPmLj8&I#kWke^JnZDnl~DB?^XNHmqk>l@g8TkP1!~h&jlB*1P~|}NMr5-|%}Ixu 
zR86kS-lyP~T0%SF+H7TgFO^7vIm~ujRTF=wFQIg4vP;mrWvzQT;FX1fBCmcyHYb#B ztcz6E5FqW7RAVW3uiVqbgy$tPr2#`;s2oa7<*DT75PrlvOn8l-3#FBRX4fZq;Hi~~ z=T{$mp1okg%eTpt9;*zoq+SfWY47J1{ z_IcBBz#28s*tM4|!sQkNQT~=AMRi3dv#~cHG_5q>YSny{gDZD8$%0IY@4VEu=A#F} zqq2|dbX~a-B8le4!H6wdNo^aho{925EgJ*j^21kezCC^Wek<6-Hdp-uB;kD)`QylY zi@w>#pK65#$-OqgN;UJnIr1K`3KnFH0U;`V+lUzkY&cvac@=r;^g3)Dhq=7zn}_m8 zZLc|8+`$V%$vT#A;V~#0(ENp7S2pWfxBPc6f$6oO>8>_hb5-QhA=y3A?Q%KgU%*MV zNct)FH<5x&wQjxCHs+&yz_hR0D2;DVhBB;gC;TVFV=0wn)HGPaFL-z9M^FAh?_ys` zdOVW(7F&(}N)U@lME-kKK#R)`Qac;iN3X^L7IC?fHD= zhaTrfS7=m!K{ZM31&t&L*?`Glof0<1x2_v5M(ccyc8^>4LsrZtWr_MVB6LowhX#rL ze_CW@UQX#O?Qalr%ZGVp`RwfHz0Yp2PqnB}6u^{nd3q-6Fjah2M-VCq{`h zLRpWPO})2pZvEm8t39Ve?JLWDBUc*1z$*{>-}`dW0b6BOZ_cht#mb;HQvpR*#sYve z8_AI&|BONZU8$XfBecV^jO4os<(-o3L#2kd$*&ivaVadxy>sGnIu$pVntSNI>j+Fg z`U~6r+MnmE9nz}?VUez|Qo4G5n`^Q92QnQXO}r{RzOANtNz zu6>xp%UJPOGvHBe$6s`XziyInWimQgof}*9%-MMWiDF%1CddXNt%W*f7m{A3fCdQA;OaR1iUIe z&^hEzaP#U=Utp^;3NIL{m3y$|@ePman^N43+3e+~$wQE`&&%`~Cu|T;<^OXP$Nyh& zrAO#gp!)CZ!ALts2yvQ%^beWt&ky!9UX0$W%umu=GI9WZVGG^>h&<(Qf$#Xa&Yomq z5nG-dCVWhx_t~bORCr-uN-4^qEj|B0W(K4wN{)Q<&q(y&wMYW~r}tx9?_MVpfC}QU zUqWG)sRp<7;NH@{c7f}9oAc0gE1#AjuU94tnTQdY#>LKb`VV|hQEen7Le&OH$xj=K zWJxCQWCgIY0z~=lteu`DHau7XMwy6bv#U5VJI^@8?JP9sUxsu$Fz3k$8{X}j)sp?) zn;+{#2W!+qtFe&BW(vn_!K>C2HTj(7UHN^Uoe#|JbKxyLyQ@Jvx5VG-McaCZNgwEp zv=U^?`gI~WA3gS_yThC!FYMUea;Tlrn`#i8reo<AZ4ca*8idUD7ox8t2V|)JDyZ(nVu!Mu>hYlI_chEmODsrk(STbqR;RATw`vTa#_0FIT9Zw;-el7kN zk)LOdn7Px4-s!&qB#7+jSEmHY9Xo)NO1f)dzWAuNvnjh{O)!r5 z`94Sj&r<-fH0bYFnxfMZ^}yDW`b8BJ*6%}h$el)zk(wdhNhbL0St}mb^B|GXrq(+~ zy)$oPj%$Qe%6{I%G+OO3q!p?L*K++#z{I;ynM*m^w0o-Qo%c2T&uWK|R?G7Cd7Cb(*V8|Hh)n$NUCZx_kMXe0Fqk#w0OBW4 zkOVf8HcPt*MBi8MBzghuiID4;o;e{oau*f!_OeL-4p3^l_TDFFy_;zsWW|5IsF$rxxgrX`$XtGd-ZEnwM=d9ro-; zaf7AfO%I&ujJw!epx`xe5#!c6;mPZLZz}SG25(`CTqh{MGGtNG^?SG!8T@^N^G&Ik z1GFY4pvcl#4UnEKIVKX2EgW!YnGI|}tzoko+tcPMIZ2jRQgS4ZkIhg(79RHe4<^X7 zf62=PBNgzu_q|QzPM?_{D(`I)DL>T*xMv@A*EZ<$mGx(BCe#?4TED4wG#`SdO%Je? 
zBzUR;*wg@G?)TQ;QTzmUvLyvQ@0Baj>kFy8XRb?bM2W=S?`g9nTXPX`yjvz6>N(a| z>o-HF(faH3#cSN>JBJ`UNm^uP!23# zV8_x2S%S>MktIljB#%Qt*7W_G$@>}Ag9JB>L+s3p`mrSHg|5QA83j6L$wwj@V~ z0>WriltxGG3QBg6NEZ}q7jtdi2%ns_Uj&C8%s@z zb^NI(R}Vq1F^kGjjXQo@3i7tb)3>OeFtA#O15QtDb*sjS+eRx${)XV#BL0njEiyI> z@*v;htf~xxFVB%Q)T*^)K_DwvEFcdND2oa4h5<1XcS>-AbvTsq8nOK6)Lz>bAT?9S z;y)09-UvfIDTg{#1jries>N~q+&cZpbv%OTlo_xh(2wjqP#w6X|9v9w*Q!XQ62bws z&+0iYwC?4+Es8%Q{|jUJJ?spQ~cJV+ro!5U#7a`z|vr5D=GfRbh(3V07&5RqK^^5%T~_5dJ*vNcbUr z3wTKA{BS@<=S_>7DiVu2OI^6~P}903Z+Ad$C|o=kzWjU{u#5)P7Qnn|C0q7sS9rUs z_mjEXa~@d*DkIlFt1DZq;ab%d=?Z5j?LA~!(WpeWaYGC2q;|YRu7G!JtwKJGaCPBt~H6a=eA0@XuC3aw$pz0K4_o+GX)gMrMhbVjI`M!*O`&24K zZUvtTm&(&!KPrzv2EhsXn8y5b--jX+msG^3VpC33s5-`c4@$lXW9c9FjLTz-%k#59 zgnobKC$mEdY_>w^(UzL8V`Ws)#}XIl(U=$*<@Vj;GUws&%qD!>+?jQUR4j+2qk8Jc z^hs@Eg;+K}Ehj0cL{rd96i4sNua$FoXw!?S!`353h7F*!h~Nk@~GV@e{vYHz|< zlm;SFPq}E9tPsC`-+qoiy9hh;6zd1JG1ahHtDUL5?lTYP&?Gt?M(>GC(^E8Ivxaei zUgdJmN7mZ97X(DCs0~_0zFJQG3kxNMaOy5SRA!AolU6->>CHC(`=cB}d9aBT$jbrB z)kcVGB9=q{0+vaTB!S%)rEV%TPIdjUVdL{?puG5T5O(;cAIw5AJlK*SsJY>X?oAw~ zziUw0G2cxcP;IRs0+Ml4vTIDy!>_~;^_kw(6&0$CK!Tly1bbi{i2+-Do}W7+)cBd7 z=ng%kAq3H<%ts92r;O^QL%s`I5hu=Ly%SmZ^CxMBmba$^^dC_^1-B<5JfgOmwV03_ z8nI^zi4W_=3yzs>dSh;j5{JmQ(pd(1Jrm7Y6V1N_AwvEB4J0$9Aql}!#GJo>#e(*7 z4e2A4 zi&Q^!xGnbeFwJ2)_F+1m#nXCjS8k#Rly}quvS(0zygkA(@LC|YO>Iyu^Z`v-Z+FIM zQmgJ(&`qpy3^z>GN>K`v{WH?bxlf`L_+r&@<{S&;hv_U(Ue6kH z)*AD+pa<*rXMV~%7D!MY%1EDdxnd3U$WsLpTGjrjB|iW@|I)HDw#Fn`ZS|$KxznZp zxh50NO}fmLYuPMXMuRL%emf1H4<~J>iwyIU4D%jwKkdqQ#d}8=Ag1EZnXqC>IQiJ{QKZK&N(crZt+iZ$W+YG?w@h})gxAW=|{9; zGr6C|3SUXo2G&8a6k?U||6(hFET@c$Y|5JG{uX2VySBj?8K{vihMeih%(v1-hImPa zcuf@_YfXbBSUi)=jg%g<>#$w=jr<=X!QKs}2Puc#LwSytL0XAUvsAzE?~xB}Sz;wj zr^vR`L)jthh1`GbIra1-{@*T>kEoNqvyDF=4m(#NNC|XTu}~kP*-qC>0g}C99o6HB zeZcv1@%m7dL+ohi>1VqY|Mn4IsTc(~6wxUqZONrf^PIrdf4oKvPbVJvp!09z_Koe( z$#C{sdh}X4oe<(3vR0VdzU^%K3rDUHHV%K0W`LnUoU1v{G4J3HsPHA$(+Rtk(s#z( z2ndhwi>b$N78~(8EX58y^H&a<@$kTQ+otnoqy1M`BF+3{n0PH+q?4DVlUL&Op&MCO 
zvUvKN8=pOBNjX~m(IfkSFe~)f=uq@XTT?e_;WTO_P}F0HtevM$OXfG-wXl1uUl)o3 zQLk>*4=ISa>S`n<_hw`H7dyon_jH(eJzb=Wm!ykV0_E|4v4hNwQ6HImjQB`GbvwGJ zVG7W2E$^)L=8+%njO3>c+|Fpm-lQ93TI9&rHvYJ;Ii^F+5FBD6>mQo$(m#>uE>v@(wZG;XcXaDN4Ca{XLISksV2JQ4d+XO7(LP>ZB zt{cgg)RSV#hN6^I@LY?=OAi_j-ZYt)8hkb(LPQCu9;MT=wNPMX_QyXpay5c6%K~P$ zEX?dK9e%k>?^*f~*I72gkwQ1q`Kl}+M;k7x3119d1T5n~Nd!u+8#5Ob5^ad}ex{Un zYs2iJUdq??h1Y`D3r_lu<^)>s3>UzS_y|stPEY-BU$F7Fe%%AEvqddYZD zfQ#MDBzWmlh5dlpCW#^~24YROZu+oOx9_@xzM|thF*u%ojh7=f2kb3X22=Jab)TBsV`QP$|Apkk@bN*?%DhNBn4}#n0e$H1_{TMGbK`}FuqYAaZd{X) zlIrpQ^uF!s=fM!v9&BnEAc1u-ZxmGdifEdbhrbr-E6QyVsmEM^RXoV+>U1U~lf8@cPx6 zyK@C>_|k`e*U0q~jEVhVELn!|KD)pRkDToKD*V!Wt<=Pq-F8+dVzk&$yV*^8QquineNjwmcueYBvIFS@0#_oW zJ4otOlFg560rZ^>qFHHH4P3-J$E=^uTynO4lh$h~3)*r^v%_BsDNi*8o<@Q#_8aSM zhTo?L39s^e$lE%O29({J3++{YqNj2h5|Lxd zfVsQI{HEGjB(mK3W$+->xYT+<0Y6eDE745Ampm6oK;!C929=+DjrT^EZ%)fSd~#p| zqHb?pn%^=hsbFZ}gswG)uFSy?5Wk$nW^_1~=Cvt92=-qef)5a>wo_k<+%i8rEqD}d zS`}?BDZB0DN9vokb9IR8ePN=cbrt?}E4pUB@C4S_od<8bbv}y@tZ9lxo4@5acfRZS zbw+CdH!g)D`vGq-fL}IUfA9Xg(T+GL>7nuSeY}6co4b)AwB`AZt0Z@W$7CO9EZO_Z zH{$N-yt>J|53Qp7Ic|K=((P$J>fupj7~P(-?er6ZlAtTnU8y zV#~DK)2**hp1!$+E8SW@BykK89%$)x^pxCG2k*q3s6P3L=nFi5`oHyKZeSJ#*F4A{ z`V+xwMnk#t?Ps`~7`3!)V-yA|*6+HEgc`gPx27Y-O9pmhsf;$FPB{YVa!X-9lGo=8r^3tJ%}}3FHwm3=H%WQF~eotR8r}A67SiRL4XfCz}*_ zn-C~+DUzRfayprda(kJc_q3L-BFXqM@8oP6d8SxNUMFJa(aZZ*>e_2{ZxgWrcsjpJ ze*JJ!Qb4h&3)nk(yn;de?=Ih(Ki;GR*N*V~Pc3D~n@=!HwuE!QT-7G2dagvt^k;weF=g51@(j~M zO6CS4-DZ0zJ|0pX4@EKc?)ulO?qahu*BmJ!it?vw#pRq=9?lkCf}GsG4E5kiY6}-# zWgD~@V_TG=jGzbYT?JkYKV4=LRzTYVvd=a0Rp!e@5k{`RK3R`%wyBwm3Hb8_c%#1d z8%rS3#)n+agjOGQ@%&yxqE%wwdjngEaKi$Gu)WPmN>@L%tib2)3vLzSQ+iIDg6gqG zL2RkpZmuw{Tvv`ie(O3C3FXZ{P_1WT8lAG|D~6{)jG~lJh5Hm{`Ku!-=u9umFE{y# zvU8yuD@Ir(#wQjIv?Dp8L&Cf3)E52BC_yX?CA-^py7L_=D~?zRp|jtMdg2=ERuq&- z9Jexpls!Q-(>+Y3I?+IicgE!0r2cRgf4 zaBj~J(#=7L^F7GZ{dBrHuyYzGzKznB!Wl2K2=Kbun)Pk*Lfp$aXmjwfNJ7=;7p~-q zHm2l>>OJuabfV%;Pkd8-xuTa6SqQ>C3<)AElEU9W1ewn_6-5dfC80kUhO2V#LFt0jZi^zkM%oD%=Yo6{J|1PqZYcCxJq~7k#4tAv-696A? 
z0g%VNfhYJ`5bh#YLCS-hs;4o&=l5}s@A9o)gjbjyAQz@(>rUeX-?xmwpV=ywKd9WO zO0}6ctg$G>nX1?}&c6*ggf@KT5^d#L9dBaBQ`oa`f;>=vf&XzeA3QQ&xu!$?s`+M-S-K-ZhJ1B zUN)znySOUg`&)f{9}=jZ4!r-gY4)(GZ?_4Yyp)C6jCV44J;vr;+^FV4z4Td*Qa|S- zjKJ2Ckw>fEr2!`rfMc|k<8L{dWGE}Z!8OLJD|G-KOrt44@>EJ zlA~sBh-O~MnMXSM~HNfkKlVb}&iNgiMrM3B`ygO(7^mOF>uQ5q+UthW7 ztgyj>d_t4@BjibZjm%-YrCb$lyWy9p`DPdS&EX{lI^$&O5Pph}g5)3h$<7Wme&eG? z*8W|mQwNX`8Dlcy>!C4;>%#W-EI3QC>-9H_D5*Wk@4exJ z?bRaJ721(arci3%VwdaEE-jhVtwGj{ym3_&%4)gNK-O6>#|)TN8f;!OmsW!*_E%pYP2Hde35&~f zWRe4W1e1@Kk>Bw4+3|ePXWin`P2Z=RzDR!%HEwzrWOw*GS)%ld7KTx>fJli9N*QI$ zU;9wsSzA?f`~5xZuV%&4UjrywowC^g!xxGyDbB{!Y8on+rp3CZ{LlB-BcANYciA_5 z`lj4oh?tEEo5cw~if**LleOQ1HcKRZA;!!~78a?8gefO25RwK^WsA{#yrWZ^RQhrc zElp=|c%e$$87ZQafF4m;yp?2neWSUdUvGA`!Ti8*@A?$^jrT^uu!}3#bz184OUyqP z;u^Wnq+8!Hen`}zVJ>G2{2_RkT%%Wp?$R3(@)3>p4ORkSYIZ4%V7I(A>InE+s{8Z; zw;Zw(;g6 z;`_qGNCgX4u)^v6>GE_<>xYl&r+`&=>Vfu0rX(vXG*pQfVmz2K})ntv3nJ)V@5PAh}kA_~@W^G^w$HUl5Q+lzMv;54A5dEO%o2rztm*@{K_N>@n*yQwE3|N3S6 zO?|k&?_>W@iVAIB@&Pk{iJSa)O%93vG+XtN-+u@$+uUSo9l7n>k5PP;95`Q^whg=X zCCQF|+X@N4CS#GUB)qn~L90%X`c#R!Pk%l}6u`Y+=vJg zZ}(0DAI{YS@6Xnr8Utu{9tt(_WnF1_1Z0}O7L?fPQpp#>2Usb<* zZP=c<4z3)KfX*+GHyc>Jc!gv^3{spCUzg>iN^*fV-GCm3i170vKN zpuL(CRPle%Qjh95x=dIQx54XeH=1y?NhFJ5;}`#!Sr$tyXNp zL+=(KMZ3@-RQ*Z6ONc|Yb%3F?^OODq9{Pg%J8b8d#`=1_cYo-BlYTT8iiElGXdN0V zyQ!zdio(`c@$EGd`v|%-syjrdEDqS?I_#kyhpJ2c=YE&h0|V z#s`gy% z)od5^FLLpFei8n$>X3=VO!G{k{n|s?ILvDMMl{^#+{}wD1@A&xpM42K!{Vh>@MjeC zVicTDSU+A&>4dUhpx`uNXrMT58%h8I~ZjQhZ+HbiFFCMvH$JNr}Z8GtZJCTe@ybYwVR`|>I2<aHWwB|PG zsbv7Skqk!Mec?)UNyctbWnCA~4AK@r6=Npp-wzaHey7fAak0lGtHGcEt8W3P< zJKtzS_rB|55^d6PDJR>|MO8;4S{=fk3gCI_m-O2i-hjlgbKS!pquRT_2eR zK6AJ5x~ioFm1Y3U6{zM46l#A6y@E(2wJs)%Is#eSpGnAQu{c1PdJp=4d;#A@l+1$0 zyepK#omJeb3|rVXb678!q{zifswx%}24+B(?G;R1Om;7Px{zc@#3Q-ZqhBh2XwiXx ztiIkXytb}i1VW#do@ai4fI43%N4X66r+N12^j@QzvL$^6-^s&9ALkv0tZv8-9j`ZQ zUn>+!{h0+&6)4_{%OUX?ig@YAmE~o1Gwgv7`=5fcs|Nq*1nmX;ckPz8$WA1hnp0y^Ys$d9Mgn_#=jN+N6JZ zGB5pkXi0&8e08(Qd}B?~8d&($tRRpMFEc!*2+H!DD}OU~Y|T>}e&Cg@ 
zgQ7>6ZiB>GZ!VaMjq4uN^VeyHFA7Mj85j7h-Q?=3mJ?K30Wc4sng>v5_#qU%OCk}O z(Wt!%DR)!7jvau6Dpz*qCxLH*wd?h?Lp=Zort{M%!2zf%jNxY6ylL>Fhc?$_M8tdK z&|aAZ?A3Gel_9_3QZLklS~Wy{ zWTe7x!&%!q<`aRn-Cwg$qALb*I8fb#5^I=p%?hUg++A7=`>Cm8U!Ibh?U09G-WZ&h zW(FMX0A>9GIbPpm!UO&2J*)DJ8XvdD5if>~oPtSnRa3r*U-~83{4ZlJU8RtrHKYFL z$}OH6{R)ZPb8Hs+^?C8M+?o$IA4WjM4n3!}%t$UI;c5R5gNV<{NE-*1%HK zmdO8MY~Pp)HmdWA7lC4k|EXRwYFkz>*?ydte%bd0eV_AYy7py_WM%@MjrSf@jx^w4 z^ixG|XFrD@SBlFK$Y}B5O*7n~4qk>l7RMT|OCqIp6^Syv@SyV^g<4ppWr`-II3!Zw z2ENy@_9Vj|D~LNYds1Qzf|MkP_)Xv$glJ6_?CY5>@HzaO4QuKukh~ob@I46VbqXI4 z4H9E#x`CH1oY-@Wl*{H9Vf5N3)Ex+A9N$bkK%T-i$l*ez6>I7Ph{yf;c%BFxmf6!! zYDl*Ji7@<^-vh|#{^89H+~FLV-{E5|@jBfflhcyxg@-u9*vW;aZsLb;zxD-^TG!P} z(4Qz64G))g6JLH$KFnAjM|r_Q!Fok;Kl#9tO;Z1lS5E(UwXUwxowpMLd>;b5%2)yF zJX>bUq<)-nyoEV0Jx4DE4Vno{9_#9V8~Hxk>|t~oNQ=$H&f!BTDap2=*$p)Ii^pP* z4oUtC1 zDP;;1c#9Zz=zR*$sxtsfnc@YCI^M!9$x~ues3b=ihjS2*9~W0~Y>Xj!X&~U}11Jjt z+F!pRXhgXY-uV*fFbijZt21|zkzgj=`{|f^OO^O!xotPBKFLtV2HbkO&b3pD3AY0T zWNHI&O0{nN_5ahsOh2R!UV)7EAKuKv9j5+=B;z{W(DDgy6$r@4qAKUe!qH8>Zs_%L z$mEo_1U-;b*$~&mvXlk!EY!Ql1|W@`t@5VmfufAJu<`Xs1A}+qLYi#WGxoORb)ed^ zaj-Mj?X=cu333>c*VSoRZxXIhZlreN10B2(RTROIxP@Awn<%;e3Pjo`kJYn4+QdFl z^!@$YbnAEs`z)hOxwl-Hxrxzi4&6<{#dR7pm+=6EoAyb-2Qj>Ln~q-(A8-spYR3@d zjmqPe{Sx)aAP7JOI)&M${~Td?@&Nfi9sJ;)O{g3wT}B{b0y*Rs2FP7>BptVG-N zHgu2;HN^)Siay#N-=+_p@zT;f)sp`M;EcB_9!R*DpvC@L0|#H|vyQ1y=7`_FfjdeMgY zPDEhli>ImXQ@{braqtM<)0FdV#mal!+w;#jf_Ps@j;c0hq5M>q9YYze@^g@vQ*wC=AT_x5kn zLoeVAPJgx(=B;u5$=APf>!?bx=?XqPb;9cjfvAE4_x|1_d_uWVM22y=f+yQZmCG<@ zg`T)02k_g-!=nc%=;fLX;B-ikZ*if*PIGat+O2amoNmv z{qTsCj^VAF^jDxKVRUz-THmDWwXaooB`I?-lX*b9PRlEVcXD=&C0uTBi?yo$AHKda zD#|bFcZQaf25A)$K~fqd6cCUQq(r*AJ7(yT5b17|?vzGyXzA__B?cH~=05)4_rtwk z?po&)i`i$_`R%=*bRBb(CWcif)+lryjmouhA(CJs$32Mp$zBeAQ+sqSztiUl`_xcdt{q-ym zwu17RJqd@gN;hP;s1d_W4cx_Ie{oe2dnSqu#Qs3yy%5nAi}OH+B-#A80|1b*_B*KB zM8wEvV25SH^c_?;&N^fN=hLbRm(DfCZ!5|~qfa+#7!BNM2Y^3X?x3hQb;^~W>#v|( z`cBX+{$rU9iZh(7)=o6P3y!sLU&Ofn!|_F=xivhcWySDWTc=w!V&DbT_ql6ZXQS#_ 
zRW`k4bur9~igC@5TPfDJ4=t5Z__%}Wv(9_@lAc@181IBKv_j1&;2w0O}5 zK&8lkP>HI&1axu6{cX)ql<=IX^q(%SP>aQIy#bxj9|>X|?6UA=12@{iWq{R_0E7XL z<StB1 z?VTz%*biD)j&v^L~b`4U=&Ioh9=ftVpP z_&8$G@vY1d3Q9;@I5xHBhf65p}R@F6?*Sya2a7P@GHm${fRHyu{KACj5PJi>`81UuY zW5AdHA69w0{;?X)H;1q-TRmehD)f-BfcVq;ho5Q!;Bf-@#3_U&@t0^QF%m^@F25aM zN%kS(69@k{Mk(5?Xl{ehe}@m>9Pni>BuW&HINFBK5eNJ`e7NQ_NYvsneCgz4o|(dT zX5&u7@fFQC&w)dfeTC@WDrj7(aZJgNxu`qzcpkl?Eed1FE;x=@b$q{R(xzzA;Qm1* z)X>0ZAxljVwxpK_VCY=Yy5U|BBnu0@D1FXU20qP$VWSsc+;BWqiL5hYr=R61-v@%- zn&J4}e<@UWmHriAvop`9J3X1l{SHfs^S>@QTW3yPQk$AK=e5qNl-fJTu27-}$cErm zNiEFftyBV3gXVdaRU5`rEcVmEP+VU(FBT;6;LjPqt-2tHh2pHXc!`;2(*+{f zF;SErQxbQ9v~w-Ztf{Ls^;-^%@@$Z7?Qmv?$MwH>YM26WOn`DHySJ@dmo&gyHcjI| zHMm0|-DV6AD3*+5h_Aupr6W4Kv64CN_a|btjH!gMRE7K&ovK!bk$=UIbBb#+gY$is z;&GDz$xkEV^cjtrEFHo3D|$6{9Jg~t+lG4ykgTWZMb6teAuwELH(RpF2Ne|m_Wpx6 zZ=!4t3P{x6+_$d*5@bUvAE@O8DY^VH%|9g0kPI@fAu(+8L0#qUIeNOc1?Uv!;@;*f z^^BZi)fwNd;#=sAcXCOfMd9~H+qZKTYFFB1D!Swrh$7s$>(t@Xw{^zEYf%<@Go4(TXi;tCQQ_U3 z#SG*pS+hKU6SBoU6;h+RXGvy$B&ZY>dkes{(Qd0`&J5BJ&w}@rn^%^2FfG$>q(one zv=q3Yw5b}GW(lCXdT@l28Sr|Ii&X#vPBop`vEcFlIQiopr(Q@ykPWz{W7ihnCx8t+<(VfDLVz|ls8rzot zlnmPflx%x{F?F-o@@M?ppYhRtwAV|gICCzf>&092dzfs+*8A7uqISt1spRL3BcJc~ zrpb}CH8#qeNeR0*6S6-UV8XLpZmEs0!WX2E3^inj1Hii6nLQbl+->BeVL$vh((NlC z2A|O4O(^eLRLjC0lOxXQB&;3caD|f4Kdi$?@~GmE<@##r%kjfexAz~>b9uVu#_N@v zl1von&>fgX$tzJ7y>1UPy(fuE*eg6H!5; zSqgDm4TT^3O?j(En04kvlgxtF{a(?{xa*%Bwm&%;zt9^2>x?7I_j_6wl62{^4+Ypm zRM{HyuT?I9boq9Cj}6hOqDO@|U)DtK1s`@sk(?zy4G_-nTQeVSxt-Fku}ht3GFi|< z?G@dRyWZxo-R5NM{4Y(^o)$v#!%)1srpycW)d98e1GcjX7~PnVh{PH?)QYOD$w?cX zLvn{c)T5M=)MS}icM7sx$jX1emH1B)V2}Y0$cquQ&JOC5zyJjkbxUM=gP?ktAka)J z43c(_GbwRmRi)^It{4f0yt; z+)qH_aiDVw@H!(Yv^#8_aa0_Gxq=jw!*S5tp~-0atl-u35f0jG`u3US6VQ+-Hl!{r z4$t1}c}B`gtVIoi)pHbd-c1O#X=n>yArL0mskX8(_k1)UJa(WrX_og#wuI{(Xf{_27+n1w3{6BjWu3@r=Q|@S=uJnSCf8hk|IVr2W?w zDNnRN9Ya_R%-G`?aOpu6O`~y?FlPh1R%*{2p|h*i~vZIE@XoZHKX*Bw9C5*{}Zu&~a!Ms)(G{^^LzQBUMe*il83Zk*Y1I{5iXeu3Kgt7BNkrm;JTwk>8 
zIciMH2F2GsfNBTnmg}!ql7d7YfuV&R&-u2~gQ|W3-wuKncnTb`>+G?ll1V{n@*uM) z(7QOiE+c6t5(`Ff{4AbScVg6YsN@ZvYHCC2(W<2nmeiO$U}_7pBWh9c_RQX6E3B?c zhS7v5^~#67mG{p#}srX+WGeJ@t=`ZkZopC|_kNU}$e`7C{$HGAd;Q*?0>%RfH@L(DFw?D==>$YPExSt$p5en9x zv`d|C%3lz~Vjg)2g}njsg@Ov@@rq^YB*6bT8EfMWXeyfncwrkYKsY6U#f-QMq}q(Tr2kfqfY{U=L(oH znY0uGp^)pGuHQI*nSG(G8V)Mu=KLdV|C*fn{x@vcLB4Jtn0t-_f(Q>5Ggg3~9S$Uc zaKn-tP&#y1_=lrxojsmZG7Bgz9At(MewXsFOGn-*&VmtRV)h}_e&C+_E!7WpuQ;D% z{V+hL{R0XqX%>tn^F@-rN2tKzG1soRF5Tl^&s1K4frZI$pe_VwUMc$>}bF?J@I56C~4&IN!&1hnefad1G|IJM%Eis2G<7 z!$>8-6n-KxYXtkF>%k^rnmOAPlICCvYW)J<7stufAe3tB1N8$tyt<%)Hui&R?MV_W z+B8hm@p3RsG*c#Fz*kaH94R?owZA$2i~CMe-#iR{`(cfmz(-u^8a=H&p5w&7p8Hg4 zHnS-vPekwtP&|KnzqdaSt6V{?FklnhTn%C=-)#_%?Y~Xvp^dvBG9H?2`wPyZYB?ez z?lwY8My!cxX%NAoY6uTNdpVv%Ff8DINjjhG;VJVwF$-Sp53+|t!d(nJcs!#mElX?EqUz7EQfMiG$y zF32q#bAlZ=SHcApECHO>AA6vB0bGz&P-K~YV)-A9^z~Ma^q)I`-;01?*9+>rub;-?V8R213%O#K8{k|P}~UT1_Vg|C%~(vXJEH9KmanSlBdw?cNm1% zSa7Rd1(3Zm##2(Q=Pi5-0mAc+lEam~Y4YGpH;VO8C#oYFQSrpAtz#pKu1`KDdZcOi z$zI}NDi#WrJqtW_j-hq-6jCW4Kw0u&wiLO1 zhxei5qd$-Q>bNiAqZa307AKEzCW4JYZ*$X+we8h3BJ`!kYxVxvr7kvY0`m`ZM|hxP zy&%z0aQ#Rm$W*yoBHNorO0O5Bz&DR-Ni>@?AsWN>PS)?QyI)^85C(m$M)Fhr)dqKO zkusTguI&0@jhiS3d&FSwcMF4bEYAjbcVB$mB&FqOqZ|)g@aM?3-vZnbb6=b|SA$;4 zw-1an667m>Aq;KejgjJL6yZ-~0k4BME?Y}1SG_uJ;TK}#CWhf2qX0c|5XP=LQoj>9 z9L%0)xlmzXM@)XNCBC!^s1|p^n^CGn9PkZFqiXKvfx5rzy0(v*f4G}}WE#MDgFimi zSUWN{eK|G_^$;b07=VKZ&&bZ>Wwx55fHJ-ZZx1s*U}^YL}GRi(zV56PEvPK2~cn2!3oEc9N~JR>m7gWhPU4KO!v5Us&cW@^D;c3ccFwl zx*~7DS$G4N4DK@0Z|!FrYu^q**Rc-#4Klsu81QLE2hVg##0~|Uj{ZfP%FwAPa?)R$ zR~)v6mA~fk_=G}d^ID|1>`(7y#Rbrq$$x=#Mjt%6)A~45nR%X(?@~5|-XiP}7;NnC zL@$t;RfA~#uysj}Ak~yW`RWfX*SuGqffOey6AKMl+;rDmY@&%o8j;kHdzrBE zEO-31&n0tQr^c_Be4HIAz0h?#oBh8w`>Uec!f_<$zb@%6Uu#Hed6wbj@CEX4s|wo|Bid4*%r;f7~Srj-&$=Iy2)HN>A47uS4nM?AOD+t;T?SkA49hB1kaB69+3k- zP4U#{JiBxEhC4;!F3Gix&@qj)ExZu?oP*UW_W%+ z?d43ra#=fUL|KmPj79H^8K(G>?08hC)wL}%joG7!_O8)D!ufBWmtNe{0q0QkaL<|8 z*x(>6!t1FNjpRfE}Fx3mC9GuEZI8;|V zTzhc6cvkNs-?6_x{rq;FxN$~E9zoO^+BC 
zqOrcGSzYQy%&-}u<7k7gf629rPhM>O(NK9FY%?O>kUCT&#R^jqSv6WsC$F znkFt|YqE*t-~Wnt$b9V%dK?_}IG8$$uvN{>)G?bRZh%f7XOy?l)Ui+!XL}cVX`PR1 zHOBl)IYmHKNd2*p`e(esIc0)DIPP`~D7Y#LgrJGd=$-U5a-P8OvJ&o)vFHwh1x3Mv zsQCz5<8@8hk7hUyeSBXVY#y>+SD0MR5&1?MZp0XM(@FoL>n3o-lJF|%?$3=&Y3@}z zUV3y?MR!fs88q32Q4ok<5J>$6$H(YZa79_W!RD>xkAS)q^y-P{&GmpGS`>Ylan>2v zdDp}Qx!Y($+e@^}lKce{tXp8l+g5`m>LiEpHXOK-a~juq+j}zYZ{T<3eRU^@!X5dF zUg!Q&d9tQ1@3A%>UwC905xd3*M-h6nT8!d9+L-xshmgF#-%>^YxpT@yt>pA?8KN(H zQ_u&@Iwz`sxgq!C`r{jvx5RNC)KJw<^dEFb=HH0OJD}|x>Q5U}z|G$`h`EK0z{{hb zQ(Jx)XPuBcvvky@^;xvX>=Pu?5oivRq6$ImbavcZP^?d+30cc2Thy?LEoFvrc0Ya= z|MkSK-#;eR)1tLyfafk}ZDG}QFYM}z=9#6Fg?`WXt*)ypcYkk*&em+M%8aUtUUJiQ zoscJA11ZN$dCTrzG})die|1#)t-*F(|4_h&Iq#&JR~ARn;ZA<41MzHa9&z0v>Sqt} z8C!Qh9g5vKrKkCMRCaoo?;9{Gq3sU}Oo(N5S~A2! zjf_sUeC@C$NkNnv*;m~)pyucK7=J4mIrgUYGp1Z+!Q!8MsiS!!JVp$50 z-TV6ucqS0+%&Ebv7eC$4k^Oj-V zBxRx^u=G($dhbVa#Ti2NA7--rEz~T?-p;9-(m)x{xaT%2nxD|)3A|)11&I5C=)%s7 ziCf}IgkbS9n_EOECgtwieFw1u&ArTgcb#ef3nUzJf7^D3h9j}Z4oc{Med(BzsXNUT zam*FD8n>fv&MZ*O=rCqJNl<4n>h{a+4-h9lA$k+!Q3Z&rK8w|*v6(YR5eGKaG-HpdQf-OZwzn5b*wG!M6wfZhND zz0av31=}k=myNBA>YGpBw1NB&oY|G*9=GjxFPUj~nO7s&w;R$k@H&{&M{OTFaKt_C3{(l_4L@!zmq; zAL~4a?a=QsnrUCc>^Cjd%FIVF;xpxEGQPy86edHftnFG^q=R*?o#J={u8@`BsT3-V zJH^A}$8L?onf{TPCq{`hKZ%zURO{aMX({#1Uu7$)L=Urv^)f8#zU!pNvTYkU{We_B z`>Q=XQ`B^%?URsEj%TnE-t{I_zrA_9tKx8F=duo7dFvWCvJ0p14*%XU_1JfkrOhTD zImOf#EMj`>yqR2_PVz($!4t+v*ZL~yy~aj$G1XwXm}&%Xf$yTyPKx0)JpQ9)M1XeN zyJbsi(m}A&Aidk0E`!qH(pd3J1!SkZtmn%f-MRi~9XjsKWiL{;E!ktEpESO-KUqz8 zt0O9iqq|c&xEM^QYi??0aMpU?XmbFRuqR`%@r{NC*+rL9mP^MLY&_Si&Z@6UI_Gl6 ztFbq}zpoBWsH}KW(-_;=;{H@+_UnANC|@$`vby?3M4v_6ppL~vQK5-q6y%ztXJ1S@ z)<1*jn7{Vja~czN_CUoSo|49zdw2&&LJ8=~;u(&JmIz%BO$*(iMvIgkOQKg8}m7~4@ zgTwus2pOIYx4b^9TZ*X0nnfB;Nv=rRm$hEYz%SPPFHTAT;2EjvM4(7I?Ma?pqNLt0g0pJYci_&yt3;dgc!?;}1a$3J>1fxhs&_vXfV?Zch|McYVLScv zw@U-$$|Lhw;cJ%LY=zZ!6r5a~Ro^D;CZ)AwrKMP2U-BS7`^}VQxLDaxY1qN}_x0vb z;wmDE8o|rHmOM6m`|AvyB-#OAedM9^`CG z%EyydW>#ER$+2l|&l+28`*G9|Zk%bcf5Kz4rYXIZ2Ch?~9}_UtrPekNkZ>y3Gahxz+f(wwI^ 
zsnzZeE*TYfm-d*J3fg~|A^e0B-Y9@sr60PThMZ|?C)672IuWqz`5Kv=1V*}&yw4Qp zsgbXX|GKi)mdYjC z*y;RSk{O)s_@;wU*cG4}0pL)^K;xTfp^ z3ofkcR*-5Ed$%>VX^^lIU0G-bOlh-W?QOWivORh*R>3B$t6Cj=Wu}IesWAle396X= z_8mh`R^DH|+nsTKwRCK&Csk{ zREsl<9RaN#;&t`4|N1gK@Y@-t*v)Gk^Hvdam><*pAz}hvrPi1EC;wQW5 zF5(?i2gzlYjAt`>&d5BY9PE_>oGmf|^`k0IZ!k71Gl$izH-85348eCs_VHaU(im*P zSROp@2o-Hy@zZFZR?je#;%hv6_o7j>=-FD$Ud6F^=A6{d-sMKT)LPSIcL2Y3Z5HtZ zE;89U)4tXdCsVUe?yT%|RE~}DwGI4WTT6_wtOvp(7|E@1=1YbyzlZ?mMe^)b>yq=kW`Uv*478w6R<`D zPBVIKI(og8c`^`rGC*vH&dud3NuXVd8?}SX%BJl*e86UIHs;g2%49R|zb@&uLvJzD zs8BO`m?NB-mZ}^{-ach{E45d%+J300F334)%VKWZ$^@)EH%S#YNsnB_08WU#LfRkX z{4o6JkW9oFH0jK={I}ekjcbJPTs0hw#+~9@;|S9SLzd?rhPV={!2;r7r{DR?S`(6q z2(CU^Eu{Y$0G7yB9Qjrp0}Wc5)>!2we?OBREt*CtFQND4hXoOdmC`Y>OQCJJ2!q8v z6VOnPe?;ZmRdV8q-6a^?g>;DZ;PRA_tlF7RL~FSJm(9yUJEB`tqT4O{zXK2c4iLwq za}&8r5^1+{(peyhkH0E}1WWeos8H~|xu3?}L{pE^5gbR|Y`yNwacGjIy>w&|`Ea{M zeP;=@>bU1^+w8%#b5Fd zOEQ(*Gwq_L;P!5Kx@k%auLpCU?~5}ipt(*d2yJWJSy}0*5cO)c0$Xvwa_whg>u1WK zMSoydS7G7rlN4x5j_CUFRL*R3li^y+Jv)|mBPWg=B|52-Y{gD>wlP~1vwRPq;9cuA z)$WlzIeID0POF;bF8KRkQHMlFAW~L+DLhjPas6AgqR~ zXJn#DWZv5qCL_F6Tf&!O&I%S^ASk2zNzaEd`n2f!sEO0@4a-plE8js%6Ohsei>JZbZ^X%OPLyZ6!j! zw9XB+evFW%#&=*{niu;SoWQy!;&vEyTjKwrS}yY2Aq{i1+1W}H7fz*{rwEsXb)K2+ znB+_*B;6Hdy?L*}zb7gD#ad~I@ZWY26Muq}@^T8oUopmof}uJ;+r+~ZIlH$|04&JoJobG16=pqYv95lb~MvriNLon4UXNo7PI1q zA}e+}tA1CE`hBY(`L^yat^INRw;(y(d zZsqqE?Mih#Ol1M+hM{}+Qyl}RyD`P)YT832A5#CW>AFkywdQ+0|K*;OfsQJ>exgQT zBgdvka`XZE6)EaR+BsADulgdBFnEc@PeVhQl`w3m+F({=vF4sg65+M~*uax}Cb@pU zvW(wJr>edO0V5jkT9d|qWtHSc-x5U5H}4mQMpP`IKGq?v$F55vPgF(D{6EB-Zx9P1*5Mfh3#|Cn% z=NpxJV?RP823{hvn)B?Zq z2zs8U_~lEW{fiJesxR%BQKDvhvp#2KSY_Gd(S~6xIV!HeyhEGQUH1N>g9h@HQyR{m zKXP~6j!~EY?~I-QJ7W*fpUF|)DmQzP=i>}P_X8HZ0I;&4S|kQ@)S667$684|EXzO5 zcq>{&Kty71Bw;xGh}Z(L!nM&PJHrIXm)Vr)d?&my*@cRIbvl^wf&u}n9yzMz+5JW4 z`8fGTj_=c3k{FZ!fWVbwljPxpw}vlm$@T=}^=7OUExOKt+i*9Mu?e>-1!A1Hdy0*? 
zf5J=DF7@NhB44i>n8Ncp*#BcR;s0e64_%}Co=!12=6t*&1Om6hTxCcnl@+`cyj&*B!m>ZvmJf7Xk$AgAse)9?|toqI*724@|P00+;MnEb_(c<#= z7a@Q`BhV9^{~)GEvWW~udQ9$I7!;V9g?7-~b8maK32&h%`D@AE>VU&+EJ^?H@0Cx2 z@OV~ik>XDv(RqmeEUaTl0E70d3bbqfi-W(|fH)X)v-jxFH~@*N5s=e|+meDY)=I-) z{)|_Aku<^o-rJejw=iV7S~m8LFZh1Mc(5}*ktPW11@pE+qCHV1Jf3e2me8h-@@Cqp zTJn;KeC$zh(!p2zA3)yxGaftovj$+i+r7xY;|wnM1I7Q07l>-1y&Oo`dYK@*_tEls zSNowjJO4x{Jr44}7^ZZpc(>D@y5)wsY>&R#hr zTL-<&G5`TOc_$~*9vh|`qeEC~OLeDm!kgpJ|3#hoPWmvU!Y`5JkcD!c?MCj2FwxHN z7X}9VXDne{=OFH;S|tIUM=5#rYIOpqIkwA^mCe6J-N_;os7MId`ETH9#M3B}dY%h^h8B*K9QXt(XY>m}g5uYQ z7?I>^>BE&zEqYB7sZ1JiP_9Jxdm=!&w=U@kLJbu2*+-^<(M7a3)XAv@>`-8svhOi` zzwvKzIxUbN1fp@(`PX{)D!U*HnDZidHY3DKGzuR)2yv zkG)TMaLdgQkFFVduV-U12(gw$u^e>8vB-RZE*zOumtOS2A|Rsg?RdWLz?t=q!jjO=Tic=A7U`$G(`aZT+v0lsF6$rH#yu;v=BiOW3@=e*Nal3 zyRJI=)sv{mcPWH*b~+U`HqBdrI(UAC78c#j_$WHhy(`)nek&xKA9Qybu_G%occC#8 z>Sp&je@bRxb^`Jv56u^j8o@%Q5QEWgFw+~|V5txrQ`E+gOn{r<%)!SL(+;6O25vz9 z_D5Ad58;!K1 zOb6f%1ls5 z_G@7w0059gqLGjP!1SU7SR#9>I_K-JgFZ!Apgq#2Y|-E@$Yd5gUsUX-tjgIjca*iXVU%|4+k9eEB{)PwEkDf zV&r|acgo}RTLFqa{hRh<#nU3^ABqGbdi>~*{ODJfC~FfSt-NJonyP2D3`1(zE<$EY z2(0{HE4ZBhEdnZ+bXxd$cnf;q1OFO3g*!Idn(c<0p2h505c76OLvby^kA-HPqR{k; zAxP;EU}`kb)B5*&RGg~yqB?&tx<%kzQX%$B(NhzdKb52PhvF8qM(dWSx)O92X~OR$ zR7uulXT%soJ{=XtwMTS&iavX&^XKX*8QAQlW=iu04*!U7aef(g1UY{tCf*RE^FfhGhiq(O%lVqNQ8VC!DVK=9G} z^nGCwB-H?h?&5&b2|>T@Kr}hO>x(Qb!0TEBP!Z*qrz4^P&n%f&fY2_$PXMKF1c}3@ zVDO+1+PI)I7f_c5-t*k4MAiuY7Y?QWD&7&T(uOZ{IG{=*khl=&+zbQ^I_}#4{x8?T z1T{SZCA_Q&s)V^bb@DWsE4`30C&CR#?!^J=J$s0w|Bccq2W&I5Q;W)a$nP!yIybbz7X6hABlXk5p11)rF`0=-U?@533;0^qk)&qIff}HBGjl4{zlT*`hp?VS^mV?YkGG9a3aoHq1 zxy6*nzWfux8U1z=CgA02E3BFY9HVdiYd_q=eRomLfb z>P$_^jLmX^D)m9)p5XIsjCDpXXm__VolpNR#<~`@)EC*gG$}HSJE8PX?yJ~P((Z8= z;XmlWZI~zLN**vQe$3+;s_u$*I!EwDtoAS;9WcH6VO;|O)Rr1F4$y4@HHQ-z zIl28~n*ge(52E6={;}56)TdCDIuuDNg@YwI@_|~4x$iOSz9I`Y7;f|J+(%D&t=CnX z4s=!z=Ao3{#`~fc1PLoKb!M_0TBvR=3eBvr2Yp@wc{yON58-r4*nxtlGa9#HcA#Y& zOi*;jcKo21F#M~Cn-%v}aeCSVsn~ST$KOmz8v%4lA5?IW0bs;?fIf!QLpfWc5Y_{a 
z?hITEhQCs)ANl#ggCEGDzaAN55VyXLo+QH1TiE#xYjQq1_ZXeA1GdnpZ&r`65RN+~ zicZ!a$`|2sv(B>s6g0;L%5DRBxqzKWsJqq)OJ0ibQ$y=pLC zE-6$ljn`nO6U_A%5-1xJ2!0MGjCEd2;J*OBDq5`&DCY8fQD)8v*&bq*NO2pMk)&}dlOQqd>TD4oKCRTTgahoY@jAFupF+4 z{ZJY$xNrdvT3%%wnOWW)=y_~A6PcmhI<)YCt?S9lalbR2@H3NVmbGWeHbaO1Vaq^Qh(SJVYQS{L{2 z(9SBc^qa0L2!N_3P=FcqQ{TKl-C0O>K9{(JHO-4OI{Ipid(dL^Xt1~JC(E*Vtw=@r zPUo9zAO3#DhJ6}m@VF394*ML-Fi;&|6%x`q^$uxeuZt8MZ|K5mGF{muzjVz zmg8Wd`lcu}yFwYPo`GTJfFD0Z!7T9}96X)bcnSLqo;t*jV8NR8o)OF%N@Mpc6Fs|u zog2;6^;SYNrGr*}D-IpQezYX%6Bkgf^(tv=Lyz6C&Gcuf_410o9~r~H56;;vyy6P{ zCxkZicO@vT4G;qR9f@|=x>N~~1i35ryE+c39YP+eNHcvIq>smHz%-sQK)(_CB{Sid zH4n85p8&je;0PXJ z$Q4moH}yCb!nQ9FV3w`{k!v=5S%46JBhJ>%WVPybNu>&EY{YqIPZZxGAoa`^TCayG zHy~<%kS2mzxIi0EPEJQxo-??@_OWd2h{PzddU(Ouu?ur3&9UK$eml5!nhr{%G5Mzp zcqn%IOv=KqBO#M$#N^Z9!Xh1WgOJqjDk$wIcsC6 zlB};V*8hw5<+hzYar`1Zv#Trg{VwRhgqSAd^q-3o^X%7un85hHD$^e!t<8U0|3>)3 z#9!qJ;w8)keBDH|9C_{O(<|M@<_2#P=?j0RsToFQu@ms!t`t`?X~;r`i3@kz6mqv& zsyH;a;smU|glXnL5j(#4VsU-^hhGGb^{d)( z{958IKLJ-e>uD5TOSIGK>bfN8O3=RNTM!Ybpv;xFmsd5{?9Y+@i+FN}pPq$PF~eju zVVbaCA37Nj02vxf7tv)c6!CUrc9by>k(F$_X5<&(p;@9Agpo8lYpVf zs@I2)GOAG!uWy<8VG2&5Fdddj+z0R*J#oNt8JFcV%%CyeVg#vPXK_kE^59e12hdkc+B4?rxI1qZuXXVf9$IXprVs0} zZp!e?Nc|WLb%eJ6Dsyp5JfF?sO#W(%ICX?zrKH9W8`~1-&*ZX4W~DgVnnxQ{G|Ly( zN;5r$HWeOV@}M}YX?5uI3tinHRb$-Y#cgWnN&r}fTRi>h?T-*O?(Z7&3uZ4@7E3)z z=*I4BjLT{W2Nx7G8%---6$%YUTBcOyueZjvJgGh;gm3RWf?G zUO67D@~(sn+_6vlk7kV6dbheJpZY1o8DY2UyV4m4#2T$a8hgpX3Xv-8lJC3^{USxV zme}^`J>Kj&F0Aq{g5lr;#@irSn|EJVOrL+_gzSerS6!9T>*=ab~XFJdruMQ34ME6`pi^7N@b}L5CvT*xqE=&9Ha)@2BijE+tP-Ww?it zx7ShHs9Op9N=BMFM=#NIFc1D-i89fD99YnL@Q(l-a4!uAd;mBY{W}bBfZ$w00vw$G z45q8Ivp`^7%5Y5BmNy(U4Zh!9S6E=ZB=h*(s`{4u_?gd|&28+#>+*qjX{WYRng%1& z5@rXCl?SWA9t_J(X8bE`%arNPb3fjY*7Cmg5@1ZuC-!Srt>-u`*6-}IL!aJ;pFt3R z=0-!B>|4ajbL7(6Y>PDvMqGs~TIi};mV%GymkUh!J=yBXKTWD^ZRZo}41Bsrk-2?7 zR54>aeJ0k~*XpNsNPI;9p^$4CN}UI#W(lU=z~`1B@p5(&iW>c$wJ&Q`L`}qlbM{0d zbDjUnwW-%EhWeW)&<0GX<0knh_p`I=*ALA9t{PqD0{2QyFV~0nrAg!*oz6l>x3icf 
ztWFO`h}mDph{)~DEPsE?kkUq85Z*>BVZpb}%>Lm;_EoXaYJ7q3;EJ&_czC%zFi#p- zVR_w@Z-NUOBcy@VAJLse8qD}*-UxkTajo)&rk>B_p;dY? zc^1LykGQXIaNEX4hQy>zn(4^igtJhNX{ihh9pqidICsM1fA(gM0!oqU$d&3~e$%lY zH_P*eA+t)4#CDNmC1^87UNg2eycje!kHp{cP&uS*Xwe zTcvwTMe&x(>@C&jNKUI~wP6SYp*l~D+G^54b#50izbtOcBP^qh0MJp{$n4dbFB&F| z&I_EhSLu0OB8BQNbS!z?<_OYJ1L?$rb)2xPb7{LCq3pE$WtwUOHfyVsMjjzxW8AYr z%Tu0N!0hrj^YVwMij{OB6ciy8W+4=xAF%IBzl$+wJXNWBkkzwW>GdY1&QRayV0P+Y zPp*i1H8-_7HU#6EPe8TkWq=Ae#YPc_k*J8vP;+AsUw^RA=nQ>Xdi@Vli_trky&~A zD2F@@_z*Ioo$|TW9HHVnS-IUBcxVKyu)}rAnQpG{V>)j=99=d-&W$dbck=d+wNvz4 zr`V9o)yJ)YQ%1lFn_8zV>SlaCY6Kqy*n3E4=g|9VR`c&nQHsL~F~XwvOtFftnt-aB zz@nPKz?k|N+(vk<35XmS*uC2!{{)$AsPA$RPd+cS?5gduV)UWf=mU@Ghx6flqc{hd zU^?PsmC6U|-CFO=?Vbcxy=;>j3bBKwo~G#w>5y{jkiv9Gf7U*}!S*;?0AzkEW2mE@ zQ^nPuSdBv%EO3rf9*A8YNLwDLt%K?7HbT3M?+D0Tk0UVs=#AdCKcK=wf$J;cwIqEZ zDiroE5`B{?|jP(m{ch3e;N- zY7N9z16COJ1SLT(x08xym7dG3;&qBXWNpU*annn3&ArwEIYa%e0xl1pUqHIgARB*U z?(ZZLJV^4 z7&LZ(Ij?$m2_Qq~2H{3gHXz>t;i^Z~zAujLUuMdwQo5^$8*@$MpiuuvT z?Yk>6qbwNv9>ayy6f|zr#qP2q7sz!~#W6)Exsv#0n5blRRA`pjF1DMCWu7+B9>+*S zJ+B7Db$2N_vIqpk)}xQV{U4q%1XKMO)ck?RJ5=I4>Agk!8)rvn!Oqzz4ILgQ+1^HB zF6lsJu79E5;y%@;Is0gIw0oH~S?o>A=(|eqdMb@#x-;fdOIIBwiOj&gQIC}e?|oAe zWE+=C?FKGGhrV{y!KfN=m)T1zjnuW5K5GhmcTN$Nuvhfip$a(LO|~@>9tcgTKL3I* z6x_4>HS+VjCN;cc1tNIScSL2Ad&%b$4KS7U+2PcxC|&y#rpM4vl^VZENp1^?*6ak5>Py&s5;e|HTp z_6l~OXCU?Xv#~8aznx#vn*WaXfzBsixv<i-HsNncaCLpO|qn7bc2b@b36Res3z4lpf{;=PobUaq%Yo1dgHh*g4J5m^?EcB zSuMex#iK09<3L$_xtqX6n#cS7*S>}tt|X%DAi=@4-QUqaPsyk>#CkCHhSYtURKWV1 zIJ?9YazJk_)j^-IY0N^;P+qUe_IIv^aF(31f#}lr58XWc-Z_dgv#qP#9}KJCu@Bh0 zQL=w0`yoAWS=B~f-Z!}DDL1@3F61LgU6E5aHa^!s&X8V8f??x^!MA{|{|(yp1R5FG z#nYNAU|*Qam_=fsELnfT(E2z-t=Ep&^c1JE6Lmch2SFYIPoJW*jyoZ!%V{Y$Gm7te zi#d+XIRqE3yZ#-Ya(pj;Fkzz(DQpi@PL>q2zmglEAYDp2#9n{+R?Xq`0&+5LDvKF! 
z)QC}SA^z7s25;m^Sn8VG>SF_wYE$EWU+^5lGi-<_TEdf~kVXMK*1q(p=WO^yBos!3QhS#GuZ_Ae6e56N-Mcw4>Yhcs87R$-n)9JNeq8wcy3 zpti!FQW5`@9wN~?q+(KZU}w4YtQAVAaV9qSy}WkEV&li-_ z6(XpRt8L6vAE>20@4bH;(l!jAA2TO#P zh28K;a$WoSn!bY<94gFpBJa{3GmKObY|Uae8N%-&Ix#Yv4)FqBnj`35dUmhy$V3HHKjb~b9aVVpX}Pd2@a*NwHOb3uKK~|6F05BWwyWAb{t7X+ zLQnel8|G3Ug|fLsd7TFOCCd$b8F?-eR9O93(d+o7m#o-nS+NKH?h;1_AHE6)iNl!w zfjKNpJg00((TiLZ!dOt4yQCJBvy$xbDqOOUmC=BZ(S^4nyk^>-?MJ=d0Q1WEwC#Oi zF6_R`TuDi0LE%N&Iam>__1be$>Qx58{`bq9oSd0?JAVO!`GL-ANlCa zW;`2`Qm#LH@_S#blPn`QhGD5TPnT19&RVQ8TsT(QA`SQajJwG>@{5}FPnDlOaqnJf z1RYpMbc)-&UQMP}S?A0jeJA$JGfdq6XjL~KrphWgl|@8uIv=Rz)*$CJE6g=y_pZL$ z>bLD=Cz$rONqu-zZ5J(=0b*0!$rD zCq4{j8Ek229SEVJ#GLO1mktyu+1leJAGUytg-K2b#xLoJO9bg%&vT3;9+NVXs>tD> z`e>?^l!(M-{K$8E`9~=9`j!^Xf*N*6k_vdzUVfPCI1}nKE}RnkQj-6ziFt(rw5~I&nlXNRDWVv&(@!< z99>8fiIv@6zhc0O^0~BvV+ig2^~Ma#y6nBkyw{hT6~);kHunvSuI0nm1LC|4Jj$Jk z#FbPX$Q!Vy5cFx=djDy9-k|&AjN?nlTUT_JGGFCGBlcpIh8BLp2%T-?$acBxS!&)P zE~ENtzu&gFU1ZedvIO$^a^LX7-*FkKbCR0NsR_!u=e&BwDIoknFkx10N5J09AYjXR1{QJl z?3y&wO6gQW8lfTjI3(cr5}my6fxjmn23I}1FCp4VAV!>^#u>?L?HZ`hJ5#BN-=wU2 zDWam)%T^)UX<7GdKQauqOd4#+V63-yuWCH&QTKxAIc=%JfyX3IBuzf7_q8XO-n@k) z-qxilSAlYQT)lx}@phdm{^xNzFw7dO|ql>XksGjOs3LxGt|Y0mQI4+(Al z<%gQ!zxCN>uT4wwvU@leqG!Onu_qKRtjzT0CWArYGdshxGiB0nOl}@qyRvc;p@8t8L~S{%0tXb#qVJS zg==Q&OET3+QWp_HmYw?`hWo8h+Tu@lLRn=!?|W)sJvZe4Qn Mz$K+18wa}vX}Ja zp{At6J#@Q9CA4OLw24z_Yzn9K*4kl&ObjMp(^6j(!k1WqkCt^6!QrNlTlm@owG5-i zdrFZu_fjHnw#POc*~}7pQddo`bwl?dPy=-X@!$Lk0Zx$pD7?sUL5?4&(HBx21GvM?>ggLV)WZPWNrI#yn5+g z=%waa0|-1h9XBK`iib>XG%e;Hn&QmYkKdu1P=m9OP*o_kk_9;`&` z4lpcDWY3te_Qju1q35|!@q@}PCD{R2sbk;Fz~`fEO>z?pSs9R+&XKy>sb!<7>F2Tu zxlZl*2&3>BzT_-voB$uCzT80DWaj(_l{HddoV@V34u}L7DGTQ_cn6RH?{oKtH^9Ab9#K|4qR)C$;$gaC4p+0M)l| z#72^FyCRiBde^o4cei~rr7cB-7(}#M=3{;Tz5Z_2Wk3ap_P6iAZ{K&FXu&S@6#gN| zYG96n{|l;t_ONbSo$6bxY?-|%SSy4mr5)W2cN#$7OVOYy4kL&f22M zxe$AyWuaYZ%pGRX|M}&mv&Q9V?BQ0*;g)$LyqwJx+IRPGOSc|LTPy9!5zZrSw*o9v zf$C$vD-jpZkJUE7gdB?>cVqn7_D_-HfcYNMly|wz=SNQugFo?y#>%kf(pK&+K37av 
z?F;sJLCg%BM5j%noAXc}1*nFULy&dITu9j_S#h?9Ljet3m)hUrSxGwQHKF|9W%(5 z8HA_bH>2MdI5rp+J~M^(-aV-MX$kjCE?6AM3mfw2T9|eU6Sr;pP`cNDsDwHqY%M!vf?wc|0 zd+2vT?rLVclN^n%P1x6@3udKJ`!?vp^MjC}6ZogWY}~Tu1u`mfXGPK-jIv{x2R)Ub zmuqzH(?75H7$!db9?MIj$Zh8K@?tSGnmoNju3Vvm0w;EphO*>>1l z{YL}(ht2ywGi_BswuR!=>xdR{ z4*+K%UCOj}CX6Qboafirqq37d(x_1u9#PPGF`o*IvgnXx+m=4>yZGe*1oQ>E^#(hR z#iNNw`rr7O!t|w{uq!e=|F4O)X+BCp@G|snyQ5k5UE~U2JMP=0bySrDwdi}}I0)rz za4ySdp8o_k)-V9jZQC|H@7rXCIvBcG1X3>aGtO?XjFwbWJh_mb`ef64vYL|#^5!*CAr89k)h1!%cu&T5zzPv=n#O|y#AE;d}iZ^ER z7KCJxs57Hwy-+GsDM0v%xD7u@2j^BK_m!{2DY}*f2^mSK$TyX{Y!Yq+fu0cx#`N9I z98fomYhc^vrKPM#9t5L)O@IQ9|1gtUFBx=dJU4C3Pdy;MT%@bV8q+CK8ah7o7g``= z*O@CaXNx%1qLKqZ-gc1y%PS5R&BzM?qdApW0siO@jL@73LMTZ)$tpETN&k9+0L-ST z#Suc_AW2hh6rm+yqBFG>mH%G8&xL$|l%Q0%&w36+c(uw@>8wS3U3gQ!1w5qztLkl3 zZboi?ZZrIYb_2!|Sb@yl&fz~JK;FEi%9kZvt?rphG$RxS#|?TqdF@~%DEJM1Oq)8< zr38mb6YK>c)z95@Q(-%2Mr0B|W4=?Zb-B%%&uxRpzIENGsO*sZOs9KDmc z^wMz8`NaP@Nz6JbgJqYOjj|Pauz@;K8F0aO3ON>rvK4or;#jpjOLe{$DY)~AF1ul$ zxUsU)Z|1$ovwiumDebR2v=ccT*tR#;2ruNj_58o?c76~kPY~a`%mIF=n!pW}OR}>8 z_g?=kVdcIwGKnL{{Yt7zx@HJltPuCB>c|`@xyZ7!Bbh`@bcC}uAZOMaLydn;;NM5) zok&;9706U_1=tMlUz2cqCWsVYo>g-igqT8-K zlt>~jjMQ0+ioG@%Wk`0}1YSBauScF+0`?JK0X79}LAV~o$jtJFAku{KtvQBjYGkUJ z#kBLHsl^ul!$-#_bcgb}pzLxhy$H2er9p{+~?=KeN@x$ja!+_ zoD4O6dFIJ8p5S>s1E(x3lEb``%b3j?dAeNjQHG>*JgF?2kczq>7R&3fq5WLr z@fqX~pn%SG+X8O`R?2O_ibHchM=i(hy9>-8rS@ZvGs3yJC~J{*73d^^H;&9iGL4+e zrZtnY`_Ol+j%`Si%WuWVcQQ7~G*j=)M`v;6cl;MEf3?;c%x z9s6cg>)wx1*RDDn8dsxD|GTe!`ElR0@_j7-htNwBCNdh{SDuXQDKTRE>e|9Xsxi=% z4{K=YUKE<*=u%+i==(9|xF?*8jj|nChllAa3d}wu3sByuJpga>asfVibnByoE<%-i z1D7cFRuJRMciE2Y{W;)bLxcfvQ2E%EMg>4zfuq72+BftPMSFBP{2BY%v?np1==ag$ zJ@{w!#vw9)&FKPQf2T9{Nt-pW4GH~+jEeVQa64?Gz_iBxjq1q{YG#@y%hj?73`|!U z>`7rG<^pm(q@UlQWB*&rp7@#PzXWcCa}(erM$-SS(w2fSne5c26Btzu)waCgSj-Aut`-?ai|RyFCGn zv;{E2@uk4p5s;&Vc)eREDvo-N~nmk8)o%8lBZ`lFyk0QH+0hw)XtMx_8%qVlkx zqk0k(iS~|Q^j8v0YHvd0Vpz16`#$85P4a(1qYsrZm$@Gs385jff1zB(foM}BLg74R z$&@pN*JXTNt*t@Ip+K2`MF6R|gxsoc`|tPcAIZV 
zgRZ4L0@UaM&pgHW_M=cKmzTrMSjXI3o`GyD*p+~&V3Ff}8a)qz_c>&g0?e4MLtV_J zPU24B=YX){=}NUfkOFe zo`M#V;irAJB{C;g%R@@bGqC^20n>%?#kd1Yhpd1m|!ewNGwPS((wi(bdx^WNw|PhzhMti%NWGJ2!Q1Zy0M#)AHL zKQ_ClgC4uywgJTzFNr5kjQ=M5v~0&&Uw+5)p{<`43HmZcl$NKN*_sy_SxkGN6mKqA zEVZ(ZMMZXIN21{o0sq2D(373AQ|Enezf>kM%U8Z9~4)m2<>YuH8IJ zi2s?5HIT!45TUnduScb_upMp@TwFTakUClH`-mKsKt5T#X9jL008BmpdQ`dSHnSEs zjWo`0o2pEofGdqqsZ^Q{)nhkgM9t-20cz|5awuS8GE(!Y**JysYU+M=nO4}Xx3)`KuCdWb`Zg6BEd`2di4gBj*D_G(n%G}DV0 z8?a1AZ=INM4QLLcuv|IZuc)#4=`)>6V)Bli)T!| zKym3)aC4_<0V_&Q!E36`atdZFV7m*4}GlJqj?+VU)mra4mRG;7N?(GrmId^+?>YVY{^2i$8(3mf^qXS7 zxy>OYX$k(`3G1;5kUWH!o0>w0seqcZf8mvh19EW+L;lB{)?wE0mn{Y6#F$g#zYxo1 zPPyQRcj(*lU~>Q_x7K!Vtlp@|+-ni-#Z^^lU++Jyu3KIBbVj>|4?Sh>J^+{;;sO zzC*UTu-&$d3D30c^J^YZmJxSIhzD~|5!@Bz(;>tGx#6hngb(59SB6LU zR>9_`yTk1a#NBIf{%Vl;HP1?Y)-SoctSZbSD6}tUB0OTJ6*|9w*Pf5>mjr;@xp0<# z;qlHi16*6B89W=6Q(U|@wY$wJlLhiM@*dak(Gg<#)t>IHHCPTXw=%FKEt;}9&00IGb6 zv$SxH6dT|8niVnhl+gJOQ-s9J(J8|A5FZ@!Jd0X~+^+r3{EP#jYVK2}tMc&>stARl zC%5p}&4$7=TRZ&K3F3Ar2hON2&fz!)UKj)z;1p5&um(=Qdm~r6VpfR-4xMXiViBbu zxdbk+0vk0Ib>H~>qPfc4M)i=}F#|C+Kk>i?ua7-QTkc`CUq_^o+FZVMm2ts0wZRz; z9PV_SW*5A_`*&9vHT*glBT0o!UGe&SL`FZ#UGaF0YRsn2=(AEdC#LO0NbN=VY^NOT zt(KbOtXL7q#Q%98!o?!>veIX%t{l2E&x=&dx3CewNixTIpNjig_>LpFK)VrrZ(>GA zp2bgl9J6@()ax%6{PsS87YdEc(0k3$qs9aliT#=ua6cm$+ItnmHwaB;4&K`6s?6n| z*(RUnOjOk{_(3Ibcn)wR7lGy~3`XTKrqIzFfS@nVQFQZUZiI}P(g5NI~%GcSS<|(*u5tc^&sc)xhd1LP*0Mkss2sgqg!s}agu^@-qYcI z7QX8Ul)E(j`2^+}9G6iej#pr{nVi2uAtcW+%cQmSQ{>Y`RaYWTruRMEuij+y^^wV) zr3i~!3tLm&mG@GMRz!ZuOb>M8$?5mw23hvCJ%xvSw07nfDBAPEeo2Xd$>A<_QSiMRD`9oa}y1Rze(>X<79pY{~!Pe%a9VcAEdLylXYLVK#hmxL$gJv)r*{kvJHy zlNjg}GcGzX%wR77Pb?&LHkrz)y&&8kRnrQPn_A_zK4ARv`!JGA>swr*P}AMK3zn^~ za&9v#3w4E)3S1gFZgZ@EMRQomYkxjW-?OF~TbkSAH$B@H-EP+38e&{^EYinalccRF z`I(M)U8Oza&1{3=wvk%G3-DotRVK3P+tiS#2=7(raDGozl9_GbWG2 zFD=bM{EH#AvLQVVWgDIuPH0{zHfE2(*@tHM!`5{w-)v?=gN9=1q|~gJ<7NN@9;{&m z_;1c84l|{-b$7T@&KkAryVBG6y%7D=JqNC~Tmz6&(`-YldHK1;$I_6evt76UoC_ej zjxb1SnlTH}eEjS3IY49Xw{ONcI?@Hpd7fa;$Jk4 
zIXKQZkb1Gv;eBOZtC`F%D8hxnXU^d>$N8N4cH10cJ+qeb$8rQlumi^X5>!%zVaJX4oaO zUBwdr8JL3?XRGkdSC6G+g>(WX4fl%@cem?d33C;P%rw*F4Q*!}xty;34^K6nJ#v^0U+ik<7|k-nY`5uMkl?7`xG>4yQ1$uu`A z7ry<{vHB&GX#8k0_GmKVXtF~l2EhQAF1|j12^n$AyE{WZYyjM{tl&~%q{Gh05YWv@sr{BO|f`67DT^SKTAB91z+e)C8yi?sl@E5#Kft@hk}n>vWrSx z{}5BY+<;mA=y+ZV3O&<#I!~Qk&S!F@^0WnqT^65R7MHz);k7^E-2@}Ngc}rIg7nEY zLbCE_`Nn1A);P(%2e0ovpt|>9=^@j$KTgCiueVWYJ659S**BYmN3H0=BW$w=IQ#fv zRm(o`2Z#xe(eqcv&tDlmckI>3B6Q=Q<2e!V?Y7a5;U|$TKpFMtd(`TJ{Od4P#Dyy4yb&kSve7ph+wm zTg4~Of2EYRW$s+N&ppYXeO7y;dd$;=^{GQ_p^#{9%*EblCfWgejsmr!y5KuGc0SB- z4D~MJQ)6UO=@mHyO^RG*9Nsx>Wj`7$1NHW_@iJbKU?d!VVA`Fb)_`oL#wq%rRU z`Zo&Vb9F}k0dqQdq+B7GK3=ftZ$7T)pb2X!W91*}Cmt~FDOvon8Fx}%rJ61lGVY7+ zK_k<9-8MOfC8Vv^#;rn}9~SA<+N`+#g3UAyTDzCJN{oGz{ju21t*HM}zvYs7HT$M8 zi~!0v>CXJo_p9Bgqh`dMz|8w>8oA{T`4d`-lx)EQpWgb~=B6^G5C0ByC}uzFdRbmx z$owh2>W0mG)6ii1v-|caGmcy7Gq7i zR}zsVwEF}4^@A_A3jyo6L=F2pe3hozdgv}%D74c>*UVWl2zC*BmNeX+Z`P0Zq7%WIZ!k#O9V^E#twTfB@{@jw)GCnjAYy3nW5UPE$WmgLtGdi~&I=UjhV zSIQSv))_r3q6F@k3ECHSJ*vx;c#Zoly{o*mv;}NGx~!HtK!d3}S~&nU+{Nocy$$c^3$@g4Cs@6oTP#tv;? 
zzVS&B{ySMq0!hQK&Y~c`mm9rxJXPULrXe3gR5xbIT|(7(WOr!e?1S?{-kjU9yVKuU zc;Wmo+RreOCOY=bsL(D0N&3&UeeoOb(fw27CR}q|qKj{kwJIsa?Qo`dgfe|BE;gwx zX7fobsG$lGo&By_k;TBn8O#r@ZAmABq(|~D871@}y7S1Ok&B><^srXV?Vcy!?w4%0 zVs5T@Aik(8_{#g$aF$LF3HfQ9%k!HHCvTLKCrYxpMII6@t6XMBEOLHx@{*WPQ#($} zF(vIVnU?;oz{W=L>rnA?Hw7+I{{xK1c)01BF$lcFSJ;M%k+VA)x4WHd8PrH=F1V$PZ;VcZ+R@Tj*}CW_>w^-D$3j9v3bkkX$LOns~A!sSb=sKNs=QMqVM-qcG|@a_+iKW*sS zLYQkwN77u8;uy?e6vTyTw3L8$rl9Y!WK06M(otcBH0H?et%FToiM93Tw@vQUTIr3; zjP`EOzIfThU@}qqtu$mO_wyeJQI%Q+R}dVukuB>%W8gGDSH4N@GV)aNu#i=wkPw#V zLKUU2x1mIF8np?>|zk!1?|%b z!HxOQWCDFMy~Qo)l&bQiMEBVfgzUGpNIYo{F`w)@m+5zn+nhcQoXZ`XywsqjeW^mH z)Rl)XEFbFV@@OXrZrHUP;_An^_qYXS#k@73 z>0=3rJ5&piy<#U#{kfCn+s(d918824!fMP4q4mxcVq@B$*^-r^Eo(=#E8Y8YhWMFN z;w;}o7_yh{eAC~v1<#c#0@lAna!T;y)16<{Ur^;=$b?fXUTW5^zee+)uYN~6?Y0cc z+~1Fm&`c&sf1Bx-4`)n!@R*m#+I>1bRWTbgsk7MEr z>3Mg?UI?G9#z*DeQ%Fa%`;S|aMX3`Zss5i@)!*jEA}wB<+#|~+nP;=ISln+>WN6r) z+>D8k^Dee;xB31`=of$_e04pvSja- zX{kWo>b{lGB6=RjvNYStXD#h*lTCAeflzUIGh7YmIa z014_?P`F0Mwk`>8ieS%1vD&PoBB|WSr3W#Yn9GcA*~`E*UUL_sK9!*r{{@?ky)VrP za%G0ftu72#`Fnk%rO%KZu7Y1lglkWAXJZ^(8B~{lZ@50gC;4Qu7rb|9{V__R_@EC| z%ODl4Kuag2;7{xuZSbz3PBZoCD5m~O z z@aV-LT1->ilW~~#sqw+T-SZc#YGJdj;1Ab{HxqU17RcD6RT$mfNA&r6KG4?bx^>B> z_zl^?Sk;kt<rXENHh#jf6Ok+H}%SghQ%S5JIy>1u34V?q`OLkszu z0|zr~G05BM@d>mkI`crTHNepF!H5kNe+|jiOod3`uIa2URNA>(n+do*h%WkYj~o@m zsM2TKx!QCcbi>#dZ5b=?fq2HEZ$kSape~C8cq=`tp{T6U>P=E&XcOH1H($6WLE}mg zQ?10TlS@`h=W5WyBu8)75<5(8(sNNaX~SP2%^n@lH&a!Sm)%3`S05vE>SB)4x)R~xjm`(J% zPI-XX#|OFAQ+z@Qqhc}qW)kt@&KRhrx6pEq1|`1`V)d9-SPLmE?G)JO<{TDisL^MO z?5VIjeH&yOC8p?pupwWdF4Gf0M5_AsWG>>FI0r2ZrdeK{u{z=DUv++0LQj(bH%Ez9 z=ZL7*Lvtx^)~zO4Kw)cqX?DrM_FG+HvZv%sy%%6fGT(9c=szNDr!#eK(`3F(?OyVkn;H}zOWp@fn+*JxhwB| z0(}sw`<^t7R7_pgq?vjIXpWbr=UKlKe{?DLK{v(w!+Huk099t*4W(VQy6KCNf^sFg zu;kNzi7ULNK2r`*hu}fsce&9IMN!Yn10W_38Z9aLU&HU#UO#h+yqhMVDx_q&avWDU z8Ly6hILDi~VH^+KR?$ChecOK#@$q-xLa?X|Sy&)U;3?xGuQaqjINLKJ_NN_@-U5ZA z6|{Xm(QmjmuwxyW7+u7H3o|4licXjJPMnY)`1cvV_Q 
zPUA$Qx0~qU{-n9ku9~=T_*Air?eLjt|JhpXPH)OiuXzES_g`JUY1o-5#{|fK((c`T zPk&<l9=zB>Q3&lEni2(P5l( zty)BXCa*WF@~_lGvVHyApNv~Pjun9`Qe4a;g&dg8YQG&BmFW@BOBa6d5{o8h!bV4p zdFr}U-|5zu6Aqs)4zwjbHzhh>quA}G-R(6mg!5hl@)$VNp`Sd;lpL(|%ZcZu3FjTA za!w6ro6zhj3>l zwTZY;rPPK|tQRTAA1B&tA$F2GZ(L|L;ZGQ<67DfXMkI;XI*chyF^}I9U!O8AUx@gx zJxl`j5OJy#dOAt5g`nL+Fp6Sd?K?pGsJ0NgX%KOQXj^2gj0GrvTRf|!j40*g#wBeF zPR0BTmM&xX$#+3SO%CQ2(xVLr z-c)2DHF#(-^)-?j8_m8=*hJxEpn9mUxcZsbg_FP&UQgnlJLhcsPST$$-w;nFY$JRW z+B(HgD28x-Dn*4tgFAX{cCkkdekP`WLq7jx@ip}y>) zzV9f|f|Tefi5_R#@>DuRDZRjK;B3;k0Q)6==)zCyZ9(P* zAG&`K!N9~~BGpsZdJoMWlN*Rjdx?S@&fE*v!;KhuV#wHYpb-{qy=RY*)sCe>RN?97ZV*B z0n^_9f2JLxgf=B==VH?-gj5Uf68fh_^To)o^=-0kS35(03S@Q}|A6#OfOIrU7R)~A z54d}ZF;_`_m`d11NWF$gdNAl^Yu@?9UmKO|U#f5(C4a%r0MkLg)<#$7U0W4g7s+R0 zD(sL1b}gEh*xK-e-eJW@os-VFzJa zD{kDi*{ajqDP0X2#FJJL8V%=5*5m^3s(GQ;qFc24I_VC`!3&^7OL+1)W-MC^tB2I~)!Ev#l?7a?7iSYYA&`6{c_7CtwN_+b5S`^CqL zeta9TGTFBi6~^uF^G$84A@P=Qy&SZ$5=x%18qSHmjk*^h!qrHKSR_rJ$=QfvF>EP) zn|hZ2RO|@>ic)-eBTCHk%J=6Z|6s$HJ^nw{xBrG249vAEp;wfje{7CFJ2}&f=%sMU zcij+uR~B0#GzU^Ehj4aacmvSccjh3}n8jE=e3U%>9J%JgtB7dXL!yWSDhF90HaMYw z^Uh|vjQ>e@-(|!5I7TGl>ERCoL)G5k3eSYd zpB0!_CTQc(^AOrSa)xqD#WpfH68{%qHZwf2-Tf6MT{L4A9!xx$eTFZlL6O?%lYCFL zm~+dNp-H6Rt`ooce6=}C*<7OC5tDXcP6FCN|-X0$< zRh8S<;}Km~$k9K?t8uGz;FfdT`kv+Q{#c5qfTy^e&L1+#&K>PNbv~mI%%78PzihA^UAj+ z9|0g<4pu=1b7IeVRB&%9AuFLakHF(#NRAZ@Z(>ba*Xi|n#qKW$TFQSv5@}XG+`-Y#+9qu zeHVXkK3MHhqPJ<{qSA8hzPSRdM&Cw8X23)$6t34OcA@|i`5uGm*uGWbI0*Lr9MPTf zRclJv!@`goA>fWm*ojh3!SrfXS{759yv>6g%3dqR`hTMpQ&>;#n|ta5kc=hmI%fw1 zajPQldM$Q0N?;n~RlNsN!vduzwb2z3yO)~!9-=%0Px@%1cf#dW=%B zUWgOC$jmFtH-Y6Tf>4nS%r<$Trqs*%&4O*7*QiXpQfL+gEroD5U__nK6?bMKiP*(g zm4yay+qLWC{%DAY>Q;fH09W$*pf z?!RyZ@PBN5sj+!-?hoPwK!(B^F(g zujvF=n59pTI_z5z&uDI8f{A34)}a6q_zV6UYmx`tbeVq8E_cON&I^7$FP5hnMCwcBigAY`S%z-kwoBqmP@Axa$Dow!o!M#+T?8yNv053a9_5cP)nH0pst#fZ&YYOe8^cI!A z-b+qO-m*)uY5*i5kBQPrn$IYkdvp>d!3-}*dr~eRt=uR2>(=QKk9jcvt!ZRv7YCkw ziQ(X$Bln*Fz1^((XK^8NUgi%D5siu#_pB!=ETOU}A`Z`2N*gKR*yLsZZrLZ&+v_kd 
zO_VmOQh9e~QhefBe`t$HN2OSQTcj%HJa+xVC=0gn`}&b4fk7w##?_j1Mw|AEwyGaR z*8@7QF%7xpa&-?(y@w-QHBElFp)Y-YA?y=HlNfK6RD|7GiH%V(M<`~Yl`!Th*w5N5 zD(#p3y`h_X-ln`=V&kA*1$Z;d^;*hyR8uGhu3g)R@v|}KS~pu6khoLq z5||I1_YQZVG9M<-cLpuQ??)B)VNO_a5;(PJlU|pwt;bDq8K=+(Irx8cE`>xkU_SSs z_X^B{+?~;A+AmPO0vGs_L_4H6-a5WmU$y@qKbahQH+kTqcbM{q~ebfv^Y zm#LB)9$v8;30vH(B_Bs6g~KHUFrP%$kl!EI<9sX(2YBxPz-{t`R)>u_hjaiCbe1|T zwm#t)8-AEn+)Z@!xGe4MBqPj49;iWnA=tpaLM_BO+t*U@V!mr+d~*u(X9u%ON#$RWe- z2fW!AnYwb+g|&xluZrFq72s~c76xwM8b;>d9+$J7gWhUm^~>0u@-<~`3@*aw?S%NQ z#&0FCg;`5RMFAirNoQ;Y{v|Snp+3-j0Nv}e1b-zm^6&xl=d|el7RwaCCQ8$*9-G?D zR>3N)oLbk%C%_%qUa$o4i?#?m>1R^77p~--`OBVYaB16{lJk2U_LDlM7kUE8=*Q0a z-y98*0Y(hQPD-C(#|&wSEp5b}*Iw|2zN&i|PUqN~&#;8u^8Z-t02bcBtf?JBoL?sJ zeKd5sV;)B9U|^0U8C;sXrRt3RG=%Pj9zrr20IH}X^;@bNn928tZEgYz4b0I7-{T$) zzAmIsWV_6)b;4#&f+t1uo7t{Fv$0qkqA;`j{^lI~=B|s;JMWD)dp5uO&8OW&nVvx2 z$6}8Ge$nf22d7nv+kant!mb7U^B%r-80UCbp&ry|oZ^JFlDLFWK*RfKiUY@HLMxxT zdFh!;NCG@hvL9k)(##*2u|Js;iaVE+~d(oUo=wuEbwoMM$++ z6y3T4|E()5!Ref_JFzHDi$h4wDn`Qj4&Vw6*yPLXFlbB%SA;zHGN0K_mDsGr1eEE}NK1-@`T$ z2}dFTE~x2MgiYQE%Od^~Ga>l(44TAtoLNf=bSICUH^H}7yFny$!wWe1q?BHWfBGdU zrPOU-6uHy8@ttWt?VqOUABZ6Zc1QgNLER0vuwKQBj{)Y*a}IemBa5X8>G9xr9^h1` zPfKUlnm;^+RiWUYrLhS;`8sT<9%FC+H6^BW+ON)=$eaU#j`J?q*u(8JO7nB4Xv}|T z?yKHP6hEAT6*LD#dzv{i1h?+Eac3pqmc11%&v_e*ti}DbXWeGs?h<6%cf(1AkrU>B z+Qi@W2C;@4%`7)htkHab^O26 zFkhueW;@Mv6Toutz5aMo)y=9)7NB-6ClDcmJTc$I3djk-nq^+5V5?)VG}G=p*f6$@ z=j#;vwoH-Q5Oc+9DQtn6Ouo`g8iI=pV2kiikty`IE_Z|Q8N&X35a&k|B)T7amm@Xw zHAE1=mpbHQAPAcRE+b2ss|JOtgeCZZmisTcDWC{&*@^0YVk$oxa124fF|3Br2*5GY z8?aM>s3RzufgE5@rz;)UAo-r?hQJq(2~I~e-_J!f5e~=SjKcbXkTYznncJaP1Qmoy zTtTdp6&svz_*!U(|J1IwK2B{o{tvg<$z-%<(mq_g3Tp*HA-^-$-!)gmZ27FiNh_@r z90iPq@6V?SQx@58sFR8T61cz$G+HiXWy>Zlfs_ANb#ECJN7uBC4uN2S;7))9cXtc! 
z!GlA93EIcuHHVyC=e zndvTh|3IHA)sPUT&S*^v{C-T-mYKzV^)cQc-&k+cnWi5cOdeFRF52h?KO*(V-VfbQ zJuJVZv~_QwkX}Nwe4m7>a{r1w_uCD#;?i&yGSdcbm;g3iIu@6IT*AFyKY^69N1)`FEBsmvF z4yaIa^7X^eR+<^V`)Bih%C99W4}pQ&a&eLHcrCn@Nog;Q>Z;>@MUDZ**98_7Nd&s8 zgz+9i1ymwC521Vvux=g~p z13GxGo2pwZZ-Utse|Rf!!n@i^n2e7>Wa-Wf+N<1)?CN{*-}}SIxMBXI8>6c>dZ~{{ zy?~;(R}X}To3dIU?F17-RfNxKZ7E5$KtH(Ne$rajgDH03--o6lR3ty^pTcGQIEo9> zpMF9Hi7E;(;f+Th@XNK+(Mh_~^Z-g|9q6e3sW-C@=xXlCcSdbcij42G58c4G7jLye zo#9WrC^*QOUdDQN{|cjSuy)SV12s21Bg3GiV*tHC_jP%q)YuYAH9MTyZn7;Wpfmk6 zid78W7yH?S<(t<0DaPd_qB0v_(^DZ3$tBDYm=u(L3#ykO^=4Q2h%{7<=6!hqqt*~* zCZPJXaJ~1PgJ<-P)99EaXkiZVC~Ua@(-l1LG!Tu`h*6m)*nyznqtF&^pr+9A*{_}} z3|lTBeqWUYwOYb0*^`6nKEQpo;@%}U6Jg|&TX^^H@J}s5;%Zc>U#qca(eRd%L`De zKszPR=o`>N3#_-ph&UsT3TLH%faj?-`2fcTHplE7(ovX-X>E{C@~0<9y>GlVS$A(F zhT)1^_yI+c2j&tzfNXSna#WAy&CCyTH3zKB@WG@6`^tUDO9c^j?tQLOGk-m@BgxpU zHb6GU%o7TmVrBhQ8sAZAHW(f}H_j4tzfC`@2m^GH*f`GavQqOFs8VBL#7nklZYB@o|vARfJnGukHGAp z^kf)kFLrN!4M6G+$kc5Xro1<`xEwB(R<*JgHb|}Qhyl&Hwt;ksU^xl(MZKA@H~d}E z+$)wog)e7zLzwAFupH4%l;WqxSK$~6`-1))_tTVlf{;O^u>kt%-BYh=G>gIFc1q0- zn2Aq^Px0;E#A$8%C7Z(L=EjbaXwt(LTUnzxOV+)aE?mSO+en4EsbbA?8 z@L8>HK!MN?Ip4|#-I*(cin*2%5B+N`V!lmrrSmd2r6^N>ideZKTU)^k7|*aa+TsB> zxHD5@%}{d9(8%(MEXD6h67_Hv0^esA-uh(x(hC+crVOSH4=?zvDXiN8Gx>?**&JW# zfYT{IZjm5j66*r1f6fK={zun zBV&_{ViluPQPG7FttfA6EAFo9Dxsxy~PGxG5b9|y0mwz*?SD5 zBBC5--~E9>EzR$S_x81Fz&>oBLF?Xr%6xbTyW<6~hlUr|r%%MZ{7_!xGWHhJ-_r=s znfKO@p1T>#W|5_f8KWJMjfGRN`7x~Q&2GB1@woAuI;|GSr+U1x9tS#3l^lE%Ik@1v zw4>mmb#~#ig6u|H>chhN4V^O^4BiV>s^h;!uL`QO*{F}Fzz|qhie@3)rJ}CDvw7Iw zB2u+Uw--J)HMZG|t~<@HJKXSUWEaKt@L2Ntp${=N>C}$7a+zq(<`nsPAEYAI|7x%` z33Rgp)&I)`i>;HhFCG$>K1G;w^ifZ!-+Z&QQ(k9Q6I!$MR9|Qqt4uTIOWWTlIdP)c zM3$TNI|s0=m|R%oalGgj?_(hR>;a3F=Q{e>h20v-r#zfM)AdM;JWI! 
zpNm%)_fCs<%TB`2shkDfY`+~|oeNi9KP*>17eg{XMk_JA2|Vb=)}y`eB)YpL752P> zET(&k3_a{UEDVmoUM{w|J^U&Y^1OwtDiH4sG&uqK4KzI*d`kSbYgK#m^)NH+>AOX->8uoZ+G5VY z%6U>kwI>tAy%)4rQD}PhG&=hEBF>E)O0~1LpPP=qicMSdqaN^CJO>k-?YYsA^N$k< zlrZpZ8WV`j6O4yH`}Lh*vR!k{QSuWDZc|Fdi!d24QwId&lTm`j_QXLuNoN)+niN)& zFdf?zWuX}*A--()>P+{ui23mLCiY&U(TCpz>*2<~vw;1&@w)sk#eDIo>vj7abG8gb zy*9}XHLIGEA%4JFQ)MaBSVgW_gDom^FW?;YEi2+K{UBy7bm=H$qmfKuC3&_orwDaP8bU_iz2{c*Lz_n(*Y;X*ac@u`x3*P zB(E+1(54C2t5Dag00kz~O-*BmxMGI}X$&lB3{sR^r<7WGvfV2(-EAeWjl`~3!cMvZ zPP&>l%o!R1Gb=LhYSpRuq2h5<$sfD<3-Rm=-dED18KqVLR0Dt{rXE2%VJG7OC*uU` zMX2jVP=j&mGCa%p;-hs}|C|Y@uL90!5cO)9#!3pbN*#{yQm9LRCySX;DD!1E+ww?;D$7FEadD7zsmv7M%@&weN=s2*n1Yuc zq_NTMkq6q8xk@YU-x0>sO08Y*8q|OO1J~SeB3aqY-unf0{TIPH;55K>dXr{{Ja2~_ z>-i(b?QJ)qT%NN5?Js`rG(5BVS>bAJHDgC1kW=P|lL3g-vE4y}z0P2XdK2~F zJrBWJ9h$IPE)lp|q|N<)W4`I?+$^mBPvOJDg)@vZxd~;o7D7G<^+2St-j~tA- z&B1Xxybzr78MSrG2sTwX<;&o%3@>Iz^ezXUeff6-r%Diu24WmNe1Er4eUjSe(C=9= z3!m6tqi9+%o@fkqO)0T|Ibr!e=-b0E)FzK@ol zs`dGhdt>lPrL9{DRG9x+H_YK>4U{`#9QH8{N$-)2r_r^S2bKCz1v6pgjh8ig6oFj){aM1SPM4C_a+tovjvae~MB=1Uh?E&}J&BkUtMbi{l<|Q4PLTQ`rwq4%4kJsv zUB&CN6Lb;(SM~aZa9omf*YdqxPg-`wG^5u#o}bhK&obyURbj&2QYRBd$;=BnapGD; z`Y9?z+F^~8l|rBKmY3~IG)UkJR1BuF`q$4(4FR0LcnnOa0K=>~vr^W-yhXx9D%HYT zdsLAQ(<}{3KyCPdi_ha|H;`rpf8YS;cTDG)X{_>jR zD>`qrFSD0YA~s-)w(UiAD8iD^!=#0=#%|-v3wl&qmBtr= zvK9S3Wm{&&TLBz9m#0Ges?;%)N%lAtefeL|>@W}XDMmnoREI22uuj}}fp9ZzTu9E7;BZxfN8`pt)i8~Wu5$u)r|UI7Jc0zpWxk_`<}z^WvRV5j%QR?aD3)wF zI)mU!)wGxze8R)0>{;=M&S>$1*)Zp298_xvL`$2vSW-e8rZN=Ux6X$N@L|5dxoF>3 z=!I_>F>!6FvSET_oU5)JL@D8k5E=)&bS;O!eW2*YhNMPOZ}XH6VLbZ^k#xcl3(9`gUneXL zIYuo^3c2n}8HyO&=LHrNVun5H83V*rik(o)!g3BdHfWS1bV2JorYVA~kIX7Dtz%D) zD0>qk=!i6m`g7cMGZh(S7R`Ebs(niHqQX$B8WIChg?3fnxC&Ndyy~rEeA)sxv;2+$ zs^in#Wgkfla$q^P+uz{P`M`Hi*>qLG@&rD`x_>(_PAy40RsszrYXDgQA37GJ&W)L= zAtD^W>Ss@7l1zg;E+Af81#I*#-0`*6SvUWxTJKLd4|6%#YTNf2N<3nmwafjFUT<`` z=?{!y=}makdbW{rBi`QyKffbqcZ;wB*E8`8MfGJm@r>zwp`2fwY7BGBoL32IFjOmt 
zA|?#eh9|qtH4HY@1gKQd(R!;l5+9P;CT%0sU0Mwt;r`CAP)Vx(v;yKU1`AKG>DP^hvK64iTGAP7^kkp=w=&9f zy^r0abV{mxM9TCTrtfcB9~DpymcipHZyxHTMTkc*_r%DvBaz(VoPJ~dJt>Mz`fBJ` z{WEu_jd#97ZZ!=4Y$%i~Sz%w_r5*C%0MCB;(E8l$ea>R5>kM7`PcI!q#N-8?wCeqG z@PHv{*cj-(%Tp>3N2+$dovS3K>ILo(;2jOcO^ek!T!ia!2dOlh_}Stgal$p08#Kif z_Tw_MEzFrgFZ5L1k~X)#&V=$xO^)eT^ET#PjP(EJtigL1QCiT_Z|9(}-xUjmCJ_Tr z!!-cfM+$|m@c>YjZYY!i3T3c`LIa^tRw(r4G8Fn03dI7T^b<40`s!{(zn0Z&oI;_y zsy)lnEht#QUMjd@nkRK!@mGKdDLtk6| z2-+#rEVhG!O4^Lnd!e9CNXR<%Bj}cXBIyxyP^PJD=b)~^K+fA-c==`Cpu%A7sBl5? zlULVZR7a7k@bo>7&gfq_Z_qCF z-ynuBe?UZkKzV_GgWQPzbZ7Vnq`v&OqE)aKB|C3zTRT&aSd z_Q4}WgbDqXR0uEStX)Nck}72x#jTT&U?)DCC<>xaq5439rWj`##ZBZxjK0h+sX+z3 z%w1{-*a)s4$$=DOCSG8$i?Gmz(x>h6+Hg=e-*-uZ^?)ZWlvC|g_QdnsSG%6C<#4YM zzi)iw#jwcVi0iz6!$}X`z{W4gLD|UF!Kw%_*O!?gC3Psx*}C*Z6{BIrWnk_6Y8AS= zVT3x~9Fw+q`_H0DZxAOPLueCz7P^YjDjX2lXQ`ezAbcr?dz* z;)Ipvg45|Q!M_es5$UkAXhcjIexZ~Y7sMkvP!hFWriQa7fI8Le6l%rWb&E5(G@Y* z&)6Q%=-wVsypGOBervZ9WMY{b1hqfSAsQvdO7RNX)nxbLp+|~$qF6Nl z(_G>`-qZ@nSpi(|pib062!+p+dya1f$CR2g|= zYo!Hox(URa7n&pxyU=vju)$`Cr*++}CkEAwRVjW=IWs|f8ia|J)Nv9#WiI%D2Yq|b zda70#xCX=DOBp*7d`pSuow!M`fcb&e1yS7W73G5vShSOtJe$wMTyOk59z6(GZiX~% zl9u`5IdwNRsetX6nnh0y_jxn9dfxnn=A?Uq&I65>6|2&Usu=Cq*z<{3Rcmn98_%`T zjpM%c_BrP%xe(z7K=5E_tJXT5*K;v)bzzmQGK<`H0AOe0dJ^(E3sLTAg_1{r?)&%@ z{KL79Ah_vuD|D|j#PGb!oZ7zJ{ur#4grCE(sE{QAHd5yrx3>jsR&T-#7V`7N{V(Zg zJNf6HL#5+ykib)PQ`ofBeQ#lm)dFYRh(O8pYe_nJ8$2Ik(Z4hb;w4d1cT+Jt#`2h3 zI}+mUAbpC~vh#JfTQE4)in@Krtk+hBMK`*PWIKR`R*CQ8J?2&sxt_ZW_4lY0}CJXkWKvs^}*rZkrNHb`EPwrT4MfwqHK zkFo3fv&*nNWj`*-=uRO>dOQN+84Evncu$w418 zRyqU~&$Gw6Hgd&*F|xG!o1<8y)UTM2 zDGP6lv8M-X@(a&8M-iFbPCavOF1OT8ej%|=ZK4);Dva9g`;Q4X$;E-~4}9lI+jNqi z9X9_Iu*~1g08qgNvfoDRw!vc@hsXI%`_OP5%U31fSZUr-X#k&@WS* zO7|1NKH6f4WBil4XCJ&A=%bv|^>%4aFH;JEpON=4@QPTwZYn+>bJp6-Q*L?+4JozC zgLmRaA!DZd>dzYn7gx^Qhzr=d1h|I5c#V*)te$T!c!VA^W zKc#aN38R3>QOXPboaS0=*7{6U*c&BwV#OQ9IIM2l0wD`O*j3G*34q9NUo*V^RWU(q ze{e(UfSW|QS%JaWB!4EQ(aI`UDAx0GV(gqg9WdW$?}*PTfp0}#uVHnh4{Bvjb?grv 
z)2}$(1vnlYW3SlP<|H!Rh@%zL-92s28rW`$u{^br-0k+6t-~9^l=7oKnDNA zJJx#xuFaygVh(y|hAdPQ zMGmcY=JP7jr9vvVB#h37g<~ne@i{CB3wp^vEM22riqoP2X4?J&C#ev&-TUYtbFNM~ zEFB9kXnU(J>v}VE-Mp+qPWJDkvr^m$qKezDX#jSb-33zG-lj`{1$f>V(#_9mpgdx) zFZ6wTI!4O@j5s!Uu@BMnen647ibCT05+bb(53z##G$LHo(8C}jg4^1o@RoZ!%QdF+ za`bgw(&0}ZUljN2M&D12dTNT8x1Pi_J*}6Mjv@yw!b(vths-R*YxB3TPqi)~qTfUY zTl(!aOs8bVzvO%uM&4mEAX=LRPEKDyUW=5r1ogeeD`F_54i50T+VV)QB|ew3fohR- z0cereB46G1Sq;zu*lpwbLljntHDex47{>^B)O{wj`1Zsc=m!u!)UW#oW2%<`4!xlpeg!Ctv(VRR z>oHgps}=JD`B0*~3K9+Xm%Juu_qV$(&@LIuJP|$O9wGD5Spx28znW<#7hs8czg>P0 zu-gzU&un%^v!uD4U;G+rw+NoAf~XV&60_!<(C%^8>uxP1G<(DORbm&JU^mATw4iao zwAD?p%~b#)h!S$K^C-NeoTQ$&`%1RGb84KfZg+pTWE!I@EPU=lHF=saWIOLkFu9+( zHlL5iul+tCgUAhHsFZYHT#E(Gq3%>K;xVnS3*E`WWt-N;*Q zHV5j76d?k`;gnt^TiwxK_-Y&WI|i{|fEW)K^wJNtZoXNinh&?}BHrxo5RR~4-2`}T zPlu9&xJ)1}a(&&?!e|c0d+7*`$LB+^^HVoJ6&jnaZhndoDfe=skK>M^(y8ZVB)_^j z)pk%7d*I3Mai8EW0L-^G-x{(|iIq5fj&1VWEjj!hZVR_ktQ2s#@w+MclOa<}P_#PU z69zUpl|;qEQU${|E&M8p>W8J5KbnEups8AzfAyJdcfCpOhA2SAL?*;;7wlWtr~pjr zw7q3OPT1{%-!|M3S3#pJ4H{()(9o-GX?0rCx72#m)dSxDw=`91>k&W<_3p`~1+d`aAnSB@|>PpvX+w}===_vCmUyE-?yy`2u zY^A-+PEtf5DU!*Q(md@HI}P3#&Q;LJ02CdvVO{f0SRD(G*wVI+`W_UK#a7u#k4-wp zn*;UNi%r1(1F+w&{#v+McJh9UE`v1t^|b{S>9C|Z=On)ji$F{4 z#xzjJpcUD$6q;OCKzUuCz10CLbwl%o7dl}~v;bAvX($krM<~mr3{`2kWcNayl!gh&e*mYXT||2-O!uJRr78}se>Kq3_ji-qB_J7=N{2eK zm=0V5s;HHC$TT|+kYDs5nBNP|K(&?oc`S)sT(fDVC*R_TeDk>nEE0m3QPF@49q+8o~RbX(hiwGOJr z!ecN2=?`822EO4EVnPw?acCn5T$|)pcTx}FZdN7S@U#9M8bEkl<^yEQJdWG9PTk_nri@@H*^_` z8LtZx+DQdzMurc=9@9VHG1kd0JG^&M&%AXtoc<*{TuX3A-NIg;ZvEvr8-Mm!XiBwy zA{d!u7Q5*NcLU)Ct+2at#l}i+1jpiiTT^s;8DtCtphIDt?uJSpAxaw~g!;NrUhGOK zqJ>{6(fAM*dwuF#wDwN+20>Ty*yqPAWpMM#p&5c+SylrA?e^ytq=Czs#ZnmX?4&Gt zT{?Zwt8MS4X_jDVn+&A2pUc_c)mIL)7yw zZ<3g|{d1mw(FJ0Z>@bapBJSc8R!{Bh_h%JwV~=<9El)Z!O0YS}X~7dl=xpfQe4E%{ z>zs`BbLrwY6Kg1tBEaS6ZX?;DP`uL32Z0)FYl3}`9f_yC?cDGKEq=;!ku8N@EP84g z#CIJcBHhs0*ygXI2 zc;v_nEaewrt>hwO`+M=w^i)Bweu0YzTnViU_mLIKtXM+XHLu?WgV<1G!O%#ao}AJ# 
zr@b7LWZJnke>8R)h;5))xH9)wIRPWGcOUI5aNEFA8>6iddp=zoii{lu$nLGUoODti zzu-KNKE-GJ-JFn16$een1|Ub&TA)okwCVELu|kmz-npArO&=ix2jrAx(;cW<m1a>^MSzo#(`bL8weA-|U z@9+Pc9(-P41w`JDkc0UWw(??Pr=0R~Vy70o_;>)U?g+T#u~}dK7Bj<;LJC%DAwSW8 z24XO8Qe=WzzbU3vGr55TloHK*FoPYR1n_Mvn;v+#9GxucG_GQ(LGKe>t`4LZU%OnD zM8wjBbCI^m@2Jkcy44$53|Lu&{Cap$>}R<SO<;bwSVd!g=y{lDPQQ(L zI2u}PyW0)L6TZI*^twL2eYm@^0f6&u5XeekI^<|RQuwa(;cnCmMoA6<(QG%^d4dN9 z6x@sfQj&v(!v-NDB7%IYsx^UU)Ba;a0ru>{4i0A4ruIzER+js^vvw;y=pKt@(#O?0 zIt1U5EJZ2hZCSC{{6ei#j4;ATj~>sIzt8njjVO#4mw;R}5EMZ-*Mx$RIi&Nm-J zspp=?fAs-5DUhb2e@%FuO)5%rO!B=$(l$}}YE}Q{2`Qcfk2o&F^w`M@>8lC^UOb6r za^+O#-1XhQup#N-CulB|Pl&hT>eaKDD0aUO#l)=9PSZ(sF0Xb<4qz)VDD-G*q=b>!=(W^OA7CaH{f5r6MjyQQkg=wrs!L z+Vk2-9ejO=Z@UFo%d0K;+)vL$d{|;hH+&( zs_C{hrpX+%&}1BJ!-^Q-JTPo{AFiF7qPpq*{W6v>;C1jprQmA??|X$Yu-E#R8Bo|e zmf^ZAJS^W`W4_xT!&HGNG+pRX@=5HZRK&!cnB9K0Fq_TfzH{iKTF)Z9Yl6OT!cEE1 zSRxfaTj(hFN3>M>VH&HTg07zJ_c~N{;xglncTq4YZID?+P`SRkUzQBG=Vj9|s}#=% zLSLTRrdq@>=X$}hy}`qnCCYX=-vl^jB#1F)vq}`OqmS84;Vg4R8nZMT`&#ChR_QJ+ zhn7}fr8&o0=GaZi{diIV;*sqRRq3n_i zNx*);~dtx*$jP6{0BBN)DWROIzXb{vNt?Wy(+M<@1+vcL($Y)Qe_x-bg z-N368&EpfFq1P;Ai*~K`zW%!o5_WxM+ZpgdPi>J8b{d^uHpcqPtp^5P@|M6@2IKb{ zUvu!fTcO~W`z4R%ud;1Dt+qW1({_X(E_FskXC-*ega)sLe9VW9RSnzWa^qWTbja!{ zb`*bfPWrC+Ig;L`w^gvPH4|n#Or@1{YttMga7f2H%>Bqt%Z4+v28+Fhv?o%er0AED z!uL)@=V%bKa#h)6*$<3YB4M&&8JKPs_Z~?Q&THB?#+~x7$!?$j-r7Tc!hclyIWj5s z8&Sm3dsY`xQL4&x*E*N)U1NC~2J=s{LVqWW%&pMBr+9x|s>pBuIW+d~+}_oL}<;E)o*Qinp|=-qL32;hqRxdPq$)PbZH&x2&huACDRNSMxD*?Nlb^VnNm)vgxL5^Yj->u!RoMCPbfruiFnO}ec`1s>U1<}SN^#A2MnEe|8i zpUSskb6INQMw4Z;3wQ5D4cN;9yK{a|RBK7F7X?J~r<$27nh|CH))?Qb3)?W8%@4Z? 
zX#mx2tWKtB{3e{7+~^!TwDT|gefT-#t5aL5t|H_5T!qE>u4gve4((}$qaFK5X{p{U zE1q4ZI8&^^)5&&hLgPgObZ?bJYxtv$lh)Gb1%^Ah%lvH@<}SVI7$drjiucd@d_S^G zc5D^28#=HZDYET>BQQGUVMBPYeB$X>te@(M_E=6?fSdfh7w2~3&d=D~0 zF`inb1#Hl^$cr8~ZDf=5)Ewf9GMv)3`8hFt00r z4T5k#x=3`7H(3@l#(+TJaQ+kKLWb`}JgphT@L6LHWLHX4TYZ7`>X17{y4o$b5R5c# z+5fYUS?%(=W#hGmLgp*lqU)bTZ%SjQKFNan?v9t&xs>r=%v)uoN0ccywOvC*y{>`- zRP#U4sz)(>89QWR^%!|ihkY!hX2!(Ho-0dvAJbPiw7w`*0p=)F6Pg?_ibhDV59u9lzwd~bcPydHX)5$6 zxsk*<#*~^&*UZR?4X`LPFmD*OoK?*1za$!5?mH_nce{SS%@4ey*ReFPs8J<6S8;5M z3wR5CdCgL0CG7sfXhLNlk$TukswI--BW_#T%inJofL7Q+V-#Ck$his4I9oD2|5qeAL7RC!M^U_26V=$KV{6<>|EMo zqf{dKU|Jp51y0KbP=?Di9XG66>~Eo6;tXJ0U^uKK= za+R_nZowX3IPj>M!#6QBi!H$l1edOxa=S-Z1}o4JQZDlDkQ`X?nBe*Oy8!6Lg{@q9 zHiAlz3$kR^25My0)S}mE1XITiEtRe69+~D1X4)5pagsG~uH_bYV}@_j7Niq&)-)U2 zeruHONZ|~OY~Ct&T@=l$e4x$Hq&L+mu+*0?hHqMKC=3&bMy%2LdVhBxV2{uuK)t&$ zRM`3{Sw&lIdo5ZglVr|Ag_B_(BAI^fv8Zq%^{!C=>l=rb&GNIj>&%Wl5!dRLZ+o#F zC#B2y1XB3jl@htg;+6lNwBIw^f)oImcXX`-fA>nuD*MoW9+iZ=!ME4`{cw-W_zG zbfn)RC~xr}=y;xNeo20CPiWIAx^q7-qmWl?a7~65oXO_To}*_~KhFZI6whkzc}z?6 zO8Xm`D%KULNw)$!_y#Q0>(?cPo_=#|d0#_))^N{5>k+m7UU$;O#zvlPAK^o*;%yiC142KU&?Lvu#&U83_Y12{Y zsKHlDjZF!+x0^YRXLugb13i;^=OdT6Z_pD5%N_>l@tKYgTKhMWq`;JN3hA4ubs31x1oqL8;$-+q4?c9h)C1W;MH(W>G z=w#mvVO`nRB?i==@dH|QRlhO8oNF(@-&+eb1ZSCO?4Is89GOIQ|T+xxOa?nJlkZMEvZO}5l_9B z9cdP7*(<+y&Ngr|-veuu*j?skx!T?s*QE6l-Sg$o(?(sC&A%*+)K7;vSCFb4TjQlE z+67b&exn};gk4wb??D>s57gb}G)?URkw z72f&ViQVjN*4rJ}?fD4Tl6ZsApIb0(QAh7wzmdEV`n|cY(_(PyP zi~5Euj&5?(QK+fwvyk48-yNIhn;$f!A}!7%Guk>OjBn*w^zo8wM$O_~FHuYdod%j+ zCEE0FW`t^D3)>je!rlvgeChKHY4`!}%ZdP?R-MRB`+fz55h1T?^C^1a#f+4$3L!!^N zhtVQ$o!2f&X5KmP!3vyo+#~3&AJ~#c-Yr}RFS%}AdTk-7Ql7bPWiCEx305cDq;lgW z{pISl#SKUw>2O5};4E1WTiZgt$IX*{b1}wWHtBp)xNNzuPMPpgyCc&I7k~l|`{lG- zxN1^%zkNQNNXJWgk@{rskl-?t`z#qa3~0Mq&0=aDwlONC_;ez5`*oe^ByXpVMV*a~ zzhk)AMOVZ1B*62TD&@Cpt_~Ki{cMPiah_~_6sX1KjHFFNrq3XeGvKr_UjYgFh51c7i&TAib5 zpbC-U>ZR40rdVa zv?q_?f5-hr1Hk{n`w!Y*LLcS*2My?m#P$EA{qLO*i-G=srS5K$q)kVil}A 
z$k6>SdHuU;**Ny|L~Vf=-dN?*34+><1Ch~!(rFqzDu}2{>EvwH4!sFlFP+KWCL3p0 z!Sa-$;J%k3m4T;GC@5#@*}P!%dYOD~G>)i-bM^gJB>p0^P5v3?3p?`W6JiP2G=yK_ zPZ`3)P~R=J-MV~pNJ2#s_`b4zBK&m+$O1~HBXk(NJ90KZ`3|<9d+ou zTEH)=b?QOUuqkrjx&KI-zo#_g1o^8%49@b}G_|*TL)>i;c<7y?vKbY!J`4!-61ZDb zl7o4I4Z;TfxmWbJLIVJ4O~ zh7REWeYpPJL5v*#r=y{UeKfQNTmG-w{Cfa_|BD<$V`H!-7>FmEe`@q6E2}R*j64Oh z;WMy)1N~6spIbU0=^r)M?!C#M(EGzVFg18?1FQpCh=V}T<)D9V3!K1*EB|Bqym+~U zR1Wm`5$N&XtB3z None: # each kernel is calculated and compared to the reference - for differences, _, reference in reference_finite_differences: - kernel = calc_forward_diff_kernel(differences=differences) + for ref_diff_kernel in reference_forward_finite_differences: + kernel = calc_forward_diff_kernel(differences=ref_diff_kernel.differences) - assert kernel.size == reference.size, ( - f"Difference order {differences} with accuracy 1 expected kernel size " - f"{reference.size} but got {kernel.size}" + assert kernel.size == ref_diff_kernel.size, ( + f"Difference order {ref_diff_kernel.differences} with accuracy 1 expected " + f"kernel size {ref_diff_kernel.size} but got {kernel.size}" ) - assert np.allclose(kernel, reference, atol=1e-8), ( - f"Difference order {differences} with accuracy 1 expected kernel " - f"{reference.tolist()} but got {kernel.tolist()}" + assert np.allclose(kernel, ref_diff_kernel.kernel, atol=1e-8), ( + f"Difference order {ref_diff_kernel.differences} with accuracy 1 expected " + f"kernel {ref_diff_kernel.kernel.tolist()} but got {kernel.tolist()}" ) @@ -142,3 +149,191 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( # NOTE: the following check has to be fairly strict when it comes to equivalence # since the NumPy and Chemotools are basically doing the same under the hood assert np.allclose(result, result_conv, atol=1e-10, rtol=1e-10) + + +@pytest.mark.parametrize( + "series, differences, accuracy, window_size, stddev_min", + [ + ( # Number 0 series is too small for difference kernel + np.arange(start=0, stop=5), + 10, + 2, 
+ 3, + 1e-10, + ), + ( # Number 1 series is too small for difference kernel + np.arange(start=0, stop=5), + 10, + 2, + None, + 1e-10, + ), + ( # Number 2 series is too small for window size + np.arange(start=0, stop=5), + 1, + 2, + 11, + 1e-10, + ), + ( # Number 3 the difference order is 0 + np.arange(start=0, stop=10), + 0, + 2, + 3, + 1e-10, + ), + ( # Number 4 the difference order is negative + np.arange(start=0, stop=10), + -1, + 2, + 3, + 1e-10, + ), + ( # Number 5 the accuracy is odd + np.arange(start=0, stop=10), + 2, + 3, + 3, + 1e-10, + ), + ( # Number 6 the accuracy is odd + np.arange(start=0, stop=10), + 2, + 5, + 3, + 1e-10, + ), + ( # Number 7 the accuracy is 1 + np.arange(start=0, stop=10), + 2, + 1, + 3, + 1e-10, + ), + ( # Number 8 the accuracy is 0 + np.arange(start=0, stop=10), + 2, + 0, + 3, + 1e-10, + ), + ( # Number 9 the accuracy is negative + np.arange(start=0, stop=10), + 2, + -1, + 3, + 1e-10, + ), + ( # Number 10 the window size is even + np.arange(start=0, stop=10), + 1, + 2, + 6, + 1e-10, + ), + ( # Number 11 the window size is 0 + np.arange(start=0, stop=10), + 1, + 2, + 0, + 1e-10, + ), + ( # Number 12 the window size is negative + np.arange(start=0, stop=10), + 1, + 2, + -1, + 1e-10, + ), + ( # Number 13 the minimum standard deviation is zero + np.arange(start=0, stop=5), + 1, + 2, + 3, + 0.0, + ), + ( # Number 14 the minimum standard deviation is negative + np.arange(start=0, stop=5), + 1, + 2, + 3, + -10.0, + ), + ], +) +def test_estimate_noise_stddev_invalid_input( + series: np.ndarray, + differences: int, + accuracy: int, + window_size: Optional[int], + stddev_min: float, +) -> None: + """ + Tests the input validation of the function :func:`estimate_noise_stddev`. + + The combinations of + + - the series length, + - the difference order, + - the accuracy, + - the window size, and + - the minimum standard deviation + + are chosen such that the input is invalid. 
+ + """ + + with pytest.raises(ValueError): + estimate_noise_stddev( + series=series, + differences=differences, + diff_accuracy=accuracy, + window_size=window_size, + stddev_min=stddev_min, + ) + + return + + +def test_noise_level_estimation( + noise_level_estimation_signal: np.ndarray, # noqa: F811 + noise_level_estimation_refs: List[NoiseEstimationReference], # noqa: F811 +) -> None: + """ + Tests the noise level estimation function :func:`estimate_noise_stddev`. + + The function is tested for all the reference noise levels. + + """ + + for ref in noise_level_estimation_refs: + # the noise level is estimated + noise_level = estimate_noise_stddev( + series=noise_level_estimation_signal, + differences=ref.differences, + diff_accuracy=ref.accuracy, + window_size=ref.window_size, + stddev_min=ref.min_noise_level, + ) + # then, the noise level itself is compared to the reference in a quite strict + # way because both results were computed in the same way with the only + # difference being that Chemotools uses Python and the reference uses + # LibreOffice Calc + assert np.allclose(noise_level, ref.noise_level, rtol=1e-12) + + # then, all the available powers to which the noise level can be raised are + # compared to the reference + for power, raised_noise_level_ref in ref.raised_noise_levels.items(): + raised_noise_level = estimate_noise_stddev( + series=noise_level_estimation_signal, + differences=ref.differences, + diff_accuracy=ref.accuracy, + window_size=ref.window_size, + stddev_min=ref.min_noise_level, + power=power, + ) + + # again, the comparison is quite strict + assert np.allclose(raised_noise_level, raised_noise_level_ref, atol=1e-12) + + return diff --git a/tests/test_for_utils/utils_models.py b/tests/test_for_utils/utils_models.py index 30b8a28b..1a4c85b1 100644 --- a/tests/test_for_utils/utils_models.py +++ b/tests/test_for_utils/utils_models.py @@ -6,8 +6,10 @@ ### Imports ### -from dataclasses import dataclass -from typing import Tuple +from 
dataclasses import dataclass, field +from typing import Dict, Literal, Optional, Tuple + +import numpy as np from chemotools.utils import _models from tests.test_for_utils.utils_funcs import float_is_bit_equal @@ -15,6 +17,44 @@ ### Dataclasses ### +@dataclass +class RefDifferenceKernel: + """ + Dataclass for storing the reference for the difference kernel validity check. + + """ + + differences: int + accuracy: int + kernel: np.ndarray + + size: int = field(init=False) + + def __post_init__(self) -> None: + self.size = self.kernel.size + + +@dataclass +class NoiseEstimationReference: + """ + Dataclass for storing the reference for the noise estimation validity check. + + """ + + window_size: Optional[int] + min_noise_level: float + differences: int + accuracy: int + noise_level: np.ndarray + + raised_noise_levels: Dict[Literal[-2, -1, 1, 2], np.ndarray] = field(init=False) + + def __post_init__(self) -> None: + self.raised_noise_levels = { + power: self.noise_level**power for power in (-2, -1, 1, 2) + } + + @dataclass class ExpectedWhittakerSmoothLambda: """ From 77b9f6e8a29f211036182a93a1648fd8db158f76 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 21 May 2024 00:48:07 +0200 Subject: [PATCH 104/118] test/refactor: included `power` failure into wrong input test of noise estimation; restructured reference finite difference kernels --- tests/fixtures.py | 6 ++- .../reference_forward_differences.csv} | 0 .../test_for_utils/test_finite_differences.py | 42 +++++++++++++++++-- 3 files changed, 43 insertions(+), 5 deletions(-) rename tests/resources/{reference_finite_differences.csv => finite_differences/reference_forward_differences.csv} (100%) diff --git a/tests/fixtures.py b/tests/fixtures.py index 29fe6ed3..0ee46462 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -134,8 +134,12 @@ def noise_level_whittaker_auto_lambda() -> np.ndarray: @pytest.fixture def reference_forward_finite_differences() -> List[RefDifferenceKernel]: + fpath = os.path.join( + 
path_to_resources, + "./finite_differences/reference_forward_differences.csv", + ) fin_diff_table = np.genfromtxt( - os.path.join(path_to_resources, "reference_finite_differences.csv"), + fpath, skip_header=2, delimiter=",", filling_values=np.nan, diff --git a/tests/resources/reference_finite_differences.csv b/tests/resources/finite_differences/reference_forward_differences.csv similarity index 100% rename from tests/resources/reference_finite_differences.csv rename to tests/resources/finite_differences/reference_forward_differences.csv diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py index 06f8786b..c3428b57 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -152,13 +152,14 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( @pytest.mark.parametrize( - "series, differences, accuracy, window_size, stddev_min", + "series, differences, accuracy, window_size, power, stddev_min", [ ( # Number 0 series is too small for difference kernel np.arange(start=0, stop=5), 10, 2, 3, + 1, 1e-10, ), ( # Number 1 series is too small for difference kernel @@ -166,6 +167,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 10, 2, None, + 1, 1e-10, ), ( # Number 2 series is too small for window size @@ -173,6 +175,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 1, 2, 11, + 1, 1e-10, ), ( # Number 3 the difference order is 0 @@ -180,6 +183,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 0, 2, 3, + 1, 1e-10, ), ( # Number 4 the difference order is negative @@ -187,6 +191,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( -1, 2, 3, + 1, 1e-10, ), ( # Number 5 the accuracy is odd @@ -194,6 +199,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 2, 3, 3, + 1, 1e-10, ), ( # Number 6 the accuracy is odd @@ -201,6 +207,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 2, 5, 3, + 1, 1e-10, 
), ( # Number 7 the accuracy is 1 @@ -208,6 +215,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 2, 1, 3, + 1, 1e-10, ), ( # Number 8 the accuracy is 0 @@ -215,6 +223,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 2, 0, 3, + 1, 1e-10, ), ( # Number 9 the accuracy is negative @@ -222,6 +231,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 2, -1, 3, + 1, 1e-10, ), ( # Number 10 the window size is even @@ -229,6 +239,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 1, 2, 6, + 1, 1e-10, ), ( # Number 11 the window size is 0 @@ -236,6 +247,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 1, 2, 0, + 1, 1e-10, ), ( # Number 12 the window size is negative @@ -243,20 +255,39 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( 1, 2, -1, + 1, + 1e-10, + ), + ( # Number 13 the power is -3 + np.arange(start=0, stop=10), + 1, + 2, + 3, + -3, + 1e-10, + ), + ( # Number 14 the power is 3 + np.arange(start=0, stop=10), + 1, + 2, + 3, + 3, 1e-10, ), - ( # Number 13 the minimum standard deviation is zero + ( # Number 15 the minimum standard deviation is zero np.arange(start=0, stop=5), 1, 2, 3, + 1, 0.0, ), - ( # Number 14 the minimum standard deviation is negative + ( # Number 16 the minimum standard deviation is negative np.arange(start=0, stop=5), 1, 2, 3, + 1, -10.0, ), ], @@ -266,6 +297,7 @@ def test_estimate_noise_stddev_invalid_input( differences: int, accuracy: int, window_size: Optional[int], + power: int, stddev_min: float, ) -> None: """ @@ -276,7 +308,8 @@ def test_estimate_noise_stddev_invalid_input( - the series length, - the difference order, - the accuracy, - - the window size, and + - the window size, + - the power to which the noise level is raised, and - the minimum standard deviation are chosen such that the input is invalid. 
@@ -289,6 +322,7 @@ def test_estimate_noise_stddev_invalid_input( differences=differences, diff_accuracy=accuracy, window_size=window_size, + power=power, # type: ignore stddev_min=stddev_min, ) From f859bab2b45f915d66eda333a58735e7b8fb7274 Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 21 May 2024 13:29:42 +0200 Subject: [PATCH 105/118] test/feat/refactor: added central finite differences test; made finite difference fixture smarter --- tests/fixtures.py | 4 +- .../reference_central_differences.csv | 22 ++++++ .../test_for_utils/test_finite_differences.py | 74 ++++++++++++++++--- 3 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 tests/resources/finite_differences/reference_central_differences.csv diff --git a/tests/fixtures.py b/tests/fixtures.py index 0ee46462..8007ba4c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -133,10 +133,10 @@ def noise_level_whittaker_auto_lambda() -> np.ndarray: @pytest.fixture -def reference_forward_finite_differences() -> List[RefDifferenceKernel]: +def reference_finite_differences(kind: str) -> List[RefDifferenceKernel]: fpath = os.path.join( path_to_resources, - "./finite_differences/reference_forward_differences.csv", + f"./finite_differences/reference_{kind}_differences.csv", ) fin_diff_table = np.genfromtxt( fpath, diff --git a/tests/resources/finite_differences/reference_central_differences.csv b/tests/resources/finite_differences/reference_central_differences.csv new file mode 100644 index 00000000..ca0e00a6 --- /dev/null +++ b/tests/resources/finite_differences/reference_central_differences.csv @@ -0,0 +1,22 @@ +From https://en.wikipedia.org/wiki/Finite_difference_coefficient#Central_finite_difference,,,,,,,,,,,, +Difference Order,Accuracy,,,,,,,,,,, +1,2,-0.5,0,0.5,,,,,,,, +1,4,0.0833333333333333,-0.666666666666667,0,0.666666666666667,-0.0833333333333333,,,,,, +1,6,-0.0166666666666667,0.15,-0.75,0,0.75,-0.15,0.0166666666666667,,,, 
+1,8,0.00357142857142857,-0.0380952380952381,0.2,-0.8,0,0.8,-0.2,0.0380952380952381,-0.00357142857142857,, +2,2,1,-2,1,,,,,,,, +2,4,-0.0833333333333333,1.33333333333333,-2.5,1.33333333333333,-0.0833333333333333,,,,,, +2,6,0.0111111111111111,-0.15,1.5,-2.72222222222222,1.5,-0.15,0.0111111111111111,,,, +2,8,-0.00178571428571429,0.0253968253968254,-0.2,1.6,-2.84722222222222,1.6,-0.2,0.0253968253968254,-0.00178571428571429,, +3,2,-0.5,1,0,-1,0.5,,,,,, +3,4,0.125,-1,1.625,0,-1.625,1,-0.125,,,, +3,6,-0.0291666666666667,0.3,-1.40833333333333,2.03333333333333,0,-2.03333333333333,1.40833333333333,-0.3,0.0291666666666667,, +4,2,1,-4,6,-4,1,,,,,, +4,4,-0.166666666666667,2,-6.5,9.33333333333333,-6.5,2,-0.166666666666667,,,, +4,6,0.0291666666666667,-0.4,2.81666666666667,-8.13333333333333,11.375,-8.13333333333333,2.81666666666667,-0.4,0.0291666666666667,, +5,2,-0.5,2,-2.5,0,2.5,-2,0.5,,,, +5,4,0.166666666666667,-1.5,4.33333333333333,-4.83333333333333,0,4.83333333333333,-4.33333333333333,1.5,-0.166666666666667,, +5,6,-0.0451388888888889,0.527777777777778,-2.71875,6.5,-6.72916666666667,0,6.72916666666667,-6.5,2.71875,-0.527777777777778,0.0451388888888889 +6,2,1,-6,15,-20,15,-6,1,,,, +6,4,-0.25,3,-13,29,-37.5,29,-13,3,-0.25,, +6,6,0.0541666666666667,-0.791666666666667,5.4375,-19.5,40.375,-51.15,40.375,-19.5,5.4375,-0.791666666666667,0.0541666666666667 diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py index c3428b57..040a39d4 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -12,13 +12,14 @@ import pytest from chemotools.utils._finite_differences import ( + calc_central_diff_kernel, calc_forward_diff_kernel, estimate_noise_stddev, gen_squ_fw_fin_diff_mat_cho_banded, ) from tests.fixtures import noise_level_estimation_refs # noqa: F401 from tests.fixtures import noise_level_estimation_signal # noqa: F401 -from tests.fixtures import 
reference_forward_finite_differences # noqa: F401 +from tests.fixtures import reference_finite_differences # noqa: F401 from tests.test_for_utils.utils_funcs import ( conv_upper_cho_banded_storage_to_sparse, multiply_vect_with_squ_fw_fin_diff_orig_first, @@ -32,20 +33,61 @@ ### Test Suite ### +# parametrizes the fixture ``reference_finite_differences`` +@pytest.mark.parametrize("kind", ["forward"]) def test_forward_diff_kernel( - reference_forward_finite_differences: List[RefDifferenceKernel], # noqa: F811 + reference_finite_differences: List[RefDifferenceKernel], # noqa: F811 ) -> None: + """ + Tests the calculation of the forward finite difference kernel. + + """ + # each kernel is calculated and compared to the reference - for ref_diff_kernel in reference_forward_finite_differences: + for ref_diff_kernel in reference_finite_differences: kernel = calc_forward_diff_kernel(differences=ref_diff_kernel.differences) + # first, the size of the kernel is checked ... + assert kernel.size == ref_diff_kernel.size, ( + f"Difference order {ref_diff_kernel.differences} with accuracy 1 - " + f"Expected kernel size {ref_diff_kernel.size} but got {kernel.size}" + ) + # ... followed by the comparison of the kernel itself + assert np.allclose(kernel, ref_diff_kernel.kernel, atol=1e-8), ( + f"Difference order {ref_diff_kernel.differences} with accuracy 1 - " + f"Expected kernel {ref_diff_kernel.kernel.tolist()} but got " + f"{kernel.tolist()}" + ) + + +# parametrizes the fixture ``reference_finite_differences`` +@pytest.mark.parametrize("kind", ["central"]) +def test_central_diff_kernel( + reference_finite_differences: List[RefDifferenceKernel], # noqa: F811 +) -> None: + """ + Tests the calculation of the central finite difference kernel. 
+ + """ + + # each kernel is calculated and compared to the reference + for ref_diff_kernel in reference_finite_differences: + kernel = calc_central_diff_kernel( + differences=ref_diff_kernel.differences, + accuracy=ref_diff_kernel.accuracy, + ) + + # first, the size of the kernel is checked ... assert kernel.size == ref_diff_kernel.size, ( - f"Difference order {ref_diff_kernel.differences} with accuracy 1 expected " - f"kernel size {ref_diff_kernel.size} but got {kernel.size}" + f"Difference order {ref_diff_kernel.differences} with accuracy " + f"{ref_diff_kernel.accuracy} - Expected kernel size {ref_diff_kernel.size} " + f"but got {kernel.size}" ) + # ... followed by the comparison of the kernel itself assert np.allclose(kernel, ref_diff_kernel.kernel, atol=1e-8), ( - f"Difference order {ref_diff_kernel.differences} with accuracy 1 expected " - f"kernel {ref_diff_kernel.kernel.tolist()} but got {kernel.tolist()}" + f"Difference order {ref_diff_kernel.differences} with accuracy " + f"{ref_diff_kernel.accuracy} - Expected kernel " + f"{ref_diff_kernel.kernel.tolist()} but got {kernel.tolist()}" ) @@ -322,7 +364,7 @@ def test_estimate_noise_stddev_invalid_input( differences=differences, diff_accuracy=accuracy, window_size=window_size, - power=power, # type: ignore + power=power, # type: ignore stddev_min=stddev_min, ) @@ -353,7 +395,12 @@ def test_noise_level_estimation( # way because both results were computed in the same way with the only # difference being that Chemotools uses Python and the reference uses # LibreOffice Calc - assert np.allclose(noise_level, ref.noise_level, rtol=1e-12) + assert np.allclose(noise_level, ref.noise_level, rtol=1e-12), ( + f"Original noise level differs from reference noise for differences " + f"{ref.differences} with accuracy {ref.accuracy} and window size " + f"{ref.window_size} given a minimum standard deviation of " + f"{ref.min_noise_level}." 
+ ) # then, all the available powers to which the noise level can be raised are # compared to the reference @@ -368,6 +415,13 @@ def test_noise_level_estimation( ) # again, the comparison is quite strict - assert np.allclose(raised_noise_level, raised_noise_level_ref, atol=1e-12) + assert np.allclose( + raised_noise_level, raised_noise_level_ref, atol=1e-12 + ), ( + f"Raised noise level differs from reference noise for differences " + f"{ref.differences} with accuracy {ref.accuracy} and window size " + f"{ref.window_size} given a minimum standard deviation of " + f"{ref.min_noise_level} and a power of {power}." + ) return From 7f13f9bce343e4063da0fe5b2516ced1d8ae2f2f Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 21 May 2024 13:38:50 +0200 Subject: [PATCH 106/118] feat: exposed noise level estimation where it can be useful --- chemotools/smooth/__init__.py | 5 ++++- chemotools/utils/__init__.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/chemotools/smooth/__init__.py b/chemotools/smooth/__init__.py index ed6cdba2..7fb4133f 100644 --- a/chemotools/smooth/__init__.py +++ b/chemotools/smooth/__init__.py @@ -7,15 +7,18 @@ - :class:`SavitzkyGolayFilter` - :class:`WhittakerSmooth` -as well as auxiliary models to allow for convenient usage of the them: +as well as auxiliary models and functions to allow for convenient usage of the them: - :class:`WhittakerSmoothMethods` and :class:`WhittakerSmoothLambda` for the :class:`WhittakerSmooth` class. +- :func:`estimate_noise_stddev` to estimate the local/global noise level of a spectrum + which can then be used for weighting the data. 
""" ### Imports ### +from chemotools.utils._finite_differences import estimate_noise_stddev # noqa: F401 from chemotools.utils._models import ( # noqa: F401 WhittakerSmoothLambda, WhittakerSmoothMethods, diff --git a/chemotools/utils/__init__.py b/chemotools/utils/__init__.py index e69de29b..82c0d1ae 100644 --- a/chemotools/utils/__init__.py +++ b/chemotools/utils/__init__.py @@ -0,0 +1,14 @@ +""" +The utility module of ``chemotools`` that offers access to various utility functions +that can come in handy when working with chemical data. + +The module contains the following functions: + +- :func:`estimate_noise_stddev` to estimate the local/global noise level of a spectrum + which can then be used for weighting the data. + +""" + +### Imports ### + +from chemotools.utils._finite_differences import estimate_noise_stddev # noqa: F401 From b042e60b6c724012f83f48b8e64a8ba5fb67756c Mon Sep 17 00:00:00 2001 From: MothNik Date: Tue, 21 May 2024 17:54:34 +0200 Subject: [PATCH 107/118] refactor/docs: added kwargs for extrapolator; fixed wrong docstrings and comments; improved docstring --- chemotools/utils/_finite_differences.py | 48 ++++++++++++++++++++----- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py index c3a558b8..93a47195 100644 --- a/chemotools/utils/_finite_differences.py +++ b/chemotools/utils/_finite_differences.py @@ -12,7 +12,7 @@ from math import comb, factorial from numbers import Integral, Real -from typing import Any, Callable, Literal, Optional, Tuple, Union +from typing import Any, Callable, Dict, Literal, Optional, Tuple, Union import numpy as np from scipy.ndimage import median_filter @@ -380,6 +380,7 @@ def estimate_noise_stddev( window_size: Optional[int] = None, extrapolator: Callable[..., np.ndarray] = np.pad, extrapolator_args: Tuple[Any, ...] 
= ("reflect",), + extrapolator_kwargs: Optional[Dict[str, Any]] = None, power: Literal[-2, -1, 1, 2] = 1, stddev_min: Union[float, int] = 1e-10, ) -> np.ndarray: @@ -387,7 +388,7 @@ def estimate_noise_stddev( EXPERIMENTAL FEATURE Estimates the local/global noise standard deviation of a series even in the presence - of trends, like baselines and peaks, as well as outliers by using forward finite + of trends, like baselines and peaks, as well as outliers by using central finite differences. Please see the Notes section for further details. @@ -405,6 +406,8 @@ def estimate_noise_stddev( diff_accuracy : int, default=2 The accuracy of the finite difference approximation, which has to be an even integer ``>= 2``. + Higher values will enhance the effect of outliers that will corrupt the noise + estimation of their neighborhood. window_size : int or None, default=None The odd window size around a datapoint to estimate its local noise standard deviation. @@ -423,7 +426,12 @@ def estimate_noise_stddev( It has to be a callable with the following signature: ```python - series_extrap = extrapolator(series, pad_width, *extrapolator_args) + series_extrap = extrapolator( + series, + pad_width, + *extrapolator_args, + **extrapolator_kwargs, + ) ``` If ``window_size`` is ``None``, only the central finite differences kernel is @@ -432,8 +440,12 @@ def estimate_noise_stddev( side, but of course the quality of the noise estimation can be improved by using a more sophisticated extrapolation method. extrapolator_args : tuple, default=("reflect",) - Additional arguments that are passed to the extrapolator function as described - for ``extrapolator``. + Additional positional arguments that are passed to the extrapolator function as + described for ``extrapolator``. + extrapolator_kwargs : dict or None, default=None + Additional keyword arguments that are passed to the extrapolator function as + described for ``extrapolator``. + If ``None``, no additional keyword arguments are passed. 
power : {-2, -1, 1, 2}, default=1 The power to which the noise standard deviation is raised. This can be used to compute the: @@ -447,6 +459,8 @@ def estimate_noise_stddev( The minimum noise standard deviation that is allowed. Any estimated noise standard deviation below this value will be set to this value. + Borrowing an idea from image processing, the minimum noise standard deviation + can, e.g., be estimated from one or more feature-free regions of ``series``. It must be at least ``1e-15``. Returns @@ -483,7 +497,17 @@ def estimate_noise_stddev( applying a modified version of the Median Absolute Deviation (MAD) to the derivative/differences of the signal. By using a moving MAD filter, the local noise level can be estimated as well. - The algorithms does not work well for signals that are perfectly noise-free. + + From a workflow perspective, the following steps are performed on the signal: + + - The signal is extrapolated to avoid edge effects. + - The central finite differences are computed. + - Their absolute values are taken. + - The median (global) or median filter (local) is applied to these absolute + differences. With proper scaling, this will give an estimate of the noise level. + + There is one limitation, namely that the algorithm does not work well for signals + that are perfectly noise-free, but this is a rare case in practice. The kernel size for the central finite difference kernel is given by ``2 * floor((differences + 1) / 2) - 1 + diff_accuracy``. @@ -506,7 +530,7 @@ def estimate_noise_stddev( ) if window_size % 2 == 0: raise ValueError( - "Got window_size = {window_size}, expected an odd integer." + f"Got window_size = {window_size}, expected an odd integer." ) # power @@ -543,6 +567,13 @@ def estimate_noise_stddev( "size)." 
) + ### Preparation ### + + # the keyword arguments for the extrapolator are set up + extrapolator_kwargs = ( + extrapolator_kwargs if extrapolator_kwargs is not None else dict() + ) + ### Noise Standard Deviation Estimation ### # the signal is extrapolated to avoid edge effects @@ -552,9 +583,10 @@ def estimate_noise_stddev( series, pad_width, *extrapolator_args, + **extrapolator_kwargs, ) - # the absolute forward finite differences are computed ... + # the absolute central finite differences are computed ... abs_diff_series = np.abs( np.convolve(series_extrap, np.flip(diff_kernel), mode="valid") ) From e6e8405933212950e973f4ac1706398906ed2cda Mon Sep 17 00:00:00 2001 From: MothNik Date: Thu, 23 May 2024 13:08:38 +0200 Subject: [PATCH 108/118] refactor: renamed `window_size` to `window_length` --- chemotools/utils/_finite_differences.py | 36 +++++++++---------- tests/fixtures.py | 6 ++-- .../test_for_utils/test_finite_differences.py | 14 ++++---- tests/test_for_utils/utils_models.py | 2 +- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py index 93a47195..066776e7 100644 --- a/chemotools/utils/_finite_differences.py +++ b/chemotools/utils/_finite_differences.py @@ -377,7 +377,7 @@ def estimate_noise_stddev( series: np.ndarray, differences: int = 6, diff_accuracy: int = 2, - window_size: Optional[int] = None, + window_length: Optional[int] = None, extrapolator: Callable[..., np.ndarray] = np.pad, extrapolator_args: Tuple[Any, ...] = ("reflect",), extrapolator_kwargs: Optional[Dict[str, Any]] = None, @@ -408,7 +408,7 @@ def estimate_noise_stddev( integer ``>= 2``. Higher values will enhance the effect of outliers that will corrupt the noise estimation of their neighborhood. - window_size : int or None, default=None + window_length : int or None, default=None The odd window size around a datapoint to estimate its local noise standard deviation. 
Higher values will lead to a smoother noise standard deviation estimate by @@ -420,7 +420,7 @@ def estimate_noise_stddev( extrapolator : callable, default=np.pad The extrapolator function that is used to pad the series before the finite differences and the median filter are applied. It will pad the signal with - ``pad_width = (diff_kernel_size // 2) + (window_size // 2)`` elements on each + ``pad_width = (diff_kernel_size // 2) + (window_length // 2)`` elements on each side where ``diff_kernel_size`` is the size of the central finite differences kernel (see the Notes for details). It has to be a callable with the following signature: @@ -434,7 +434,7 @@ def estimate_noise_stddev( ) ``` - If ``window_size`` is ``None``, only the central finite differences kernel is + If ``window_length`` is ``None``, only the central finite differences kernel is considered. By default, the signal is padded by reflecting ``series`` at the edges on either side, but of course the quality of the noise estimation can be improved by using @@ -479,7 +479,7 @@ def estimate_noise_stddev( ValueError If ``diff_accuracy`` is not an even integer ``>= 2``. ValueError - If ``window_size`` is below 1. + If ``window_length`` is below 1. References @@ -520,17 +520,17 @@ def estimate_noise_stddev( # NOTE: the difference order and accuracy are by the central finite differences # kernel function # window size - if window_size is not None: + if window_length is not None: check_scalar( - window_size, - name="window_size", + window_length, + name="window_length", target_type=Integral, min_val=1, include_boundaries="left", ) - if window_size % 2 == 0: + if window_length % 2 == 0: raise ValueError( - f"Got window_size = {window_size}, expected an odd integer." + f"Got window_length = {window_length}, expected an odd integer." ) # power @@ -560,10 +560,10 @@ def estimate_noise_stddev( f"size)." 
) - if window_size is not None: - if series.size < window_size: + if window_length is not None: + if series.size < window_length: raise ValueError( - f"Got series.size = {series.size}, must be >= {window_size} (window " + f"Got series.size = {series.size}, must be >= {window_length} (window " "size)." ) @@ -578,7 +578,7 @@ def estimate_noise_stddev( # the signal is extrapolated to avoid edge effects pad_width = diff_kernel.size // 2 - pad_width += 0 if window_size is None else window_size // 2 + pad_width += 0 if window_length is None else window_length // 2 series_extrap = extrapolator( series, pad_width, @@ -595,7 +595,7 @@ def estimate_noise_stddev( # ... and the median filter is applied to theses differences prefactor = _MAD_PREFACTOR / np.linalg.norm(diff_kernel) # Case 1: the global noise standard deviation is estimated - if window_size is None: + if window_length is None: noise_stddev = np.full_like( series, fill_value=prefactor * np.median(abs_diff_series), @@ -603,14 +603,14 @@ def estimate_noise_stddev( # Case 2: the local noise standard deviation is estimated else: - half_window_size = window_size // 2 + half_window_length = window_length // 2 noise_stddev = ( prefactor * median_filter( abs_diff_series, - size=window_size, + size=window_length, mode="constant", - )[half_window_size : size_after_diff - half_window_size] + )[half_window_length : size_after_diff - half_window_length] ) # the minimum-bounded noise standard deviation is raised to the power diff --git a/tests/fixtures.py b/tests/fixtures.py index 8007ba4c..81597c92 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -209,11 +209,11 @@ def noise_level_estimation_refs() -> List[NoiseEstimationReference]: row = data[row_idx, ::] # if the window size is 0, it is set to None because this indicates that the # global noise level is to be estimated rather than a local one - window_size = int(row[0]) - window_size = window_size if window_size > 0 else None + window_length = int(row[0]) + 
window_length = window_length if window_length > 0 else None noise_level_refs.append( NoiseEstimationReference( - window_size=window_size, + window_length=window_length, min_noise_level=row[1], differences=round(row[2]), accuracy=round(row[3]), diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py index 040a39d4..424ba139 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -194,7 +194,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( @pytest.mark.parametrize( - "series, differences, accuracy, window_size, power, stddev_min", + "series, differences, accuracy, window_length, power, stddev_min", [ ( # Number 0 series is too small for difference kernel np.arange(start=0, stop=5), @@ -338,7 +338,7 @@ def test_estimate_noise_stddev_invalid_input( series: np.ndarray, differences: int, accuracy: int, - window_size: Optional[int], + window_length: Optional[int], power: int, stddev_min: float, ) -> None: @@ -363,7 +363,7 @@ def test_estimate_noise_stddev_invalid_input( series=series, differences=differences, diff_accuracy=accuracy, - window_size=window_size, + window_length=window_length, power=power, # type: ignore stddev_min=stddev_min, ) @@ -388,7 +388,7 @@ def test_noise_level_estimation( series=noise_level_estimation_signal, differences=ref.differences, diff_accuracy=ref.accuracy, - window_size=ref.window_size, + window_length=ref.window_length, stddev_min=ref.min_noise_level, ) # then, the noise level itself is compared to the reference in a quite strict @@ -398,7 +398,7 @@ def test_noise_level_estimation( assert np.allclose(noise_level, ref.noise_level, rtol=1e-12), ( f"Original noise level differs from reference noise for differences " f"{ref.differences} with accuracy {ref.accuracy} and window size " - f"{ref.window_size} given a minimum standard deviation of " + f"{ref.window_length} given a minimum standard deviation of " 
f"{ref.min_noise_level}." ) @@ -409,7 +409,7 @@ def test_noise_level_estimation( series=noise_level_estimation_signal, differences=ref.differences, diff_accuracy=ref.accuracy, - window_size=ref.window_size, + window_length=ref.window_length, stddev_min=ref.min_noise_level, power=power, ) @@ -420,7 +420,7 @@ def test_noise_level_estimation( ), ( f"Raised noise level differs from reference noise for differences " f"{ref.differences} with accuracy {ref.accuracy} and window size " - f"{ref.window_size} given a minimum standard deviation of " + f"{ref.window_length} given a minimum standard deviation of " f"{ref.min_noise_level} and a power of {power}." ) diff --git a/tests/test_for_utils/utils_models.py b/tests/test_for_utils/utils_models.py index 1a4c85b1..cc51fccc 100644 --- a/tests/test_for_utils/utils_models.py +++ b/tests/test_for_utils/utils_models.py @@ -41,7 +41,7 @@ class NoiseEstimationReference: """ - window_size: Optional[int] + window_length: Optional[int] min_noise_level: float differences: int accuracy: int From c60c8e356840a3b5ee31fc7ee2291080713f4b17 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 24 May 2024 13:54:43 +0200 Subject: [PATCH 109/118] doc: added proper Notes for weight selection strategies of `WhittakerSmooth` --- chemotools/smooth/_whittaker_smooth.py | 46 ++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 71b38163..007c5a94 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -256,13 +256,37 @@ def transform( sample_weight : ndarray of shape (n_features,), (n_samples, n_features), (1, n_features), or None, default=None Individual weights for each of the input data. If only 1 weight vector is provided, it is assumed to be the same for the features all samples. + No weights may be negative (< 0.0) and at least one weight needs to be + positive (> 0.0). 
+ Providing them is mandatory when the optimum penalty weight ``lam`` is to be + determined automatically via the log marginal likelihood (``"logml"``) + method. If ``None``, all features are assumed to have the same weight. + Please refer to the Notes section for further details on selecting the + weights. Returns ------- X_smoothed : ndarray of shape (n_samples, n_features) The transformed data. + Notes + ----- + If estimates of the standard deviations ``s_i`` of each data point are + available, e.g., from theoretical considerations or repeated measurements, it is + recommended to use the inverse of the squared standard deviations as weights, + i.e., ``w_i = 1 / (s_i * s_i)``. This is a very effective way to down-weight + noisy data points and thus reduce the risk of noise-induced artifacts in the + smoothed signal. On the other hand, features measured with high confidence will + remain well-preserved even under strong smoothing. + Sometimes, it is infeasible to provide standard deviations because theoretical + considerations are not appropriate and replicate measurements are not available/ + feasible. In such scenarios, the weights can still be estimated by making use of + the function :func:`chemotools.smooth.estimate_noise_stddev` with a `power=-2`. + It relies on the parameter ``window_length`` to estimate the local/global noise + standard deviation of the spectrum, but please refer to the documentation of the + function for further details. + """ # noqa: E501 # Check that the estimator is fitted @@ -313,13 +337,35 @@ def fit_transform( provided, it is assumed to be the same for the features all samples. No weights may be negative (< 0.0) and at least one weight needs to be positive (> 0.0). + Providing them is mandatory when the optimum penalty weight ``lam`` is to be + determined automatically via the log marginal likelihood (``"logml"``) + method. If ``None``, all features are assumed to have the same weight. 
+ Please refer to the Notes section for further details on selecting the + weights. Returns ------- X_smoothed : ndarray of shape (n_samples, n_features) The transformed data. + Notes + ----- + If estimates of the standard deviations ``s_i`` of each data point are + available, e.g., from theoretical considerations or repeated measurements, it is + recommended to use the inverse of the squared standard deviations as weights, + i.e., ``w_i = 1 / (s_i * s_i)``. This is a very effective way to down-weight + noisy data points and thus reduce the risk of noise-induced artifacts in the + smoothed signal. On the other hand, features measured with high confidence will + remain well-preserved even under strong smoothing. + Sometimes, it is infeasible to provide standard deviations because theoretical + considerations are not appropriate and replicate measurements are not available/ + feasible. In such scenarios, the weights can still be estimated by making use of + the function :func:`chemotools.smooth.estimate_noise_stddev` with a `power=-2`. + It relies on the parameter ``window_length`` to estimate the local/global noise + standard deviation of the spectrum, but please refer to the documentation of the + function for further details. 
+ """ # noqa: E501 return self.fit(X=X).transform(X=X, sample_weight=sample_weight) From 4a1be7e86b9e5061942c483eb0ed638ef01257d9 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 24 May 2024 15:00:14 +0200 Subject: [PATCH 110/118] refactor: added `psutil`-install for `pytest-xdist` --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 7d04e5dd..85a1b2d2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,4 +5,4 @@ matplotlib pentapy pytest pytest-cov -pytest-xdist \ No newline at end of file +pytest-xdist[psutil] \ No newline at end of file From 109be51168ac782b506995a445235bcdced1c9b2 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 24 May 2024 17:57:29 +0200 Subject: [PATCH 111/118] Revert "refactor: renamed `window_size` to `window_length`" This reverts commit e6e8405933212950e973f4ac1706398906ed2cda. --- chemotools/utils/_finite_differences.py | 36 +++++++++---------- tests/fixtures.py | 6 ++-- .../test_for_utils/test_finite_differences.py | 14 ++++---- tests/test_for_utils/utils_models.py | 2 +- 4 files changed, 29 insertions(+), 29 deletions(-) diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py index 066776e7..93a47195 100644 --- a/chemotools/utils/_finite_differences.py +++ b/chemotools/utils/_finite_differences.py @@ -377,7 +377,7 @@ def estimate_noise_stddev( series: np.ndarray, differences: int = 6, diff_accuracy: int = 2, - window_length: Optional[int] = None, + window_size: Optional[int] = None, extrapolator: Callable[..., np.ndarray] = np.pad, extrapolator_args: Tuple[Any, ...] = ("reflect",), extrapolator_kwargs: Optional[Dict[str, Any]] = None, @@ -408,7 +408,7 @@ def estimate_noise_stddev( integer ``>= 2``. Higher values will enhance the effect of outliers that will corrupt the noise estimation of their neighborhood. 
- window_length : int or None, default=None + window_size : int or None, default=None The odd window size around a datapoint to estimate its local noise standard deviation. Higher values will lead to a smoother noise standard deviation estimate by @@ -420,7 +420,7 @@ def estimate_noise_stddev( extrapolator : callable, default=np.pad The extrapolator function that is used to pad the series before the finite differences and the median filter are applied. It will pad the signal with - ``pad_width = (diff_kernel_size // 2) + (window_length // 2)`` elements on each + ``pad_width = (diff_kernel_size // 2) + (window_size // 2)`` elements on each side where ``diff_kernel_size`` is the size of the central finite differences kernel (see the Notes for details). It has to be a callable with the following signature: @@ -434,7 +434,7 @@ def estimate_noise_stddev( ) ``` - If ``window_length`` is ``None``, only the central finite differences kernel is + If ``window_size`` is ``None``, only the central finite differences kernel is considered. By default, the signal is padded by reflecting ``series`` at the edges on either side, but of course the quality of the noise estimation can be improved by using @@ -479,7 +479,7 @@ def estimate_noise_stddev( ValueError If ``diff_accuracy`` is not an even integer ``>= 2``. ValueError - If ``window_length`` is below 1. + If ``window_size`` is below 1. References @@ -520,17 +520,17 @@ def estimate_noise_stddev( # NOTE: the difference order and accuracy are by the central finite differences # kernel function # window size - if window_length is not None: + if window_size is not None: check_scalar( - window_length, - name="window_length", + window_size, + name="window_size", target_type=Integral, min_val=1, include_boundaries="left", ) - if window_length % 2 == 0: + if window_size % 2 == 0: raise ValueError( - f"Got window_length = {window_length}, expected an odd integer." + f"Got window_size = {window_size}, expected an odd integer." 
) # power @@ -560,10 +560,10 @@ def estimate_noise_stddev( f"size)." ) - if window_length is not None: - if series.size < window_length: + if window_size is not None: + if series.size < window_size: raise ValueError( - f"Got series.size = {series.size}, must be >= {window_length} (window " + f"Got series.size = {series.size}, must be >= {window_size} (window " "size)." ) @@ -578,7 +578,7 @@ def estimate_noise_stddev( # the signal is extrapolated to avoid edge effects pad_width = diff_kernel.size // 2 - pad_width += 0 if window_length is None else window_length // 2 + pad_width += 0 if window_size is None else window_size // 2 series_extrap = extrapolator( series, pad_width, @@ -595,7 +595,7 @@ def estimate_noise_stddev( # ... and the median filter is applied to theses differences prefactor = _MAD_PREFACTOR / np.linalg.norm(diff_kernel) # Case 1: the global noise standard deviation is estimated - if window_length is None: + if window_size is None: noise_stddev = np.full_like( series, fill_value=prefactor * np.median(abs_diff_series), @@ -603,14 +603,14 @@ def estimate_noise_stddev( # Case 2: the local noise standard deviation is estimated else: - half_window_length = window_length // 2 + half_window_size = window_size // 2 noise_stddev = ( prefactor * median_filter( abs_diff_series, - size=window_length, + size=window_size, mode="constant", - )[half_window_length : size_after_diff - half_window_length] + )[half_window_size : size_after_diff - half_window_size] ) # the minimum-bounded noise standard deviation is raised to the power diff --git a/tests/fixtures.py b/tests/fixtures.py index 81597c92..8007ba4c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -209,11 +209,11 @@ def noise_level_estimation_refs() -> List[NoiseEstimationReference]: row = data[row_idx, ::] # if the window size is 0, it is set to None because this indicates that the # global noise level is to be estimated rather than a local one - window_length = int(row[0]) - window_length = 
window_length if window_length > 0 else None + window_size = int(row[0]) + window_size = window_size if window_size > 0 else None noise_level_refs.append( NoiseEstimationReference( - window_length=window_length, + window_size=window_size, min_noise_level=row[1], differences=round(row[2]), accuracy=round(row[3]), diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py index 424ba139..040a39d4 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -194,7 +194,7 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( @pytest.mark.parametrize( - "series, differences, accuracy, window_length, power, stddev_min", + "series, differences, accuracy, window_size, power, stddev_min", [ ( # Number 0 series is too small for difference kernel np.arange(start=0, stop=5), @@ -338,7 +338,7 @@ def test_estimate_noise_stddev_invalid_input( series: np.ndarray, differences: int, accuracy: int, - window_length: Optional[int], + window_size: Optional[int], power: int, stddev_min: float, ) -> None: @@ -363,7 +363,7 @@ def test_estimate_noise_stddev_invalid_input( series=series, differences=differences, diff_accuracy=accuracy, - window_length=window_length, + window_size=window_size, power=power, # type: ignore stddev_min=stddev_min, ) @@ -388,7 +388,7 @@ def test_noise_level_estimation( series=noise_level_estimation_signal, differences=ref.differences, diff_accuracy=ref.accuracy, - window_length=ref.window_length, + window_size=ref.window_size, stddev_min=ref.min_noise_level, ) # then, the noise level itself is compared to the reference in a quite strict @@ -398,7 +398,7 @@ def test_noise_level_estimation( assert np.allclose(noise_level, ref.noise_level, rtol=1e-12), ( f"Original noise level differs from reference noise for differences " f"{ref.differences} with accuracy {ref.accuracy} and window size " - f"{ref.window_length} given a minimum standard deviation of " + 
f"{ref.window_size} given a minimum standard deviation of " f"{ref.min_noise_level}." ) @@ -409,7 +409,7 @@ def test_noise_level_estimation( series=noise_level_estimation_signal, differences=ref.differences, diff_accuracy=ref.accuracy, - window_length=ref.window_length, + window_size=ref.window_size, stddev_min=ref.min_noise_level, power=power, ) @@ -420,7 +420,7 @@ def test_noise_level_estimation( ), ( f"Raised noise level differs from reference noise for differences " f"{ref.differences} with accuracy {ref.accuracy} and window size " - f"{ref.window_length} given a minimum standard deviation of " + f"{ref.window_size} given a minimum standard deviation of " f"{ref.min_noise_level} and a power of {power}." ) diff --git a/tests/test_for_utils/utils_models.py b/tests/test_for_utils/utils_models.py index cc51fccc..1a4c85b1 100644 --- a/tests/test_for_utils/utils_models.py +++ b/tests/test_for_utils/utils_models.py @@ -41,7 +41,7 @@ class NoiseEstimationReference: """ - window_length: Optional[int] + window_size: Optional[int] min_noise_level: float differences: int accuracy: int From 124cf503ecf3bf348c74f812081ef93f53dcc713 Mon Sep 17 00:00:00 2001 From: MothNik Date: Fri, 24 May 2024 18:00:50 +0200 Subject: [PATCH 112/118] refactor: renamed `window_length` in docstring to `window_size` --- chemotools/smooth/_whittaker_smooth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 007c5a94..0d565991 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -283,7 +283,7 @@ def transform( considerations are not appropriate and replicate measurements are not available/ feasible. In such scenarios, the weights can still be estimated by making use of the function :func:`chemotools.smooth.estimate_noise_stddev` with a `power=-2`. 
- It relies on the parameter ``window_length`` to estimate the local/global noise + It relies on the parameter ``window_size`` to estimate the local/global noise standard deviation of the spectrum, but please refer to the documentation of the function for further details. @@ -362,7 +362,7 @@ def fit_transform( considerations are not appropriate and replicate measurements are not available/ feasible. In such scenarios, the weights can still be estimated by making use of the function :func:`chemotools.smooth.estimate_noise_stddev` with a `power=-2`. - It relies on the parameter ``window_length`` to estimate the local/global noise + It relies on the parameter ``window_size`` to estimate the local/global noise standard deviation of the spectrum, but please refer to the documentation of the function for further details. From 5697fd99b17997fa361d3fd4ef44407f1e5e8537 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 24 Jun 2024 20:58:47 +0200 Subject: [PATCH 113/118] style: [44] - renamed all variables in `utils.banded_linalg` and `utils.finite_differences` and the respective tests to be more concise --- chemotools/utils/_banded_linalg.py | 70 ++--- chemotools/utils/_finite_differences.py | 187 +++++++----- chemotools/utils/_models.py | 30 +- .../utils/_whittaker_base/initialisation.py | 14 +- tests/fixtures.py | 2 +- tests/test_for_utils/test_banded_linalg.py | 116 ++++--- .../test_for_utils/test_finite_differences.py | 190 +++++++----- tests/test_for_utils/test_whittaker_base.py | 2 +- tests/test_for_utils/utils_funcs.py | 288 ++++++++++-------- 9 files changed, 517 insertions(+), 382 deletions(-) diff --git a/chemotools/utils/_banded_linalg.py b/chemotools/utils/_banded_linalg.py index 7ceb2e6d..5479424b 100644 --- a/chemotools/utils/_banded_linalg.py +++ b/chemotools/utils/_banded_linalg.py @@ -46,7 +46,7 @@ def _datacopied(arr, original): return arr.base is None -def conv_upper_chol_banded_to_lu_banded_storage( +def convert_upper_chol_banded_to_lu_banded_storage( ab: 
np.ndarray, ) -> tuple[LAndUBandCounts, np.ndarray]: """ @@ -117,20 +117,20 @@ def conv_upper_chol_banded_to_lu_banded_storage( """ # an Array is initialised to store the subdiagonal part - num_low_diags = ab.shape[0] - 1 - main_diag_idx = num_low_diags - n_cols = ab.shape[1] - ab_subdiags = np.zeros(shape=(num_low_diags, n_cols), dtype=ab.dtype) - - for offset in range(1, num_low_diags + 1): - ab_subdiags[offset - 1, 0 : n_cols - offset] = ab[ - main_diag_idx - offset, offset:None + num_subdiagonals = ab.shape[0] - 1 + main_diagonal_index = num_subdiagonals + num_columns = ab.shape[1] + ab_subdiagonals = np.zeros(shape=(num_subdiagonals, num_columns), dtype=ab.dtype) + + for offset in range(1, num_subdiagonals + 1): + ab_subdiagonals[offset - 1, 0 : num_columns - offset] = ab[ + main_diagonal_index - offset, offset:None ] # the subdiagonal part is then concatenated to the original array and the result is # returned - l_and_u = (num_low_diags, num_low_diags) - return l_and_u, np.row_stack((ab, ab_subdiags)) + l_and_u = (num_subdiagonals, num_subdiagonals) + return l_and_u, np.row_stack((ab, ab_subdiagonals)) ### LAPACK-Wrappers for banded LU decomposition ### @@ -219,12 +219,12 @@ def lu_banded( # then, the number of lower and upper subdiagonals needs to be checked for being # consistent with the shape of ``ab`` - num_low_diags, num_upp_diags = l_and_u - if num_low_diags + num_upp_diags + 1 != ab.shape[0]: # pragma: no cover + num_subdiagonals, num_superdiagonals = l_and_u + if num_subdiagonals + num_superdiagonals + 1 != ab.shape[0]: # pragma: no cover raise ValueError( - f"\nInvalid values for the number of lower and upper " - f"diagonals: l+u+1 ({num_low_diags + num_upp_diags + 1}) does not equal " - f"ab.shape[0] ({ab.shape[0]})." + f"\nInvalid values for the number of sub- and super " + f"diagonals: l+u+1 ({num_subdiagonals + num_superdiagonals + 1}) does not " + f"equal ab.shape[0] ({ab.shape[0]})." 
) # now, the LAPACK-routines can be called @@ -235,12 +235,12 @@ def lu_banded( (gbtrf,) = lapack.get_lapack_funcs((lapack_routine,), (ab,)) lpkc_ab = np.row_stack( ( - np.zeros((num_low_diags, ab.shape[1]), dtype=ab.dtype), + np.zeros((num_subdiagonals, ab.shape[1]), dtype=ab.dtype), ab, ) ) lub, ipiv, info = gbtrf( - ab=lpkc_ab, kl=num_low_diags, ku=num_upp_diags, overwrite_ab=True + ab=lpkc_ab, kl=num_subdiagonals, ku=num_superdiagonals, overwrite_ab=True ) # then, the results needs to be validated and returned @@ -308,29 +308,29 @@ def lu_solve_banded( if check_finite: lub_factorization.lub = np.asarray_chkfinite(lub_factorization.lub) lub_factorization.ipiv = np.asarray_chkfinite(lub_factorization.ipiv) - b_inter = np.asarray_chkfinite(b) + b_internal = np.asarray_chkfinite(b) else: lub_factorization.lub = np.asarray(lub_factorization.lub) lub_factorization.ipiv = np.asarray(lub_factorization.ipiv) - b_inter = np.asarray(b) + b_internal = np.asarray(b) - overwrite_b = overwrite_b or _datacopied(b_inter, b) + overwrite_b = overwrite_b or _datacopied(b_internal, b) # then, the shapes of the LU decomposition and ``b`` need to be validated against # each other - if lub_factorization.n_cols != b_inter.shape[0]: # pragma: no cover + if lub_factorization.num_cols != b_internal.shape[0]: # pragma: no cover raise ValueError( - f"\nShapes of lub ({lub_factorization.n_cols}) and b " - f"({b_inter.shape[0]}) are not compatible." + f"\nShapes of lub ({lub_factorization.num_cols}) and b " + f"({b_internal.shape[0]}) are not compatible." 
) # now, the LAPACK-routine is called - (gbtrs,) = lapack.get_lapack_funcs(("gbtrs",), (lub_factorization.lub, b)) + (gbtrs,) = lapack.get_lapack_funcs(("gbtrs",), (lub_factorization.lub, b_internal)) x, info = gbtrs( ab=lub_factorization.lub, kl=lub_factorization.l_and_u[0], ku=lub_factorization.l_and_u[1], - b=b, + b=b_internal, ipiv=lub_factorization.ipiv, overwrite_b=overwrite_b, ) @@ -338,7 +338,7 @@ def lu_solve_banded( # then, the results needs to be validated and returned # Case 1: the solution could be computed truly successfully, i.e., without any # NaN-values - if info == 0 and not np.any(np.isnan(x)): + if info == 0 and not np.isnan(x).any(): return x # Case 2: the solution was computed, but there were NaN-values in it @@ -389,13 +389,15 @@ def slogdet_lu_banded( """ # first, the number of actual row exchanges needs to be counted - unchanged_row_idxs = np.arange( + unchanged_row_indices = np.arange( start=0, - stop=lub_factorization.n_cols, + stop=lub_factorization.num_cols, step=1, dtype=lub_factorization.ipiv.dtype, ) - num_row_exchanges = np.count_nonzero(lub_factorization.ipiv != unchanged_row_idxs) + num_row_exchanges = np.count_nonzero( + lub_factorization.ipiv != unchanged_row_indices + ) # the sign-prefactor of the determinant is either +1 or -1 depending on whether the # number of row exchanges is even or odd @@ -404,10 +406,10 @@ def slogdet_lu_banded( # since the determinant (without sign prefactor) is just the product of the diagonal # product of L and the diagonal product of U, the calculation simplifies. 
As the # main diagonal of L is a vector of ones, only the diagonal product of U is required - main_diag = lub_factorization.lub[lub_factorization.main_diag_row_idx, ::] - u_diag_sign_is_pos = np.count_nonzero(main_diag < 0.0) % 2 == 0 + main_diagonal = lub_factorization.lub[lub_factorization.main_diagonal_row_index, ::] + u_diagonal_sign_is_positive = np.count_nonzero(main_diagonal < 0.0) % 2 == 0 with np.errstate(divide="ignore", over="ignore"): - logabsdet = np.log(np.abs(main_diag)).sum() + logabsdet = np.log(np.abs(main_diagonal)).sum() # logarithms of zero are already properly handled, so there is not reason to worry # about, since they are -inf which will result in a zero determinant in exp(); @@ -425,7 +427,7 @@ def slogdet_lu_banded( if np.isneginf(logabsdet): # pragma: no cover return 0.0, logabsdet - if u_diag_sign_is_pos: + if u_diagonal_sign_is_positive: return sign, logabsdet return -sign, logabsdet diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py index 93a47195..662dda78 100644 --- a/chemotools/utils/_finite_differences.py +++ b/chemotools/utils/_finite_differences.py @@ -25,7 +25,7 @@ ### Functions ### -def calc_forward_diff_kernel( +def forward_finite_difference_kernel( *, differences: int, ) -> np.ndarray: @@ -68,17 +68,22 @@ def calc_forward_diff_kernel( include_boundaries="left", ) - # afterwards, the kernel is computed using the binomial coefficients + # afterwards, the kernel is computed using the binomial coefficients with + # alternating signs return np.array( [ - ((-1) ** iter_i) * comb(differences, iter_i) + (-1 if iter_i % 2 == 1 else 1) * comb(differences, iter_i) for iter_i in range(differences, -1, -1) ], dtype=np.int64, ) -def calc_central_diff_kernel(*, differences: int, accuracy: int = 2) -> np.ndarray: +def central_finite_difference_coefficients( + *, + differences: int, + accuracy: int = 2, +) -> np.ndarray: """ Computes the kernel for central finite differences which can be applied to 
a series by means of a convolution, e.g., @@ -153,23 +158,31 @@ def calc_central_diff_kernel(*, differences: int, accuracy: int = 2) -> np.ndarr half_kernel_size = kernel_size // 2 # then, the linear system to solve for the coefficients is set up - grid_vect = np.arange( + grid_point_vect = np.arange( start=-half_kernel_size, stop=half_kernel_size + 1, step=1, dtype=np.int64, ) - lhs_mat = np.vander(grid_vect, N=kernel_size, increasing=True).transpose() + # NOTE: lhs is "left-hand side" and rhs is "right-hand side" + lhs_matrix = np.vander( + grid_point_vect, + N=kernel_size, + increasing=True, + ) rhs_vect = np.zeros(shape=(kernel_size,), dtype=np.int64) rhs_vect[differences] = factorial(differences) # the coefficients are computed by solving the linear system - return np.linalg.solve(lhs_mat, rhs_vect) + return np.linalg.solve( + lhs_matrix.transpose(), + rhs_vect, + ) -def _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( +def _squared_forward_difference_matrix_banded_transpose_first( *, - n_data: int, + num_data: int, differences: int, ) -> np.ndarray: """ @@ -184,31 +197,40 @@ def _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( # the problems has to be split into a leading, a central, and a trailing part # first, the leading part is computed because it might be that this is already # enough - # first, the kernel for the forward differences is computed and the bandwidth is + # for this, the kernel for the forward differences is computed and the bandwidth is # determined - kernel = calc_forward_diff_kernel(differences=differences) - n_bands = 1 + 2 * differences - lead_n_rows = min(kernel.size, n_data - kernel.size + 1) - lead_n_cols = kernel.size + lead_n_rows - 1 + kernel = forward_finite_difference_kernel(differences=differences) + num_diagonals = 1 + 2 * differences + leading_num_rows = min(kernel.size, num_data - kernel.size + 1) + leading_num_cols = kernel.size + leading_num_rows - 1 # the leading matrix is computed as a dense matrix - dtd = 
np.zeros(shape=(lead_n_rows, lead_n_cols), dtype=np.int64) - for row_idx in range(0, lead_n_rows): - dtd[row_idx, row_idx : row_idx + kernel.size] = kernel + leading_dt_dot_d_dense = np.zeros( + shape=(leading_num_rows, leading_num_cols), + dtype=np.int64, + ) + for row_index in range(0, leading_num_rows): + leading_dt_dot_d_dense[row_index, row_index : row_index + kernel.size] = kernel # its squared form is computed - dtd = dtd.T @ dtd + leading_dt_dot_d_dense = leading_dt_dot_d_dense.T @ leading_dt_dot_d_dense # now, the leading matrix is converted to a banded matrix - dtd_banded = np.zeros(shape=(differences + 1, lead_n_cols), dtype=np.int64) - for diag_idx in range(0, differences + 1): - offset = differences - diag_idx - dtd_banded[diag_idx, offset:None] = np.diag(dtd, k=offset) + leading_dt_dot_d_banded = np.zeros( + shape=(differences + 1, leading_num_cols), + dtype=np.int64, + ) + for diagonal_index in range(0, differences + 1): + offset = differences - diagonal_index + leading_dt_dot_d_banded[diagonal_index, offset:None] = np.diag( + leading_dt_dot_d_dense, + k=offset, + ) # if the number of data points is less than the kernel size minus one, the # leading matrix is already the final matrix - if n_data <= n_bands: - return dtd_banded + if num_data <= num_diagonals: + return leading_dt_dot_d_banded # otherwise, a central part has to be inserted # this turns out to be just a column-wise repetition of the kernel computed with @@ -216,21 +238,23 @@ def _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( # computed leading D.T @ D matrix # NOTE: the doubled kernel is the most central column of the banded D.T @ D already # computed - central_n_cols = n_data - dtd_banded.shape[1] - kernel_double = dtd_banded[::, kernel.size - 1].reshape((-1, 1)) + central_n_cols = num_data - leading_dt_dot_d_banded.shape[1] + kernel_double_differences = leading_dt_dot_d_banded[::, kernel.size - 1].reshape( + (-1, 1) + ) return np.concatenate( ( - dtd_banded[::, 0 : kernel.size], - 
np.tile(kernel_double, (1, central_n_cols)), - dtd_banded[::, kernel.size :], + leading_dt_dot_d_banded[::, 0 : kernel.size], + np.tile(kernel_double_differences, (1, central_n_cols)), + leading_dt_dot_d_banded[::, kernel.size :], ), axis=1, ) -def _gen_squ_fw_fin_diff_mat_cho_banded_orig_first( +def _squared_forward_difference_matrix_banded_original_first( *, - n_data: int, + num_data: int, differences: int, ) -> np.ndarray: """ @@ -244,30 +268,35 @@ def _gen_squ_fw_fin_diff_mat_cho_banded_orig_first( # this case is simpler than the transposed case because the matrix is just a # Toeplitz matrix with the kernel of double the difference order on the diagonal - kernel_double = calc_forward_diff_kernel(differences=2 * differences)[ - differences:None - ] + kernel_double_differences = forward_finite_difference_kernel( + differences=2 * differences + )[differences:None] # for an odd difference order, the sign of the kernel has to be flipped if differences % 2 == 1: - kernel_double = np.negative(kernel_double) + kernel_double_differences = np.negative(kernel_double_differences) - n_rows = n_data - kernel_double.size + 1 - n_upp_plus_central_bands = min(n_rows, 1 + differences) + num_rows = num_data - kernel_double_differences.size + 1 + num_upper_plus_central_diagonals = min(num_rows, 1 + differences) # the matrix is computed as a dense and simple filled by means of a loop - ddt_banded = np.zeros(shape=(n_upp_plus_central_bands, n_rows), dtype=np.int64) - main_diag_idx = min(differences, n_upp_plus_central_bands - 1) - for offset in range(0, n_upp_plus_central_bands): - ddt_banded[main_diag_idx - offset, offset:None] = kernel_double[offset] + d_dot_dt_banded = np.zeros( + shape=(num_upper_plus_central_diagonals, num_rows), + dtype=np.int64, + ) + main_diagonal_row_index = min(differences, num_upper_plus_central_diagonals - 1) + for offset in range(0, num_upper_plus_central_diagonals): + d_dot_dt_banded[main_diagonal_row_index - offset, offset:None] = ( + 
kernel_double_differences[offset] + ) - return ddt_banded + return d_dot_dt_banded -def gen_squ_fw_fin_diff_mat_cho_banded( +def squared_forward_difference_matrix_banded( *, - n_data: int, + num_data: int, differences: int, - orig_first: bool, + original_first: bool, ) -> np.ndarray: """ Generates the squared forward finite differences matrix ``D @ D.T`` or ``D.T @ D`` @@ -279,7 +308,7 @@ def gen_squ_fw_fin_diff_mat_cho_banded( Parameters ---------- - n_data : int + num_data : int The number of data points in the series to which the forward finite differences are applied. differences : int @@ -287,7 +316,7 @@ def gen_squ_fw_fin_diff_mat_cho_banded( first order, 2 for the second order, ..., and ``m`` for the ``m``-th order differences. Values below 1 are not allowed. - orig_first : bool + original_first : bool If ``True``, the squared forward finite differences matrix ``D @ D.T`` is computed. Otherwise, the squared forward finite differences matrix ``D.T @ D`` is computed. @@ -349,7 +378,7 @@ def gen_squ_fw_fin_diff_mat_cho_banded( # support the kernel for the respective difference order at least once try: check_scalar( - n_data, + num_data, name="n_data", target_type=Integral, min_val=differences + 1, @@ -358,17 +387,17 @@ def gen_squ_fw_fin_diff_mat_cho_banded( # NOTE: this is only for Sklearn compatibility except ValueError: - raise ValueError(f"Got n_features = {n_data}, must be >= {differences + 1}.") + raise ValueError(f"Got n_features = {num_data}, must be >= {differences + 1}.") # afterwards, the squared forward finite differences matrix is computed - if orig_first: - return _gen_squ_fw_fin_diff_mat_cho_banded_orig_first( - n_data=n_data, + if original_first: + return _squared_forward_difference_matrix_banded_original_first( + num_data=num_data, differences=differences, ) - return _gen_squ_fw_fin_diff_mat_cho_banded_transp_first( - n_data=n_data, + return _squared_forward_difference_matrix_banded_transpose_first( + num_data=num_data, differences=differences, 
) @@ -376,12 +405,12 @@ def gen_squ_fw_fin_diff_mat_cho_banded( def estimate_noise_stddev( series: np.ndarray, differences: int = 6, - diff_accuracy: int = 2, + differences_accuracy: int = 2, window_size: Optional[int] = None, extrapolator: Callable[..., np.ndarray] = np.pad, extrapolator_args: Tuple[Any, ...] = ("reflect",), extrapolator_kwargs: Optional[Dict[str, Any]] = None, - power: Literal[-2, -1, 1, 2] = 1, + stddev_power: Literal[-2, -1, 1, 2] = 1, stddev_min: Union[float, int] = 1e-10, ) -> np.ndarray: """ @@ -403,7 +432,7 @@ def estimate_noise_stddev( Empirically, 5-6 was found as a sweet spot, but even numbers work better with the default ``extrapolator``. Values below 1 are not allowed. - diff_accuracy : int, default=2 + differences_accuracy : int, default=2 The accuracy of the finite difference approximation, which has to be an even integer ``>= 2``. Higher values will enhance the effect of outliers that will corrupt the noise @@ -446,7 +475,7 @@ def estimate_noise_stddev( Additional keyword arguments that are passed to the extrapolator function as described for ``extrapolator``. If ``None``, no additional keyword arguments are passed. - power : {-2, -1, 1, 2}, default=1 + stddev_power : {-2, -1, 1, 2}, default=1 The power to which the noise standard deviation is raised. This can be used to compute the: @@ -534,8 +563,10 @@ def estimate_noise_stddev( ) # power - if power not in {-2, -1, 1, 2}: - raise ValueError(f"Got power = {power}, expected -2, -1, 1, or 2.") + if stddev_power not in {-2, -1, 1, 2}: + raise ValueError( + f"Got stddeev_power = {stddev_power}, expected -2, -1, 1, or 2." 
+ ) # minimum standard deviation check_scalar( @@ -548,16 +579,16 @@ def estimate_noise_stddev( # for validation of the series, the central finite differences kernel has to be # computed - diff_kernel = calc_central_diff_kernel( + difference_kernel = central_finite_difference_coefficients( differences=differences, - accuracy=diff_accuracy, + accuracy=differences_accuracy, ) # afterwards, the series is validated - if series.size < diff_kernel.size: + if series.size < difference_kernel.size: raise ValueError( - f"Got series.size = {series.size}, must be >= {diff_kernel.size} (kernel " - f"size)." + f"Got series.size = {series.size}, must be >= {difference_kernel.size} " + f"(kernel size)." ) if window_size is not None: @@ -577,9 +608,9 @@ def estimate_noise_stddev( ### Noise Standard Deviation Estimation ### # the signal is extrapolated to avoid edge effects - pad_width = diff_kernel.size // 2 + pad_width = difference_kernel.size // 2 pad_width += 0 if window_size is None else window_size // 2 - series_extrap = extrapolator( + extrapolated_series = extrapolator( series, pad_width, *extrapolator_args, @@ -587,18 +618,22 @@ def estimate_noise_stddev( ) # the absolute central finite differences are computed ... - abs_diff_series = np.abs( - np.convolve(series_extrap, np.flip(diff_kernel), mode="valid") + absolute_differences_series = np.abs( + np.convolve( + extrapolated_series, + np.flip(difference_kernel), + mode="valid", + ) ) - size_after_diff = abs_diff_series.size + size_after_differentiation = absolute_differences_series.size # ... 
and the median filter is applied to theses differences - prefactor = _MAD_PREFACTOR / np.linalg.norm(diff_kernel) + prefactor = _MAD_PREFACTOR / np.linalg.norm(difference_kernel) # Case 1: the global noise standard deviation is estimated if window_size is None: noise_stddev = np.full_like( series, - fill_value=prefactor * np.median(abs_diff_series), + fill_value=prefactor * np.median(absolute_differences_series), ) # Case 2: the local noise standard deviation is estimated @@ -607,19 +642,19 @@ def estimate_noise_stddev( noise_stddev = ( prefactor * median_filter( - abs_diff_series, + absolute_differences_series, size=window_size, mode="constant", - )[half_window_size : size_after_diff - half_window_size] + )[half_window_size : size_after_differentiation - half_window_size] ) # the minimum-bounded noise standard deviation is raised to the power noise_stddev = np.maximum(noise_stddev, stddev_min) - if power in {-2, 2}: + if stddev_power in {-2, 2}: noise_stddev = np.square(noise_stddev) - if power in {-2, -1}: + if stddev_power in {-2, -1}: noise_stddev = np.reciprocal(noise_stddev) return noise_stddev diff --git a/chemotools/utils/_models.py b/chemotools/utils/_models.py index b126279f..cd88a3f9 100644 --- a/chemotools/utils/_models.py +++ b/chemotools/utils/_models.py @@ -121,7 +121,7 @@ class WhittakerSmoothLambda: __zero_tol: float = field(default=1e-25, init=False, repr=False) __diff_tol: float = field(default=1e-5, init=False, repr=False) - def _validate_n_set_method(self) -> None: + def _set_validated_method(self) -> None: try: self.method_used = WhittakerSmoothMethods(self.method.lower()) except ValueError: @@ -137,7 +137,7 @@ def __post_init__(self): # Case 1: a single value is provided if isinstance(self.bounds, (int, float)): # first, the method is validated - self._validate_n_set_method() + self._set_validated_method() # in this case, the method has to be set to FIXED if self.method_used != WhittakerSmoothMethods.FIXED: @@ -163,25 +163,25 @@ def 
__post_init__(self): elif isinstance(self.bounds, tuple): # the bounds are flipped if necessary - low_bound, upp_bound = sorted(self.bounds) + lower_bound, upper_bound = sorted(self.bounds) # the bounds have to be greater than or equal to the zero tolerance - if low_bound < self.__zero_tol or upp_bound < self.__zero_tol: + if lower_bound < self.__zero_tol or upper_bound < self.__zero_tol: raise ValueError( f"\nThe bounds for the penalty weight lambda have to be greater " f"than or equal to the zero tolerance {self.__zero_tol}, but " - f"they are {low_bound} and {upp_bound}." + f"they are {lower_bound} and {upper_bound}." ) # the difference has to be at least 1e-5 * upp_bound to be considered # as a search space - if abs(upp_bound - low_bound) >= self.__diff_tol * upp_bound: + if abs(upper_bound - lower_bound) >= self.__diff_tol * upper_bound: # for this, the method is validated - self._validate_n_set_method() + self._set_validated_method() # if the method is not FIXED, the bounds are set as the search space if self.method_used != WhittakerSmoothMethods.FIXED: - self.auto_bounds = (float(low_bound), float(upp_bound)) + self.auto_bounds = (float(lower_bound), float(upper_bound)) self.fit_auto = True return @@ -189,13 +189,13 @@ def __post_init__(self): # an error is raised raise ValueError( f"\nThe bounds for the penalty weight lambda are a search space " - f"({low_bound}, {upp_bound}), but the method is set to FIXED." + f"({lower_bound}, {upper_bound}), but the method is set to FIXED." 
) # otherwise, if the penalty weights is fixed, the method is set to FIXED as # well self.method_used = WhittakerSmoothMethods.FIXED - self.fixed_lambda = float(upp_bound) + self.fixed_lambda = float(upper_bound) self.fit_auto = False return @@ -271,11 +271,11 @@ class BandedLUFactorization: singular: bool shape: tuple[int, int] = field(default=(-1, -1), init=False) - n_rows: int = field(default=-1, init=False) - n_cols: int = field(default=-1, init=False) - main_diag_row_idx: int = field(default=-1, init=False) + num_rows: int = field(default=-1, init=False) + num_cols: int = field(default=-1, init=False) + main_diagonal_row_index: int = field(default=-1, init=False) def __post_init__(self): self.shape = self.lub.shape # type: ignore - self.n_rows, self.n_cols = self.shape - self.main_diag_row_idx = self.n_rows - 1 - self.l_and_u[0] + self.num_rows, self.num_cols = self.shape + self.main_diagonal_row_index = self.num_rows - 1 - self.l_and_u[0] diff --git a/chemotools/utils/_whittaker_base/initialisation.py b/chemotools/utils/_whittaker_base/initialisation.py index 090f4b86..abffe877 100644 --- a/chemotools/utils/_whittaker_base/initialisation.py +++ b/chemotools/utils/_whittaker_base/initialisation.py @@ -99,14 +99,14 @@ def get_squ_fw_diff_mat_banded( # NOTE: the matrix is returned with integer entries because integer computations # can be carried out at maximum precision; this has to be converted to # double precision for the LU decomposition - penalty_mat_banded = fdiff.gen_squ_fw_fin_diff_mat_cho_banded( - n_data=n_data, + penalty_mat_banded = fdiff.squared_forward_difference_matrix_banded( + num_data=n_data, differences=differences, - orig_first=orig_first, + original_first=orig_first, ).astype(dtype) # ... 
and cast to the banded storage format for LAPACK's LU decomposition - return bla.conv_upper_chol_banded_to_lu_banded_storage(ab=penalty_mat_banded) + return bla.convert_upper_chol_banded_to_lu_banded_storage(ab=penalty_mat_banded) def get_flipped_fw_diff_kernel(differences: int, dtype: Type) -> np.ndarray: @@ -116,9 +116,9 @@ def get_flipped_fw_diff_kernel(differences: int, dtype: Type) -> np.ndarray: """ - return np.flip(fdiff.calc_forward_diff_kernel(differences=differences)).astype( - dtype - ) + return np.flip( + fdiff.forward_finite_difference_kernel(differences=differences) + ).astype(dtype) def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> float: diff --git a/tests/fixtures.py b/tests/fixtures.py index 8007ba4c..c63f6637 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -184,7 +184,7 @@ def noise_level_estimation_signal() -> np.ndarray: @pytest.fixture -def noise_level_estimation_refs() -> List[NoiseEstimationReference]: +def noise_level_estimation_references() -> List[NoiseEstimationReference]: fpath = os.path.join( path_to_resources, "noise_level_estimation/noise_estimation_refs.csv", diff --git a/tests/test_for_utils/test_banded_linalg.py b/tests/test_for_utils/test_banded_linalg.py index 670c2b5f..cea367b3 100644 --- a/tests/test_for_utils/test_banded_linalg.py +++ b/tests/test_for_utils/test_banded_linalg.py @@ -14,7 +14,7 @@ from chemotools.utils._banded_linalg import ( _datacopied, - conv_upper_chol_banded_to_lu_banded_storage, + convert_upper_chol_banded_to_lu_banded_storage, lu_banded, lu_solve_banded, slogdet_lu_banded, @@ -80,11 +80,11 @@ def test_datacopied( @pytest.mark.parametrize("with_finite_check", [True, False]) @pytest.mark.parametrize("overwrite_b", [True, False]) -@pytest.mark.parametrize("n_rhs", [0, 1, 2]) -@pytest.mark.parametrize("n_upp_bands", [1, 2, 3, 4, 5, 6]) -@pytest.mark.parametrize("n_low_bands", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize("num_right_hand_sides", [0, 1, 2]) 
+@pytest.mark.parametrize("num_superdiagonals", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize("num_subdiagonals", [1, 2, 3, 4, 5, 6]) @pytest.mark.parametrize( - "n_rows", + "num_rows", [ 1, 2, @@ -110,10 +110,10 @@ def test_datacopied( ], ) def test_lu_banded_solve( - n_rows: int, - n_low_bands: int, - n_upp_bands: int, - n_rhs: int, + num_rows: int, + num_subdiagonals: int, + num_superdiagonals: int, + num_right_hand_sides: int, overwrite_b: bool, with_finite_check: bool, ) -> None: @@ -127,28 +127,34 @@ def test_lu_banded_solve( """ # if the matrix cannot exist with the given shape, the test is skipped - n_rows_min = n_low_bands + n_upp_bands + 1 - if n_rows < n_rows_min: + num_rows_min = num_subdiagonals + num_superdiagonals + 1 + if num_rows < num_rows_min: pytest.skip( - f"Test skipped because the number of rows {n_rows} is smaller than the " - f"minimum number of rows {n_rows_min} required by the number of sub- " - f"{n_low_bands} and superdiagonals {n_upp_bands}." + f"Test skipped because the number of rows {num_rows} is smaller than the " + f"minimum number of rows {num_rows_min} required by the number of sub- " + f"{num_subdiagonals} and superdiagonals {num_superdiagonals}." 
) # a random banded matrix and right-hand-side-vector/-matrix are generated np.random.seed(seed=42) - ab = -1.0 + 2.0 * np.random.rand(n_low_bands + n_upp_bands + 1, n_rows) - b = np.random.rand(n_rows) if n_rhs == 0 else np.random.rand(n_rows, n_rhs) + ab = -1.0 + 2.0 * np.random.rand( + num_subdiagonals + num_superdiagonals + 1, num_rows + ) + b = ( + np.random.rand(num_rows) + if num_right_hand_sides == 0 + else np.random.rand(num_rows, num_right_hand_sides) + ) # first, the Scipy solution is computed because if this fails due to singularity, # the test has to not test for equivalent results, but for failure # NOTE: failure is indicated by the solution being ``None`` # NOTE: this order of evaluation is also better for testing if the overwrite flag # is working correctly because otherwise SciPy would get the overwritten b - l_and_u = (n_low_bands, n_upp_bands) - x_ref = None + l_and_u = (num_subdiagonals, num_superdiagonals) + x_reference = None try: - x_ref = scipy_solve_banded( + x_reference = scipy_solve_banded( l_and_u=l_and_u, ab=ab, b=b, @@ -157,14 +163,14 @@ def test_lu_banded_solve( # NOTE: even if SciPy computes the solution "successfully", there might be NaNs # NOTE: in the result, so the test has to check for that as well - if np.any(np.isnan(x_ref)): - x_ref = None + if np.any(np.isnan(x_reference)): + x_reference = None except np.linalg.LinAlgError: pass # the banded matrix is LU decomposed with the respective Chemotools function - lu_fact = lu_banded( + lu_factorization = lu_banded( l_and_u=l_and_u, ab=ab, check_finite=with_finite_check, @@ -172,11 +178,11 @@ def test_lu_banded_solve( # the linear system is solved with the respective Chemotools function # Case 1: Scipy failed - if x_ref is None: + if x_reference is None: # in this case, the Chemotools function has to raise an exception as well with pytest.raises(np.linalg.LinAlgError): - x = lu_solve_banded( - lub_factorization=lu_fact, + x_chemotools = lu_solve_banded( + 
lub_factorization=lu_factorization, b=b, overwrite_b=overwrite_b, check_finite=with_finite_check, @@ -185,8 +191,8 @@ def test_lu_banded_solve( # Case 2: Scipy succeeded # in this case, the Chemotools function has to return the same result as Scipy - x = lu_solve_banded( - lub_factorization=lu_fact, + x_chemotools = lu_solve_banded( + lub_factorization=lu_factorization, b=b, overwrite_b=overwrite_b, check_finite=with_finite_check, @@ -195,14 +201,19 @@ def test_lu_banded_solve( # NOTE: the following check has to be fairly strict when it comes to equivalence # NOTE: since the SciPy and Chemotools are basically doing the same under the hood # NOTE: when it comes to the solution process (first LU, then triangular solve) - assert np.allclose(x, x_ref, atol=1e-10, rtol=1e-10) + assert np.allclose( + x_chemotools, + x_reference, + atol=1e-10, + rtol=1e-10, + ) @pytest.mark.parametrize("with_finite_check", [True, False]) -@pytest.mark.parametrize("ensure_posdef", [True, False]) -@pytest.mark.parametrize("n_upp_low_bands", [1, 2, 3, 4, 5, 6]) +@pytest.mark.parametrize("ensure_positive_definite", [True, False]) +@pytest.mark.parametrize("num_sub_and_superdiagonals", [1, 2, 3, 4, 5, 6]) @pytest.mark.parametrize( - "n_rows", + "num_rows", [ 1, 2, @@ -228,9 +239,9 @@ def test_lu_banded_solve( ], ) def test_lu_banded_slogdet( - n_rows: int, - n_upp_low_bands: int, - ensure_posdef: bool, + num_rows: int, + num_sub_and_superdiagonals: int, + ensure_positive_definite: bool, with_finite_check: bool, ) -> None: """ @@ -240,12 +251,13 @@ def test_lu_banded_slogdet( """ # if the matrix cannot exist with the given shape, the test is skipped - n_rows_min = 2 * n_upp_low_bands + 1 - if n_rows < n_rows_min: + n_rows_min = 2 * num_sub_and_superdiagonals + 1 + if num_rows < n_rows_min: pytest.skip( - f"Test skipped because the number of rows {n_rows} is smaller than the " + f"Test skipped because the number of rows {num_rows} is smaller than the " f"minimum number of rows {n_rows_min} 
required by the number of sub- " - f"{n_upp_low_bands} and superdiagonals {n_upp_low_bands}." + f"{num_sub_and_superdiagonals} and superdiagonals " + f"{num_sub_and_superdiagonals}." ) # a random banded matrix is generated in the upper banded storage used for Cholesky @@ -256,17 +268,19 @@ def test_lu_banded_slogdet( # flag is set # NOTE: for an indefinite matrix, the matrix is shifted and scaled to be in the # interval [-1, 1] - ab_for_chol = np.random.rand(n_upp_low_bands + 1, n_rows) - if ensure_posdef: - ab_for_chol[n_upp_low_bands, ::] += 1.0 + 2.0 * float(n_upp_low_bands) + ab_for_chol = np.random.rand(num_sub_and_superdiagonals + 1, num_rows) + if ensure_positive_definite: + ab_for_chol[num_sub_and_superdiagonals, ::] += 1.0 + 2.0 * float( + num_sub_and_superdiagonals + ) else: ab_for_chol = -1.0 + 2.0 * ab_for_chol - l_and_u, ab_for_lu = conv_upper_chol_banded_to_lu_banded_storage(ab=ab_for_chol) + l_and_u, ab_for_lu = convert_upper_chol_banded_to_lu_banded_storage(ab=ab_for_chol) # first, the log determinant is computed with the literal definition as the sum of # the logarithms of the eigenvalues of the matrix - sign_ref, logabsdet_ref = get_banded_slogdet(ab=ab_for_chol) + sign_reference, logabsdet_reference = get_banded_slogdet(ab=ab_for_chol) # the banded matrix is LU decomposed with the respective Chemotools function ... lu_fact = lu_banded( @@ -275,8 +289,18 @@ def test_lu_banded_slogdet( check_finite=with_finite_check, ) # ... 
and the sign and log determinant are computed - sign, logabsdet = slogdet_lu_banded(lub_factorization=lu_fact) + sign_chemotools, logabsdet_chemotools = slogdet_lu_banded(lub_factorization=lu_fact) # the results are compared - assert np.isclose(sign, sign_ref, atol=1e-5, rtol=1e-5) - assert np.isclose(logabsdet, logabsdet_ref, atol=1e-5, rtol=1e-5) + assert np.isclose( + sign_chemotools, + sign_reference, + atol=1e-5, + rtol=1e-5, + ) + assert np.isclose( + logabsdet_chemotools, + logabsdet_reference, + atol=1e-5, + rtol=1e-5, + ) diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/test_for_utils/test_finite_differences.py index 040a39d4..2be19a22 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/test_for_utils/test_finite_differences.py @@ -12,18 +12,18 @@ import pytest from chemotools.utils._finite_differences import ( - calc_central_diff_kernel, - calc_forward_diff_kernel, + central_finite_difference_coefficients, estimate_noise_stddev, - gen_squ_fw_fin_diff_mat_cho_banded, + forward_finite_difference_kernel, + squared_forward_difference_matrix_banded, ) -from tests.fixtures import noise_level_estimation_refs # noqa: F401 +from tests.fixtures import noise_level_estimation_references # noqa: F401 from tests.fixtures import noise_level_estimation_signal # noqa: F401 from tests.fixtures import reference_finite_differences # noqa: F401 from tests.test_for_utils.utils_funcs import ( conv_upper_cho_banded_storage_to_sparse, - multiply_vect_with_squ_fw_fin_diff_orig_first, - multiply_vect_with_squ_fw_fin_diff_transpose_first, + multiply_vect_with_squared_forward_finite_differences_original_first, + multiply_vect_with_squared_forward_finite_differences_transpose_first, ) from tests.test_for_utils.utils_models import ( NoiseEstimationReference, @@ -44,19 +44,22 @@ def test_forward_diff_kernel( """ # each kernel is calculated and compared to the reference - for ref_diff_kernel in reference_finite_differences: - kernel = 
calc_forward_diff_kernel(differences=ref_diff_kernel.differences) + for kernel_reference in reference_finite_differences: + kernel_chemotools = forward_finite_difference_kernel( + differences=kernel_reference.differences + ) # first, the size of the kernel is checked ... - assert kernel.size == ref_diff_kernel.size, ( - f"Difference order {ref_diff_kernel.differences} with accuracy 1 - " - f"Expected kernel size {ref_diff_kernel.size} but got {kernel.size}" + assert kernel_chemotools.size == kernel_reference.size, ( + f"Difference order {kernel_reference.differences} with accuracy 1 - " + f"Expected kernel size {kernel_reference.size} but got " + f"{kernel_chemotools.size}" ) # ... followed by the comparison of the kernel itself - assert np.allclose(kernel, ref_diff_kernel.kernel, atol=1e-8), ( - f"Difference order {ref_diff_kernel.differences} with accuracy 1 - " - f"Expected kernel {ref_diff_kernel.kernel.tolist()} but got " - f"{kernel.tolist()}" + assert np.allclose(kernel_chemotools, kernel_reference.kernel, atol=1e-8), ( + f"Difference order {kernel_reference.differences} with accuracy 1 - " + f"Expected kernel {kernel_reference.kernel.tolist()} but got " + f"{kernel_chemotools.tolist()}" ) @@ -71,33 +74,34 @@ def test_central_diff_kernel( """ # each kernel is calculated and compared to the reference - for ref_diff_kernel in reference_finite_differences: - kernel = calc_central_diff_kernel( - differences=ref_diff_kernel.differences, - accuracy=ref_diff_kernel.accuracy, + for kernel_reference in reference_finite_differences: + kernel_chemotools = central_finite_difference_coefficients( + differences=kernel_reference.differences, + accuracy=kernel_reference.accuracy, ) # first, the size of the kernel is checked ... 
- assert kernel.size == ref_diff_kernel.size, ( - f"Difference order {ref_diff_kernel.differences} with accuracy " - f"{ref_diff_kernel.accuracy} - Expected kernel size {ref_diff_kernel.size} " - f"but got {kernel.size}" + assert kernel_chemotools.size == kernel_reference.size, ( + f"Difference order {kernel_reference.differences} with accuracy " + f"{kernel_reference.accuracy} - Expected kernel size " + f"{kernel_reference.size} but got {kernel_chemotools.size}" ) # ... followed by the comparison of the kernel itself - assert np.allclose(kernel, ref_diff_kernel.kernel, atol=1e-8), ( - f"Difference order {ref_diff_kernel.differences} with accuracy " - f"{ref_diff_kernel.accuracy} - Expected kernel " - f"{ref_diff_kernel.kernel.tolist()} but got {kernel.tolist()}" + assert np.allclose(kernel_chemotools, kernel_reference.kernel, atol=1e-8), ( + f"Difference order {kernel_reference.differences} with accuracy " + f"{kernel_reference.accuracy} - Expected kernel " + f"{kernel_reference.kernel.tolist()} but got {kernel_chemotools.tolist()}" ) @pytest.mark.parametrize( - "n_add_size", + "num_additional_values", list(range(0, 11)) + list(range(20, 101, 10)) + list(range(200, 1001, 100)), ) @pytest.mark.parametrize("differences", list(range(1, 11))) def test_squ_fw_fin_diff_mat_cho_banded_orig_first( - differences: int, n_add_size: int + differences: int, + num_additional_values: int, ) -> None: """ Tests the generation of the squared forward finite difference matrix ``D @ D.T`` @@ -110,45 +114,53 @@ def test_squ_fw_fin_diff_mat_cho_banded_orig_first( """ # first, the finite difference kernel is calculated - kernel = calc_forward_diff_kernel(differences=differences) + kernel = forward_finite_difference_kernel(differences=differences) # then, the banded matrix D @ D.T is generated ... 
- n_data = kernel.size + n_add_size - ddt_banded = gen_squ_fw_fin_diff_mat_cho_banded( - n_data=n_data, + num_data = kernel.size + num_additional_values + d_dot_dt_banded = squared_forward_difference_matrix_banded( + num_data=num_data, differences=differences, - orig_first=True, + original_first=True, ) # ... and converted to a sparse matrix - ddt_sparse = conv_upper_cho_banded_storage_to_sparse(ab=ddt_banded) + d_dot_dt_sparse = conv_upper_cho_banded_storage_to_sparse(ab=d_dot_dt_banded) # a random vector is created np.random.seed(42) - vector = np.random.rand(n_add_size + 1) + vector = np.random.rand(num_additional_values + 1) # this vector is multiplied with the matrix - result = ddt_sparse @ vector + result_chemotools = d_dot_dt_sparse @ vector # afterwards, the result is compared to the result of the convolution - result_conv = multiply_vect_with_squ_fw_fin_diff_orig_first( - differences=differences, - kernel=kernel, - vector=vector, + result_reference = ( + multiply_vect_with_squared_forward_finite_differences_original_first( + differences=differences, + kernel=kernel, + vector=vector, + ) ) # the results are compared # NOTE: the following check has to be fairly strict when it comes to equivalence # since the NumPy and Chemotools are basically doing the same under the hood - assert np.allclose(result, result_conv, atol=1e-10, rtol=1e-10) + assert np.allclose( + result_chemotools, + result_reference, + atol=1e-10, + rtol=1e-10, + ) @pytest.mark.parametrize( - "n_add_size", + "num_additional_values", list(range(0, 11)) + list(range(20, 101, 10)) + list(range(200, 1001, 100)), ) @pytest.mark.parametrize("differences", list(range(1, 11))) def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( - differences: int, n_add_size: int + differences: int, + num_additional_values: int, ) -> None: """ Tests the generation of the squared forward finite difference matrix ``D.T @ D`` @@ -161,40 +173,42 @@ def test_squ_fw_fin_diff_mat_cho_banded_transpose_first( """ # first, 
the finite difference kernel is calculated - kernel = calc_forward_diff_kernel(differences=differences) + kernel = forward_finite_difference_kernel(differences=differences) # then, the banded matrix D.T @ D is generated ... - n_data = kernel.size + n_add_size - dtd_banded = gen_squ_fw_fin_diff_mat_cho_banded( - n_data=n_data, + num_data = kernel.size + num_additional_values + dt_dot_d_banded = squared_forward_difference_matrix_banded( + num_data=num_data, differences=differences, - orig_first=False, + original_first=False, ) # ... and converted to a sparse matrix - dtd_sparse = conv_upper_cho_banded_storage_to_sparse(ab=dtd_banded) + dt_dot_d_sparse = conv_upper_cho_banded_storage_to_sparse(ab=dt_dot_d_banded) # a random vector is created np.random.seed(42) - vector = np.random.rand(n_data) + vector = np.random.rand(num_data) # this vector is multiplied with the matrix - result = dtd_sparse @ vector + result_chemotools = dt_dot_d_sparse @ vector # afterwards, the result is compared to the result of the convolution - result_conv = multiply_vect_with_squ_fw_fin_diff_transpose_first( - differences=differences, - kernel=kernel, - vector=vector, + result_reference = ( + multiply_vect_with_squared_forward_finite_differences_transpose_first( + differences=differences, + kernel=kernel, + vector=vector, + ) ) # the results are compared # NOTE: the following check has to be fairly strict when it comes to equivalence # since the NumPy and Chemotools are basically doing the same under the hood - assert np.allclose(result, result_conv, atol=1e-10, rtol=1e-10) + assert np.allclose(result_chemotools, result_reference, atol=1e-10, rtol=1e-10) @pytest.mark.parametrize( - "series, differences, accuracy, window_size, power, stddev_min", + "series, differences, accuracy, window_size, stddev_power, stddev_min", [ ( # Number 0 series is too small for difference kernel np.arange(start=0, stop=5), @@ -339,7 +353,7 @@ def test_estimate_noise_stddev_invalid_input( differences: int, 
accuracy: int, window_size: Optional[int], - power: int, + stddev_power: int, stddev_min: float, ) -> None: """ @@ -362,9 +376,9 @@ def test_estimate_noise_stddev_invalid_input( estimate_noise_stddev( series=series, differences=differences, - diff_accuracy=accuracy, + differences_accuracy=accuracy, window_size=window_size, - power=power, # type: ignore + stddev_power=stddev_power, # type: ignore stddev_min=stddev_min, ) @@ -373,7 +387,7 @@ def test_estimate_noise_stddev_invalid_input( def test_noise_level_estimation( noise_level_estimation_signal: np.ndarray, # noqa: F811 - noise_level_estimation_refs: List[NoiseEstimationReference], # noqa: F811 + noise_level_estimation_references: List[NoiseEstimationReference], # noqa: F811 ) -> None: """ Tests the noise level estimation function :func:`estimate_noise_stddev`. @@ -382,46 +396,58 @@ def test_noise_level_estimation( """ - for ref in noise_level_estimation_refs: + for noise_level_reference in noise_level_estimation_references: # the noise level is estimated - noise_level = estimate_noise_stddev( + noise_level_chemotools = estimate_noise_stddev( series=noise_level_estimation_signal, - differences=ref.differences, - diff_accuracy=ref.accuracy, - window_size=ref.window_size, - stddev_min=ref.min_noise_level, + differences=noise_level_reference.differences, + differences_accuracy=noise_level_reference.accuracy, + window_size=noise_level_reference.window_size, + stddev_min=noise_level_reference.min_noise_level, ) # then, the noise level itself is compared to the reference in a quite strict # way because both results were computed in the same way with the only # difference being that Chemotools uses Python and the reference uses # LibreOffice Calc - assert np.allclose(noise_level, ref.noise_level, rtol=1e-12), ( + assert np.allclose( + noise_level_chemotools, + noise_level_reference.noise_level, + rtol=1e-12, + ), ( f"Original noise level differs from reference noise for differences " - f"{ref.differences} with accuracy 
{ref.accuracy} and window size " - f"{ref.window_size} given a minimum standard deviation of " - f"{ref.min_noise_level}." + f"{noise_level_reference.differences} with accuracy " + f"{noise_level_reference.accuracy} and window size " + f"{noise_level_reference.window_size} given a minimum standard deviation " + f"of {noise_level_reference.min_noise_level}." ) # then, all the available powers to which the noise level can be raised are # compared to the reference - for power, raised_noise_level_ref in ref.raised_noise_levels.items(): + for ( + stddev_power, + raised_noise_level_ref, + ) in noise_level_reference.raised_noise_levels.items(): raised_noise_level = estimate_noise_stddev( series=noise_level_estimation_signal, - differences=ref.differences, - diff_accuracy=ref.accuracy, - window_size=ref.window_size, - stddev_min=ref.min_noise_level, - power=power, + differences=noise_level_reference.differences, + differences_accuracy=noise_level_reference.accuracy, + window_size=noise_level_reference.window_size, + stddev_min=noise_level_reference.min_noise_level, + stddev_power=stddev_power, ) # again, the comparison is quite strict assert np.allclose( - raised_noise_level, raised_noise_level_ref, atol=1e-12 + raised_noise_level, + raised_noise_level_ref, + atol=1e-12, ), ( f"Raised noise level differs from reference noise for differences " - f"{ref.differences} with accuracy {ref.accuracy} and window size " - f"{ref.window_size} given a minimum standard deviation of " - f"{ref.min_noise_level} and a power of {power}." + f"{noise_level_reference.differences} with accuracy " + f"{noise_level_reference.accuracy} and window size " + f"{noise_level_reference.window_size} given a minimum standard " + f"deviation of {noise_level_reference.min_noise_level} and a power of " + f"{stddev_power}." 
) return diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py index f8a0e4e8..7eca6c95 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/test_for_utils/test_whittaker_base.py @@ -573,7 +573,7 @@ def test_auto_lambda_log_marginal_likelihood( weight_vect=weights[0, ::], differences=differences, log_lambda_bounds=(log(lambda_bounds[0]), log(lambda_bounds[1])), - n_opts=100, + num_optimizations=100, ) # the results are compared with 1% relative tolerance diff --git a/tests/test_for_utils/utils_funcs.py b/tests/test_for_utils/utils_funcs.py index 0e72d4f2..fc1b9af5 100644 --- a/tests/test_for_utils/utils_funcs.py +++ b/tests/test_for_utils/utils_funcs.py @@ -20,7 +20,7 @@ from scipy.sparse import diags as sp_diags from scipy.sparse import linalg as spla -from chemotools.utils._finite_differences import calc_forward_diff_kernel +from chemotools.utils._finite_differences import forward_finite_difference_kernel from chemotools.utils._whittaker_base import WhittakerLikeSolver ### Utility Functions ### @@ -177,29 +177,34 @@ def conv_upper_cho_banded_storage_to_sparse(ab: np.ndarray) -> csr_matrix: """ # the offset vector is initialised - n_diags, n_cols = ab.shape - n_diags -= 1 - main_diag_idx = n_diags - offsets = np.arange(start=-n_diags, stop=n_diags + 1, step=1, dtype=np.int64) + num_diagonals, num_columns = ab.shape + num_diagonals -= 1 + main_diagonal_index = num_diagonals + offsets = np.arange( + start=-num_diagonals, + stop=num_diagonals + 1, + step=1, + dtype=np.int64, + ) # then, the list of diagonals is created diagonals = [] # the subdiagonals are added first ... - for offset in range(n_diags, 0, -1): - diagonals.append(ab[main_diag_idx - offset, offset:n_cols]) + for offset in range(num_diagonals, 0, -1): + diagonals.append(ab[main_diagonal_index - offset, offset:num_columns]) # ... followed by the main diagonal ... 
- diagonals.append(ab[main_diag_idx, ::]) + diagonals.append(ab[main_diagonal_index, ::]) # ... and finally the superdiagonals - for offset in range(1, n_diags + 1): - diagonals.append(ab[main_diag_idx - offset, offset:n_cols]) + for offset in range(1, num_diagonals + 1): + diagonals.append(ab[main_diagonal_index - offset, offset:num_columns]) # the sparse matrix is created return sp_diags( # type: ignore diagonals=diagonals, offsets=offsets, # type: ignore - shape=(n_cols, n_cols), + shape=(num_columns, num_columns), format="csr", ) @@ -326,12 +331,12 @@ def conv_lu_banded_storage_to_sparse( """ # the offset vector is initialised - n_low_bands, n_upp_bands = l_and_u - main_diag_idx = n_upp_bands - n_cols = ab.shape[1] + num_subdiagonals, num_superdiagonals = l_and_u + main_diagonal_index = num_superdiagonals + num_columns = ab.shape[1] offsets = np.arange( - start=-n_low_bands, - stop=n_upp_bands + 1, + start=-num_subdiagonals, + stop=num_superdiagonals + 1, step=1, dtype=np.int64, ) @@ -339,26 +344,26 @@ def conv_lu_banded_storage_to_sparse( # then, the list of diagonals is created diagonals = [] # the subdiagonals are added first ... - for offset in range(n_low_bands, 0, -1): - diagonals.append(ab[main_diag_idx + offset, 0 : n_cols - offset]) + for offset in range(num_subdiagonals, 0, -1): + diagonals.append(ab[main_diagonal_index + offset, 0 : num_columns - offset]) # ... followed by the main diagonal ... - diagonals.append(ab[main_diag_idx, ::]) + diagonals.append(ab[main_diagonal_index, ::]) # ... 
and finally the superdiagonals - for offset in range(1, n_upp_bands + 1): - diagonals.append(ab[main_diag_idx - offset, offset:n_cols]) + for offset in range(1, num_superdiagonals + 1): + diagonals.append(ab[main_diagonal_index - offset, offset:num_columns]) # the matrix is created from the diagonals return sp_diags( # type: ignore diagonals=diagonals, offsets=offsets, # type: ignore - shape=(n_cols, n_cols), + shape=(num_columns, num_columns), format="csr", ) -def multiply_vect_with_squ_fw_fin_diff_orig_first( +def multiply_vect_with_squared_forward_finite_differences_original_first( differences: int, kernel: np.ndarray, vector: np.ndarray, @@ -379,7 +384,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> # Imports >>> import numpy as np >>> from tests.test_for_utils.utils_funcs import ( - ... multiply_vect_with_squ_fw_fin_diff_orig_first, + ... multiply_vect_with_squared_forward_finite_differences_original_first, ... ) >>> # All the following tests were checked using LibreOffice Calc @@ -388,7 +393,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 1 >>> kernel = np.array([-1, 1]) >>> vector = np.array([1, 2]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -399,7 +404,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 1 >>> kernel = np.array([-1, 1]) >>> vector = np.array([-10, 3, 11]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... 
vector=vector, @@ -410,7 +415,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 1 >>> kernel = np.array([-1, 1]) >>> vector = np.array([ 25, 17, -13, -12]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -421,7 +426,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 2 >>> kernel = np.array([1, -2, 1]) >>> vector = np.array([1, 2, 3]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -432,7 +437,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 2 >>> kernel = np.array([1, -2, 1]) >>> vector = np.array([-10, 3, 11, 27]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -443,7 +448,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 2 >>> kernel = np.array([1, -2, 1]) >>> vector = np.array([ 25, 17, -13, -12, 38]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -454,7 +459,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 3 >>> kernel = np.array([-1, 3, -3, 1]) >>> vector = np.array([1, 2, 3, 4]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... 
vector=vector, @@ -465,7 +470,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 3 >>> kernel = np.array([-1, 3, -3, 1]) >>> vector = np.array([-10, 3, 11, 27, -5]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -476,7 +481,7 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( >>> differences = 3 >>> kernel = np.array([-1, 3, -3, 1]) >>> vector = np.array([ 25, 17, -13, -12, 38, -8]) - >>> multiply_vect_with_squ_fw_fin_diff_orig_first( + >>> multiply_vect_with_squared_forward_finite_differences_original_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -494,14 +499,14 @@ def multiply_vect_with_squ_fw_fin_diff_orig_first( constant_values=0, ) # NOTE: since NumPy already flips the kernel internally, there is no need to flip it - vector_conv = np.convolve(vector_padded, kernel, mode="valid") + vector_convolved = np.convolve(vector_padded, kernel, mode="valid") # then, the result is convolved with the kernel # NOTE: here, the kernel has to be flipped to counteract NumPy's internal flipping - return np.convolve(vector_conv, np.flip(kernel), mode="valid") + return np.convolve(vector_convolved, np.flip(kernel), mode="valid") -def multiply_vect_with_squ_fw_fin_diff_transpose_first( +def multiply_vect_with_squared_forward_finite_differences_transpose_first( differences: int, kernel: np.ndarray, vector: np.ndarray, @@ -521,14 +526,14 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> # Imports >>> import numpy as np >>> from tests.test_for_utils.utils_funcs import ( - ... multiply_vect_with_squ_fw_fin_diff_transpose_first, + ... multiply_vect_with_squared_forward_finite_differences_transpose_first, ... 
) >>> # Test 1 >>> differences = 1 >>> kernel = np.array([-1, 1]) >>> vector = np.array([1, 2]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -539,7 +544,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 1 >>> kernel = np.array([-1, 1]) >>> vector = np.array([-10, 3, 11]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -550,7 +555,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 1 >>> kernel = np.array([-1, 1]) >>> vector = np.array([ 25, 17, -13, -12]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -561,7 +566,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 2 >>> kernel = np.array([1, -2, 1]) >>> vector = np.array([1, 2, 3]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -572,7 +577,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 2 >>> kernel = np.array([1, -2, 1]) >>> vector = np.array([-10, 3, 11, 27]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... 
vector=vector, @@ -583,7 +588,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 2 >>> kernel = np.array([1, -2, 1]) >>> vector = np.array([ 25, 17, -13, -12, 38]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -594,7 +599,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 3 >>> kernel = np.array([-1, 3, -3, 1]) >>> vector = np.array([1, 2, 3, 4]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -605,7 +610,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 3 >>> kernel = np.array([-1, 3, -3, 1]) >>> vector = np.array([-10, 3, 11, 27, -5]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... vector=vector, @@ -616,7 +621,7 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( >>> differences = 3 >>> kernel = np.array([-1, 3, -3, 1]) >>> vector = np.array([ 25, 17, -13, -12, 38, -8]) - >>> multiply_vect_with_squ_fw_fin_diff_transpose_first( + >>> multiply_vect_with_squared_forward_finite_differences_transpose_first( ... differences=differences, ... kernel=kernel, ... 
vector=vector, @@ -627,11 +632,11 @@ def multiply_vect_with_squ_fw_fin_diff_transpose_first( # first, the vector is convolved with the kernel # NOTE: here, the kernel has to be flipped to counteract NumPy's internal flipping - vector_conv = np.convolve(vector, np.flip(kernel), mode="valid") + vector_convolved = np.convolve(vector, np.flip(kernel), mode="valid") # then, the result is convolved with the flipped kernel and zero-padded vector_padded = np.pad( - vector_conv, + vector_convolved, pad_width=(differences, differences), mode="constant", constant_values=0, @@ -750,38 +755,53 @@ def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: # since the log determinant can be expressed as the sum of the logarithms of the # absolute eigenvalues, an eigenvalue evaluation is sufficient to determine the # sign and the log determinant - eigvals = eigvals_banded(a_band=ab, lower=False, select="a") - if np.count_nonzero(eigvals < 0.0) % 2 == 0: # type: ignore + eigenvalues = eigvals_banded( + a_band=ab, + lower=False, + select="a", + ) + if np.count_nonzero(eigenvalues < 0.0) % 2 == 0: # type: ignore sign = 1.0 else: sign = -1.0 with np.errstate(divide="ignore", over="ignore"): - logabsdet = np.log(np.abs(eigvals)).sum() # type: ignore + logabsdet = np.log(np.abs(eigenvalues)).sum() # type: ignore return sign, logabsdet -def get_sparse_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: +def get_sparse_forward_finite_difference_matrix( + num_data: int, + differences: int, +) -> csc_matrix: """ Creates a dense forward finite difference matrix ``D`` of a given difference order. Doctests -------- >>> # Imports - >>> from tests.test_for_utils.utils_funcs import get_sparse_fw_fin_diff_mat + >>> from tests.test_for_utils.utils_funcs import ( + ... get_sparse_forward_finite_difference_matrix, + ... 
) >>> # Matrix 1 - >>> n_data, differences = 5, 1 - >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> num_data, differences = 5, 1 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() array([[-1., 1., 0., 0., 0.], [ 0., -1., 1., 0., 0.], [ 0., 0., -1., 1., 0.], [ 0., 0., 0., -1., 1.]]) >>> # Matrix 2 - >>> n_data, differences = 10, 1 - >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> num_data, differences = 10, 1 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() array([[-1., 1., 0., 0., 0., 0., 0., 0., 0., 0.], [ 0., -1., 1., 0., 0., 0., 0., 0., 0., 0.], [ 0., 0., -1., 1., 0., 0., 0., 0., 0., 0.], @@ -793,15 +813,21 @@ def get_sparse_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: [ 0., 0., 0., 0., 0., 0., 0., 0., -1., 1.]]) >>> # Matrix 3 - >>> n_data, differences = 5, 2 - >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> num_data, differences = 5, 2 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() array([[ 1., -2., 1., 0., 0.], [ 0., 1., -2., 1., 0.], [ 0., 0., 1., -2., 1.]]) >>> # Matrix 4 - >>> n_data, differences = 10, 2 - >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> num_data, differences = 10, 2 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... 
).toarray() array([[ 1., -2., 1., 0., 0., 0., 0., 0., 0., 0.], [ 0., 1., -2., 1., 0., 0., 0., 0., 0., 0.], [ 0., 0., 1., -2., 1., 0., 0., 0., 0., 0.], @@ -812,14 +838,20 @@ def get_sparse_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: [ 0., 0., 0., 0., 0., 0., 0., 1., -2., 1.]]) >>> # Matrix 4 - >>> n_data, differences = 5, 3 - >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> num_data, differences = 5, 3 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() array([[-1., 3., -3., 1., 0.], [ 0., -1., 3., -3., 1.]]) >>> # Matrix 5 - >>> n_data, differences = 10, 3 - >>> get_sparse_fw_fin_diff_mat(n_data=n_data, differences=differences).toarray() + >>> num_data, differences = 10, 3 + >>> get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, + ... differences=differences, + ... ).toarray() array([[-1., 3., -3., 1., 0., 0., 0., 0., 0., 0.], [ 0., -1., 3., -3., 1., 0., 0., 0., 0., 0.], [ 0., 0., -1., 3., -3., 1., 0., 0., 0., 0.], @@ -835,12 +867,12 @@ def get_sparse_fw_fin_diff_mat(n_data: int, differences: int) -> csc_matrix: # then, the dense finite difference matrix D is created from the forward difference # kernel - diff_kernel = calc_forward_diff_kernel(differences=differences) + diff_kernel = forward_finite_difference_kernel(differences=differences) offsets = np.arange(start=0, stop=diff_kernel.size, step=1, dtype=np.int64) return sp_diags( diagonals=diff_kernel, offsets=offsets, # type: ignore - shape=(n_data - diff_kernel.size + 1, n_data), + shape=(num_data - diff_kernel.size + 1, num_data), dtype=dtype, format="csc", ) @@ -952,23 +984,27 @@ def find_min_num_swaps(arr: np.ndarray): # the logarithm of the determinant is the sum of the logarithms of the diagonal # elements of the LU decomposition, but since L is unit lower triangular, only the # diagonal elements of U are considered - diagU = splu.U.diagonal() - logabsdet = 
np.log(np.abs(diagU)).sum() + u_diagonal = splu.U.diagonal() + logabsdet = np.log(np.abs(u_diagonal)).sum() # then, the sign is determined from the diagonal elements of U as well as the row # and column permutations # NOTE: odd number of negative elements/swaps leads to a negative sign - fact_sign = -1 if np.count_nonzero(diagU < 0.0) % 2 == 1 else 1 - row_sign = -1 if find_min_num_swaps(splu.perm_r) % 2 == 1 else 1 - col_sign = -1 if find_min_num_swaps(splu.perm_c) % 2 == 1 else 1 - sign = -1.0 if fact_sign * row_sign * col_sign < 0 else 1.0 + factorization_sign = -1 if np.count_nonzero(u_diagonal < 0.0) % 2 == 1 else 1 + row_permutation_sign = -1 if find_min_num_swaps(splu.perm_r) % 2 == 1 else 1 + column_permutation_sign = -1 if find_min_num_swaps(splu.perm_c) % 2 == 1 else 1 + total_sign = ( + -1.0 + if factorization_sign * row_permutation_sign * column_permutation_sign < 0 + else 1.0 + ) - return sign, logabsdet + return total_sign, logabsdet def calc_whittaker_smooth_log_marginal_likelihood_const_term( differences: int, - diff_mat: csc_matrix, + difference_matrix: csc_matrix, weight_vect: np.ndarray, ) -> float: """ @@ -992,52 +1028,56 @@ def calc_whittaker_smooth_log_marginal_likelihood_const_term( >>> import numpy as np >>> from tests.test_for_utils.utils_funcs import ( ... calc_whittaker_smooth_log_marginal_likelihood_const_term, - ... get_sparse_fw_fin_diff_mat, + ... get_sparse_forward_finite_difference_matrix, ... ) >>> # Generation of the weight matrix W and the finite difference matrix D >>> weights = np.array([0.5, 1.0, 0.5, 1.0, 0.5]) - >>> n_data, differences = weights.size, 1 - >>> diff_mat = get_sparse_fw_fin_diff_mat( - ... n_data=n_data, + >>> num_data, differences = weights.size, 1 + >>> difference_matrix_sparse = get_sparse_forward_finite_difference_matrix( + ... num_data=num_data, ... differences=differences, ... 
) - >>> diff_mat_dense = diff_mat.toarray() + >>> difference_matrix_dense = difference_matrix_sparse.toarray() >>> # Test 1 with all weights being non-zero >>> # Calculation of the log pseudo-determinant of the weight matrix W >>> # since it is diagonal, the log-determinant is the sum of the logarithms of the >>> # diagonal elements - >>> log_pseudo_det_w = np.log(weights).sum() - >>> log_pseudo_det_w + >>> log_pseudo_determinant_w = np.log(weights).sum() + >>> log_pseudo_determinant_w -2.0794415416798357 >>> # Calculation of the log pseudo-determinant via the Cholesky decomposition of >>> # the product D @ D.T - >>> squ_diff_mat_chol = np.linalg.cholesky(diff_mat_dense @ diff_mat_dense.T) - >>> squ_diff_mat_chol + >>> squared_difference_matrix_chol = np.linalg.cholesky( + ... difference_matrix_dense @ difference_matrix_dense.T + ... ) + >>> squared_difference_matrix_chol array([[ 1.41421356, 0. , 0. , 0. ], [-0.70710678, 1.22474487, 0. , 0. ], [ 0. , -0.81649658, 1.15470054, 0. ], [ 0. , 0. , -0.8660254 , 1.11803399]]) >>> # the sum of the doubled logarithms of the main diagonal elements is the log >>> # pseudo-determinant of the matrix D.T @ D - >>> log_pseudo_det_dtd = 2.0 * np.log(np.diag(squ_diff_mat_chol)).sum() - >>> log_pseudo_det_dtd + >>> log_pseudo_determinant_dt_dot_d = ( + ... 2.0 * np.log(np.diag(squared_difference_matrix_chol)).sum() + ... ) + >>> log_pseudo_determinant_dt_dot_d 1.6094379124341003 >>> # Calculation of the theoretical constant term >>> logml_theoretical = ( - ... (n_data - differences) * np.log(2.0 * np.pi) - ... - log_pseudo_det_w - ... - log_pseudo_det_dtd + ... (num_data - differences) * np.log(2.0 * np.pi) + ... - log_pseudo_determinant_w + ... - log_pseudo_determinant_dt_dot_d ... ) >>> # Calculation of the constant term via the utility function >>> logml_via_function = calc_whittaker_smooth_log_marginal_likelihood_const_term( ... differences=differences, - ... diff_mat=diff_mat, + ... 
difference_matrix=difference_matrix_sparse, ... weight_vect=weights, ... ) >>> logml_via_function @@ -1049,19 +1089,19 @@ def calc_whittaker_smooth_log_marginal_likelihood_const_term( >>> weights[1] = 0.0 >>> weights[3] = 0.0 >>> nonzero_weights_flags = weights > 0.0 - >>> log_pseudo_det_w = np.log(weights[nonzero_weights_flags]).sum() + >>> log_pseudo_determinant_w = np.log(weights[nonzero_weights_flags]).sum() >>> # Calculation of the theoretical constant term >>> logml_theoretical = ( ... (nonzero_weights_flags.sum() - differences) * np.log(2.0 * np.pi) - ... - log_pseudo_det_w - ... - log_pseudo_det_dtd + ... - log_pseudo_determinant_w + ... - log_pseudo_determinant_dt_dot_d ... ) >>> # Calculation of the constant term via the utility function >>> logml_via_function = calc_whittaker_smooth_log_marginal_likelihood_const_term( ... differences=differences, - ... diff_mat=diff_mat, + ... difference_matrix=difference_matrix_sparse, ... weight_vect=weights, ... ) >>> logml_via_function @@ -1077,22 +1117,22 @@ def calc_whittaker_smooth_log_marginal_likelihood_const_term( zero_weight_tol = WhittakerLikeSolver._WhittakerLikeSolver__zero_weight_tol # type: ignore # for W, the log pseudo-determinant is calculated ... - w_nonzero_idxs = weight_vect > weight_vect.max() * zero_weight_tol - nnz_w = w_nonzero_idxs.sum() - w_log_pseudo_det = np.log(weight_vect[w_nonzero_idxs]).sum() + w_nonzero_indices = weight_vect > weight_vect.max() * zero_weight_tol + num_nonzero_w = w_nonzero_indices.sum() + w_log_pseudo_determinant = np.log(weight_vect[w_nonzero_indices]).sum() # ... 
followed by the log pseudo-determinant of the penalty matrix D.T @ D which is # equivalent to the determinant of the flipped matrix D @ D.T which is not # rank-deficient - _, penalty_log_pseudo_det = sparse_slogdet_from_superlu( - splu=spla.splu(A=diff_mat @ diff_mat.transpose()) + _, penalty_log_pseudo_determinant = sparse_slogdet_from_superlu( + splu=spla.splu(A=difference_matrix @ difference_matrix.transpose()) ) # from all of this, the constant term is computed return ( - (nnz_w - differences) * np.log(2.0 * np.pi) - - w_log_pseudo_det - - penalty_log_pseudo_det + (num_nonzero_w - differences) * np.log(2.0 * np.pi) + - w_log_pseudo_determinant + - penalty_log_pseudo_determinant ) @@ -1101,7 +1141,7 @@ def find_whittaker_smooth_opt_lambda_log_marginal_likelihood( weight_vect: np.ndarray, differences: int, log_lambda_bounds: Tuple[float, float], - n_opts: int, + num_optimizations: int, ) -> Tuple[float, float, np.ndarray]: """ Finds the optimal lambda value for a Whittaker smoother by maximising the log @@ -1131,8 +1171,9 @@ def get_smooth_solution( lam = exp(log_lam) - lhs_mat = lam * penalty_mat - lhs_mat += sp_diags( + # NOTE: lhs is "left-hand side" + lhs_matrix = lam * penalty_matrix + lhs_matrix += sp_diags( diagonals=weight_vect, offsets=0, shape=(b_vect.size, b_vect.size), @@ -1140,7 +1181,7 @@ def get_smooth_solution( ) # then, the solution is obtained - lhs_splu = spla.splu(A=lhs_mat) + lhs_splu = spla.splu(A=lhs_matrix) smooth_solution = lhs_splu.solve(rhs=weight_vect * b_vect) return ( @@ -1160,44 +1201,51 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: # matrix and the lambda value smooth_solution, lhs_splu, lam, log_lam = get_smooth_solution(log_lam=log_lam) - # the log-determinant of the lhs matrix is calculated - _, logdet_lhs = sparse_slogdet_from_superlu(splu=lhs_splu) + # the log-determinant of the left-hand-side matrix is calculated + # NOTE: lhs is "left-hand side" + _, log_determinant_lhs = 
sparse_slogdet_from_superlu(splu=lhs_splu) # finally, the log marginal likelihood is computed from: # 1) the weighted residual sum of squares - wrss = (weight_vect * np.square(b_vect - smooth_solution)).sum() + weighted_sum_of_squared_residuals = ( + weight_vect * np.square(b_vect - smooth_solution) + ).sum() # 2) the sum of squared penalties # NOTE: the order of multiplications for the following term is important because # the last multiplication is a matrix-vector resulting in another vector; # the other way around would result in another matrix followed by # a matrix-vector multiplication - pss = lam * (smooth_solution @ (penalty_mat @ smooth_solution)) + weights_sum_of_squared_penalties = lam * ( + smooth_solution @ (penalty_matrix @ smooth_solution) + ) # 3) the log-determinant of the lhs matrix and the constant term # NOTE: the sign is positive because the log marginal likelihood is maximised # and not minimised return 0.5 * ( - wrss - + pss + weighted_sum_of_squared_residuals + + weights_sum_of_squared_penalties - (b_vect.size - differences) * log_lam - + logdet_lhs + + log_determinant_lhs + logml_constant_term ) ### Pre-computations ### # then, some pre-computations are made - n_data = b_vect.size + num_data = b_vect.size log_lambda_min, log_lambda_max = log_lambda_bounds - diff_mat = get_sparse_fw_fin_diff_mat( - n_data=n_data, + difference_matrix_sparse = get_sparse_forward_finite_difference_matrix( + num_data=num_data, differences=differences, ) - penalty_mat = (diff_mat.transpose() @ diff_mat).tocsc() # type: ignore + penalty_matrix = ( + difference_matrix_sparse.transpose() @ difference_matrix_sparse + ).tocsc() # type: ignore logml_constant_term = calc_whittaker_smooth_log_marginal_likelihood_const_term( differences=differences, - diff_mat=diff_mat, + difference_matrix=difference_matrix_sparse, weight_vect=weight_vect, ) @@ -1208,7 +1256,7 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: opt_log_lam = brute( 
func=logml_target_func, ranges=((log_lambda_min, log_lambda_max),), - Ns=n_opts, + Ns=num_optimizations, finish=None, full_output=False, ) @@ -1222,7 +1270,7 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: opt_log_lam = brute( func=logml_target_func, ranges=((log_lambda_min, log_lambda_max),), - Ns=n_opts, + Ns=num_optimizations, finish=None, full_output=False, ) From fc8572fd64af788908e782f8b0cc7329c4cee44a Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 24 Jun 2024 22:01:46 +0200 Subject: [PATCH 114/118] style: [44] - renamed all variables and function of the `_whittaker_base` and all related tests to make them more concise - removed `combination` in `pytest.mark.parametrize` in favour of individual variables which are more concise - made error checks explicit for error messages and not only the Exception type to avoid unintended error behaviour --- chemotools/baseline/_air_pls.py | 2 +- chemotools/baseline/_ar_pls.py | 2 +- chemotools/smooth/_whittaker_smooth.py | 2 +- chemotools/utils/_finite_differences.py | 14 +- .../_whittaker_base/auto_lambda/__init__.py | 2 +- .../{logml.py => log_marginal_likelihood.py} | 44 ++-- .../auto_lambda/optimization.py | 44 ++-- .../_whittaker_base/auto_lambda/shared.py | 2 +- .../utils/_whittaker_base/initialisation.py | 69 +++--- chemotools/utils/_whittaker_base/main.py | 114 +++++---- chemotools/utils/_whittaker_base/misc.py | 6 +- chemotools/utils/_whittaker_base/solvers.py | 48 ++-- tests/test_for_utils/test_whittaker_base.py | 218 ++++++++++-------- tests/test_for_utils/utils_funcs.py | 4 +- tests/test_functionality.py | 64 ++--- 15 files changed, 358 insertions(+), 277 deletions(-) rename chemotools/utils/_whittaker_base/auto_lambda/{logml.py => log_marginal_likelihood.py} (86%) diff --git a/chemotools/baseline/_air_pls.py b/chemotools/baseline/_air_pls.py index ce3a05bb..214a35b8 100644 --- a/chemotools/baseline/_air_pls.py +++ b/chemotools/baseline/_air_pls.py @@ -139,7 +139,7 @@ def fit(self, X: 
np.ndarray, y=None) -> "AirPls": # the internal solver is set up self._setup_for_fit( - n_data=X.shape[1], + num_data=X.shape[1], differences=self.polynomial_order, lam=self.lam, child_class_name=self.__class__.__name__, diff --git a/chemotools/baseline/_ar_pls.py b/chemotools/baseline/_ar_pls.py index 90c92925..958747b5 100644 --- a/chemotools/baseline/_ar_pls.py +++ b/chemotools/baseline/_ar_pls.py @@ -150,7 +150,7 @@ def fit(self, X: np.ndarray, y=None) -> "ArPls": # the internal solver is setup self._setup_for_fit( - n_data=X.shape[1], + num_data=X.shape[1], differences=self.differences, lam=self.lam, child_class_name=self.__class__.__name__, diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 0d565991..6ef5da58 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -224,7 +224,7 @@ def fit(self, X: ndarray, y: None = None) -> "WhittakerSmooth": self.n_features_in_ = X.shape[1] # ... and all the required attributes for fitting self._setup_for_fit( - n_data=self.n_features_in_, + num_data=self.n_features_in_, lam=self.lam, differences=self.differences, child_class_name=self.__class__.__name__, diff --git a/chemotools/utils/_finite_differences.py b/chemotools/utils/_finite_differences.py index 662dda78..68cb812d 100644 --- a/chemotools/utils/_finite_differences.py +++ b/chemotools/utils/_finite_differences.py @@ -323,18 +323,18 @@ def squared_forward_difference_matrix_banded( Returns ------- - squ_fw_fin_diff_mat_cho_banded : ndarray of shape (n_bands, n_data - differences + 1) or (n_bands, n_data) + squ_fw_fin_diff_mat_cho_banded : ndarray of shape (num_bands, num_data - differences + 1) or (n_bands, num_data) The squared forward finite differences matrix in the upper banded storage format used for LAPACK's banded Cholesky decomposition (see Notes for details). 
When ``orig_first`` is ``True``, the matrix has at maximum ``differences + 1`` - bands (rows) and ``n_data - differences + 1`` columns. + bands (rows) and ``num_data - differences + 1`` columns. Otherwise, the matrix has at maximum ``differences + 1`` bands (rows) and - ``n_data`` columns. + ``num_data`` columns. Raises ------ ValueError - If ``n_data`` is below ``differences + 1``, i.e., the kernel does not fit into + If ``num_data`` is below ``differences + 1``, i.e., the kernel does not fit into the data at least once. ValueError If ``differences`` is below 1. @@ -379,7 +379,7 @@ def squared_forward_difference_matrix_banded( try: check_scalar( num_data, - name="n_data", + name="num_data", target_type=Integral, min_val=differences + 1, include_boundaries="left", @@ -423,7 +423,7 @@ def estimate_noise_stddev( Parameters ---------- - series : ndarray of shape (n_data,) + series : ndarray of shape (num_data,) The series for which the noise standard deviation is estimated. differences : int, default=6 The order of the differences starting from 0 for the original curve, 1 for the @@ -494,7 +494,7 @@ def estimate_noise_stddev( Returns ------- - noise_stddev : ndarray of shape (n_data,) + noise_stddev : ndarray of shape (num_data,) The estimated noise standard deviation raised to ``power`` for each data point in the series. diff --git a/chemotools/utils/_whittaker_base/auto_lambda/__init__.py b/chemotools/utils/_whittaker_base/auto_lambda/__init__.py index eb50bf8b..5232d113 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/__init__.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/__init__.py @@ -7,7 +7,7 @@ class implementation. 
### Imports ### -from chemotools.utils._whittaker_base.auto_lambda.logml import ( # noqa: F401 +from chemotools.utils._whittaker_base.auto_lambda.log_marginal_likelihood import ( # noqa: F401 get_log_marginal_likelihood, get_log_marginal_likelihood_constant_term, ) diff --git a/chemotools/utils/_whittaker_base/auto_lambda/logml.py b/chemotools/utils/_whittaker_base/auto_lambda/log_marginal_likelihood.py similarity index 86% rename from chemotools/utils/_whittaker_base/auto_lambda/logml.py rename to chemotools/utils/_whittaker_base/auto_lambda/log_marginal_likelihood.py index b8d1d250..3c698dd2 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/log_marginal_likelihood.py @@ -11,13 +11,15 @@ import numpy as np -from chemotools.utils import _banded_linalg as bla from chemotools.utils import _models -from chemotools.utils._whittaker_base.auto_lambda.shared import get_smooth_wrss +from chemotools.utils._banded_linalg import slogdet_lu_banded +from chemotools.utils._whittaker_base.auto_lambda.shared import ( + smooth_weighted_sum_of_squared_residuals, +) ### Constants ### -_LN_TWO_PI: float = 1.8378770664093453 # ln(2 * pi) +_LN_OF_TWO_PI: float = 1.8378770664093453 # ln(2 * pi) ### Type Aliases ### @@ -29,7 +31,7 @@ def get_log_marginal_likelihood_constant_term( differences: int, - penalty_mat_log_pseudo_det: float, + penalty_matrix_log_pseudo_determinant: float, weights: np.ndarray, zero_weight_tol: float, ) -> float: @@ -65,14 +67,14 @@ def get_log_marginal_likelihood_constant_term( # from the log pseudo-determinant of the weight matrix, i.e., the product of the # non-zero elements of the weight vector nonzero_w_flags = weights > weights.max() * zero_weight_tol - nnz_w = nonzero_w_flags.sum() - log_pseudo_det_w = np.log(weights[nonzero_w_flags]).sum() + num_nonzero_w = nonzero_w_flags.sum() + log_pseudo_determinant_w = np.log(weights[nonzero_w_flags]).sum() # the constant term of the log marginal 
likelihood is computed return ( - (nnz_w - differences) * _LN_TWO_PI - - log_pseudo_det_w - - penalty_mat_log_pseudo_det + (num_nonzero_w - differences) * _LN_OF_TWO_PI + - log_pseudo_determinant_w + - penalty_matrix_log_pseudo_determinant ) @@ -81,11 +83,11 @@ def get_log_marginal_likelihood( log_lam: float, lam: float, differences: int, - diff_kernel_flipped: np.ndarray, + difference_kernel_flipped: np.ndarray, rhs_b: np.ndarray, rhs_b_smooth: np.ndarray, weights: Union[float, np.ndarray], - w_plus_penalty_plus_n_samples_term: float, + w_plus_penalty_plus_num_samples_term: float, ) -> float: """ Computes the log marginal likelihood for the automatic fitting of the penalty @@ -105,13 +107,13 @@ def get_log_marginal_likelihood( The penalty weight lambda used for the smoothing, i.e., ``exp(log_lam)``. differences : int The order of the finite differences to use for the smoothing. - diff_kernel_flipped : ndarray of shape (differences + 1,) + difference_kernel_flipped : ndarray of shape (differences + 1,) The flipped forward finite differences kernel used for the smoothing. b, b_smooth : ndarray of shape (m,) The original series and its smoothed counterpart. w : float or ndarray of shape (m,) The weights to use for the smoothing. - w_plus_penalty_plus_n_samples_term : float + w_plus_penalty_plus_num_samples_term : float The last term of the log marginal likelihood that is constant since it involves the weights, the penalty matrix, and the number of data points which are all constant themselves (see the Notes for details). @@ -153,7 +155,7 @@ def get_log_marginal_likelihood( """ # noqa: E501 # first, the weighted Sum of Squared Residuals is computed ... 
- wrss = get_smooth_wrss( + weighted_sum_of_squared_residuals = smooth_weighted_sum_of_squared_residuals( rhs_b=rhs_b, rhs_b_smooth=rhs_b_smooth, weights=weights, @@ -162,14 +164,16 @@ def get_log_marginal_likelihood( # finite differences of the smoothed series # NOTE: ``np.convolve`` is used to compute the forward finite differences and # since it flips the provided kernel, an already flipped kernel is used - pss = ( + sum_of_squared_penalties = ( lam - * np.square(np.convolve(rhs_b_smooth, diff_kernel_flipped, mode="valid")).sum() + * np.square( + np.convolve(rhs_b_smooth, difference_kernel_flipped, mode="valid") + ).sum() ) # besides the determinant of the combined left hand side matrix has to be # computed from its decomposition - lhs_logdet_sign, lhs_logabsdet = bla.slogdet_lu_banded( + lhs_logdet_sign, lhs_logabsdet = slogdet_lu_banded( lub_factorization=factorization, ) @@ -177,11 +181,11 @@ def get_log_marginal_likelihood( # computed and returned if lhs_logdet_sign > 0.0: return -0.5 * ( - wrss - + pss + weighted_sum_of_squared_residuals + + sum_of_squared_penalties - (rhs_b.size - differences) * log_lam + lhs_logabsdet - + w_plus_penalty_plus_n_samples_term + + w_plus_penalty_plus_num_samples_term ) # otherwise, if the determinant is negative, the system is extremely diff --git a/chemotools/utils/_whittaker_base/auto_lambda/optimization.py b/chemotools/utils/_whittaker_base/auto_lambda/optimization.py index baf7b116..801cc0d4 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/optimization.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/optimization.py @@ -16,9 +16,9 @@ ### Constants ### -_LN_TEN: float = 2.302585092994046 # ln(10) -_half_log_decade: float = 0.5 * _LN_TEN -_X_ABS_LOG_TOL: float = 0.0049 # ~0.5% when converted from log to real +_LN_OF_A_DECADE: float = 2.302585092994046 # ln(10) +_half_ln_of_a_decade: float = 0.5 * _LN_OF_A_DECADE +_X_ABS_LN_TOL: float = 0.0049 # ~0.5% when converted from log to real ### Optimization Functions 
### @@ -42,28 +42,34 @@ def get_optimized_lambda( # unless the search space spans less than 1 decade, i.e., ln(10) ~= 2.3, a grid # search is carried out to shrink the search space for the final optimization; # the grid is spanned with an integer number of steps of half a decade - log_low_bound, log_upp_bound = lam.log_auto_bounds - bound_log_diff = log_upp_bound - log_low_bound - if bound_log_diff > _LN_TEN: - target_best = float("inf") - n_steps = 1 + ceil(bound_log_diff / _half_log_decade) + log_lower_bound, log_upper_bound = lam.log_auto_bounds + bound_log_difference = log_upper_bound - log_lower_bound + if bound_log_difference > _LN_OF_A_DECADE: + target_best_so_far = float("inf") + num_steps = 1 + ceil(bound_log_difference / _half_ln_of_a_decade) # NOTE: the following ensures that the upper bound is not exceeded - step_size = bound_log_diff / (n_steps - 1) + step_size = bound_log_difference / (num_steps - 1) # all the trial values are evaluated and the best one is stored - for trial in range(0, n_steps): - log_lam_curr = log_low_bound + trial * step_size - target_curr = fun(log_lam_curr, *args) + for trial in range(0, num_steps): + log_lam_current = log_lower_bound + trial * step_size + target_current = fun(log_lam_current, *args) - if target_curr < target_best: - log_lam_best = log_lam_curr - target_best = target_curr + if target_current < target_best_so_far: + log_lam_best_so_far = log_lam_current + target_best_so_far = target_current # then, the bounds for the final optimization are shrunk to plus/minus half # a decade around the best trial value # NOTE: the following ensures that the bounds are not violated - log_low_bound = max(log_lam_best - _half_log_decade, log_low_bound) - log_upp_bound = min(log_lam_best + _half_log_decade, log_upp_bound) + log_lower_bound = max( + log_lam_best_so_far - _half_ln_of_a_decade, + log_lower_bound, + ) + log_upper_bound = min( + log_lam_best_so_far + _half_ln_of_a_decade, + log_upper_bound, + ) # finally, a scalar 
optimization is performed # NOTE: since the optimization is carried out over the log of lambda, the @@ -71,9 +77,9 @@ def get_optimized_lambda( return exp( minimize_scalar( fun=fun, - bounds=(log_low_bound, log_upp_bound), + bounds=(log_lower_bound, log_upper_bound), args=args, method="bounded", - options={"xatol": _X_ABS_LOG_TOL}, + options={"xatol": _X_ABS_LN_TOL}, ).x ) diff --git a/chemotools/utils/_whittaker_base/auto_lambda/shared.py b/chemotools/utils/_whittaker_base/auto_lambda/shared.py index 0f95c8eb..1760bab1 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/shared.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/shared.py @@ -22,7 +22,7 @@ ### Functions ### -def get_smooth_wrss( +def smooth_weighted_sum_of_squared_residuals( rhs_b: np.ndarray, rhs_b_smooth: np.ndarray, weights: Union[float, np.ndarray], diff --git a/chemotools/utils/_whittaker_base/initialisation.py b/chemotools/utils/_whittaker_base/initialisation.py index abffe877..bcdf3406 100644 --- a/chemotools/utils/_whittaker_base/initialisation.py +++ b/chemotools/utils/_whittaker_base/initialisation.py @@ -10,9 +10,17 @@ import numpy as np -from chemotools.utils import _banded_linalg as bla -from chemotools.utils import _finite_differences as fdiff from chemotools.utils import _models +from chemotools.utils._banded_linalg import ( + LAndUBandCounts, + convert_upper_chol_banded_to_lu_banded_storage, + lu_banded, + slogdet_lu_banded, +) +from chemotools.utils._finite_differences import ( + forward_finite_difference_kernel, + squared_forward_difference_matrix_banded, +) from chemotools.utils._types import RealNumeric ### Type Aliases ### @@ -82,12 +90,12 @@ def get_checked_lambda(lam: Any) -> _models.WhittakerSmoothLambda: ) -def get_squ_fw_diff_mat_banded( - n_data: int, +def get_squared_forward_finite_difference_matrix_banded( + num_data: int, differences: int, - orig_first: bool, + original_first: bool, dtype: Type, -) -> Tuple[bla.LAndUBandCounts, np.ndarray]: +) -> 
Tuple[LAndUBandCounts, np.ndarray]: """ Returns the squared forward finite difference penalty matrix ``D.T @ D`` or its "flipped" counterpart ``D @ D.T`` in the banded storage format used for LAPACK's @@ -99,29 +107,36 @@ def get_squ_fw_diff_mat_banded( # NOTE: the matrix is returned with integer entries because integer computations # can be carried out at maximum precision; this has to be converted to # double precision for the LU decomposition - penalty_mat_banded = fdiff.squared_forward_difference_matrix_banded( - num_data=n_data, + penalty_matrix_banded = squared_forward_difference_matrix_banded( + num_data=num_data, differences=differences, - original_first=orig_first, + original_first=original_first, ).astype(dtype) # ... and cast to the banded storage format for LAPACK's LU decomposition - return bla.convert_upper_chol_banded_to_lu_banded_storage(ab=penalty_mat_banded) + return convert_upper_chol_banded_to_lu_banded_storage(ab=penalty_matrix_banded) -def get_flipped_fw_diff_kernel(differences: int, dtype: Type) -> np.ndarray: +def get_flipped_fw_diff_kernel( + differences: int, + dtype: Type, +) -> np.ndarray: """ Returns the flipped forward finite difference kernel for the specified difference order. 
""" - return np.flip( - fdiff.forward_finite_difference_kernel(differences=differences) - ).astype(dtype) + return np.flip(forward_finite_difference_kernel(differences=differences)).astype( + dtype + ) -def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> float: +def get_penalty_log_pseudo_determinant( + num_data: int, + differences: int, + dtype: Type, +) -> float: """ Computes the natural logarithm of the pseudo-determinant of the squared forward finite differences matrix ``D.T @ D`` which is necessary for the calculation of @@ -129,7 +144,7 @@ def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> fl Returns ------- - log_pseudo_det : float + log_pseudo_determinant : float The natural logarithm of the pseudo-determinant of the penalty matrix. Raises @@ -157,19 +172,21 @@ def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> fl """ # the flipped penalty matrix D @ D.T is computed - flipped_l_and_u, flipped_penalty_matb = get_squ_fw_diff_mat_banded( - n_data=n_data, - differences=differences, - orig_first=True, - dtype=dtype, + flipped_l_and_u, flipped_penalty_matrix_banded = ( + get_squared_forward_finite_difference_matrix_banded( + num_data=num_data, + differences=differences, + original_first=True, + dtype=dtype, + ) ) # the pseudo-determinant is computed from the partially pivoted LU decomposition # of the flipped penalty matrix - log_pseudo_det_sign, log_pseudo_det = bla.slogdet_lu_banded( - lub_factorization=bla.lu_banded( + log_pseudo_det_sign, log_pseudo_determinant = slogdet_lu_banded( + lub_factorization=lu_banded( l_and_u=flipped_l_and_u, - ab=flipped_penalty_matb, + ab=flipped_penalty_matrix_banded, check_finite=False, ), ) @@ -177,14 +194,14 @@ def get_penalty_log_pseudo_det(n_data: int, differences: int, dtype: Type) -> fl # if the sign of the pseudo-determinant is positive, the log pseudo-determinant # is returned if log_pseudo_det_sign > 0.0: - return log_pseudo_det + return 
log_pseudo_determinant # otherwise, if is negative, the penalty matrix is extremely ill-conditioned and # the automatic fitting of the penalty weight is not possible raise RuntimeError( f"\nThe pseudo-determinant of the penalty D.T @ D matrix is negative, " f"indicating that the system is extremely ill-conditioned.\n" - f"Automatic fitting for {n_data} data points and difference order " + f"Automatic fitting for {num_data} data points and difference order " f"{differences} is not possible.\n" f"Please consider reducing the number of data points to smooth by, e.g., " f"binning or lowering the difference order." diff --git a/chemotools/utils/_whittaker_base/main.py b/chemotools/utils/_whittaker_base/main.py index 5aae7e70..373c8fa1 100644 --- a/chemotools/utils/_whittaker_base/main.py +++ b/chemotools/utils/_whittaker_base/main.py @@ -17,10 +17,21 @@ from chemotools._runtime import PENTAPY_AVAILABLE from chemotools.utils import _models from chemotools.utils._banded_linalg import LAndUBandCounts -from chemotools.utils._whittaker_base import auto_lambda as auto -from chemotools.utils._whittaker_base import initialisation as init -from chemotools.utils._whittaker_base import solvers +from chemotools.utils._whittaker_base.auto_lambda import ( + _Factorization, + get_log_marginal_likelihood, + get_log_marginal_likelihood_constant_term, + get_optimized_lambda, +) +from chemotools.utils._whittaker_base.initialisation import ( + _LambdaSpecs, + get_checked_lambda, + get_flipped_fw_diff_kernel, + get_penalty_log_pseudo_determinant, + get_squared_forward_finite_difference_matrix_banded, +) from chemotools.utils._whittaker_base.misc import get_weight_generator +from chemotools.utils._whittaker_base.solvers import solve_normal_equations ### Class Implementation ### @@ -34,7 +45,7 @@ class WhittakerLikeSolver: Attributes ---------- - n_data_ : int + num_data_ : int The number of data points within the series to smooth. 
It is equivalent to ``n_features_in_``, but it was renamed to be allow for definition after the initialisation. @@ -43,24 +54,24 @@ class WhittakerLikeSolver: smooth estimate of the ``m``-th order derivative, this should be set to at least ``m + 2``. For higher orders, the systems to solve tend to get numerically instable, - especially when ``n_data_`` grows large and high values for ``lam_`` are used. + especially when ``num_data_`` grows large and high values for ``lam_`` are used. Values below 1 are not allowed. - _lam_inter_ : WhittakerSmoothLambda + _lam_internal_ : WhittakerSmoothLambda The internal representation of the lambda parameter to use for the smoothing, a.k.a. the penalty weight or smoothing parameter. It is internally stored as an instance of the dataclass :class:`WhittakerSmoothLambda`. _l_and_u_ : (int, int) The number of sub- (first) and superdiagonals (second element) of the final matrix to solve for smoothing. Both elements will equal ``differences_``. - _diff_kernel_flipped_ : ndarray of shape (0, ) or (differences + 1,) + _difference_kernel_flipped_ : ndarray of shape (0, ) or (differences + 1,) The flipped kernel to use for the forward finite differences. It is only required for the automatic fitting of the lambda parameter by maximizing the log marginal likelihood, i.e., when ``lam_ == WhittakerSmoothMethods.LOG_MARGINAL_LIKELIHOOD``. Flipping is required due to NumPy's definition of convolution. - _penalty_mat_banded_ : ndarray of shape (n_data - differences + 1, n_data - differences + 1) + _penalty_matrix_banded_ : ndarray of shape (num_data - differences + 1, num_data - differences + 1) The squared forward finite differences matrix ``D.T @ D`` stored in the banded storage format used for LAPACK's banded LU decomposition. 
- _penalty_mat_log_pseudo_det_ : float + _penalty_matrix_log_pseudo_determinant_ : float The natural logarithm of the pseudo-determinant of the squared forward finite differences matrix ``D.T @ D`` which is used for the automatic fitting of the lambda parameter by maximizing the log marginal likelihood, i.e., when @@ -97,9 +108,9 @@ def __init__(self) -> None: # pragma: no cover def _setup_for_fit( self, - n_data: int, + num_data: int, differences: int, - lam: init._LambdaSpecs, + lam: _LambdaSpecs, child_class_name: str, ) -> None: """ @@ -111,11 +122,9 @@ def _setup_for_fit( """ # the input arguments are stored and validated - self.n_data_: int = n_data + self.num_data_: int = num_data self.differences_: int = differences - self._lam_inter_: _models.WhittakerSmoothLambda = init.get_checked_lambda( - lam=lam - ) + self._lam_internal_: _models.WhittakerSmoothLambda = get_checked_lambda(lam=lam) self.__child_class_name: str = child_class_name # if the difference order exceeds 2, a warning is issued because then the @@ -133,32 +142,38 @@ def _setup_for_fit( # the squared forward finite difference matrix D.T @ D is computed in band # storage format for LAPACK's banded LU decomposition self._l_and_u_: LAndUBandCounts - self._penalty_mat_banded_: np.ndarray - self._l_and_u_, self._penalty_mat_banded_ = init.get_squ_fw_diff_mat_banded( - n_data=self.n_data_, - differences=self.differences_, - orig_first=False, - dtype=self.__dtype, + self._penalty_matrix_banded_: np.ndarray + self._l_and_u_, self._penalty_matrix_banded_ = ( + get_squared_forward_finite_difference_matrix_banded( + num_data=self.num_data_, + differences=self.differences_, + original_first=False, + dtype=self.__dtype, + ) ) # if the penalty weight is fitted automatically by maximization of the # log marginal likelihood, the natural logarithm of the pseudo-determinant of # D.T @ D is pre-computed together with the forward finite difference kernel - self._diff_kernel_flipped_: np.ndarray = np.ndarray([], 
dtype=self.__dtype) - self._penalty_mat_log_pseudo_det_: float = float("nan") - if self._lam_inter_.fit_auto and self._lam_inter_.method_used in { + self._difference_kernel_flipped_: np.ndarray = np.ndarray( + [], dtype=self.__dtype + ) + self._penalty_matrix_log_pseudo_determinant_: float = float("nan") + if self._lam_internal_.fit_auto and self._lam_internal_.method_used in { _models.WhittakerSmoothMethods.LOGML, }: # NOTE: the kernel is also returned with integer entries because integer # computations can be carried out at maximum precision - self._diff_kernel_flipped_ = init.get_flipped_fw_diff_kernel( + self._difference_kernel_flipped_ = get_flipped_fw_diff_kernel( differences=self.differences_, dtype=self.__dtype, ) - self._penalty_mat_log_pseudo_det_ = init.get_penalty_log_pseudo_det( - n_data=self.n_data_, - differences=self.differences_, - dtype=self.__dtype, + self._penalty_matrix_log_pseudo_determinant_ = ( + get_penalty_log_pseudo_determinant( + num_data=self.num_data_, + differences=self.differences_, + dtype=self.__dtype, + ) ) # finally, Pentapy is enabled if available, the number of differences is 2, @@ -167,7 +182,7 @@ def _setup_for_fit( PENTAPY_AVAILABLE and self.differences_ == 2 and self.__allow_pentapy - and not self._lam_inter_.fit_auto + and not self._lam_internal_.fit_auto ) ### Solver Methods ### @@ -178,7 +193,7 @@ def _solve( lam: float, rhs_b_weighted: np.ndarray, weights: Union[float, np.ndarray], - ) -> tuple[np.ndarray, _models.BandedSolvers, auto._Factorization]: + ) -> tuple[np.ndarray, _models.BandedSolvers, _Factorization]: """ Internal wrapper for the solver methods to solve the linear system of equations for the Whittaker-like smoother. 
@@ -192,11 +207,11 @@ def _solve( """ # noqa: E501 - return solvers.solve_normal_equations( + return solve_normal_equations( lam=lam, differences=self.differences_, l_and_u=self._l_and_u_, - penalty_mat_banded=self._penalty_mat_banded_, + penalty_matrix_banded=self._penalty_matrix_banded_, rhs_b_weighted=rhs_b_weighted, weights=weights, pentapy_enabled=self._pentapy_enabled_, @@ -233,16 +248,16 @@ def _marginal_likelihood_objective( # finally, the log marginal likelihood is computed and returned (negative since # the objective function is minimized, but the log marginal likelihood is # to be maximized) - return (-1.0) * auto.get_log_marginal_likelihood( + return (-1.0) * get_log_marginal_likelihood( factorization=factorization, # type: ignore log_lam=log_lam, # type: ignore lam=lam, differences=self.differences_, - diff_kernel_flipped=self._diff_kernel_flipped_, + difference_kernel_flipped=self._difference_kernel_flipped_, rhs_b=rhs_b, rhs_b_smooth=b_smooth, weights=weights, - w_plus_penalty_plus_n_samples_term=w_plus_penalty_plus_n_samples_term, + w_plus_penalty_plus_num_samples_term=w_plus_penalty_plus_n_samples_term, ) ### Solver management methods ### @@ -261,7 +276,7 @@ def _solve_single_b_fixed_lam( # if no value was provided for the penalty weight lambda, the respective class # attribute is used instead - lam = self._lam_inter_.fixed_lambda if lam is None else lam + lam = self._lam_internal_.fixed_lambda if lam is None else lam # the weights and the weighted series are computed depending on whether weights # are provided or not @@ -313,18 +328,18 @@ def _solve_single_b_auto_lam_logml( ) # the term that is constant for the log marginal likelihood is computed - w_plus_n_samples_term = auto.get_log_marginal_likelihood_constant_term( + w_plus_num_samples_term = get_log_marginal_likelihood_constant_term( differences=self.differences_, - penalty_mat_log_pseudo_det=self._penalty_mat_log_pseudo_det_, + 
penalty_matrix_log_pseudo_determinant=self._penalty_matrix_log_pseudo_determinant_, weights=weights, zero_weight_tol=self.__zero_weight_tol, ) # the optimization of the log marginal likelihood is carried out - opt_lambda = auto.get_optimized_lambda( + opt_lambda = get_optimized_lambda( fun=self._marginal_likelihood_objective, - lam=self._lam_inter_, - args=(rhs_b, weights, w_plus_n_samples_term), + lam=self._lam_internal_, + args=(rhs_b, weights, w_plus_num_samples_term), ) # the optimal penalty weight lambda is returned together with the smoothed @@ -355,7 +370,7 @@ def _solve_multiple_b( # Case 1: no weights are provided if weights is None: X_smooth, _, _ = self._solve( - lam=self._lam_inter_.fixed_lambda, + lam=self._lam_internal_.fixed_lambda, rhs_b_weighted=X.transpose(), weights=1.0, ) @@ -363,14 +378,17 @@ def _solve_multiple_b( # Case 2: weights are provided else: X_smooth, _, _ = self._solve( - lam=self._lam_inter_.fixed_lambda, + lam=self._lam_internal_.fixed_lambda, rhs_b_weighted=(X * weights).transpose(), weights=weights[0, ::], ) return ( X_smooth.transpose(), - np.full(shape=(X.shape[0],), fill_value=self._lam_inter_.fixed_lambda), + np.full( + shape=(X.shape[0],), + fill_value=self._lam_internal_.fixed_lambda, + ), ) ### Main Solver Entry Point ### @@ -418,7 +436,7 @@ def _whittaker_solve( # if multiple x with the same weights are to be solved for fixed lambda, this # can be done more efficiently by leveraging LAPACK'S (not pentapy's) ability to # perform multiple solves from the same inversion at once - if use_same_w_for_all and not self._lam_inter_.fit_auto: + if use_same_w_for_all and not self._lam_internal_.fit_auto: return self._solve_multiple_b(X=X, weights=weights) # otherwise, the solution of the linear system of equations is computed for @@ -429,12 +447,12 @@ def _whittaker_solve( _models.WhittakerSmoothMethods.FIXED: self._solve_single_b_fixed_lam, _models.WhittakerSmoothMethods.LOGML: self._solve_single_b_auto_lam_logml, } - smooth_method 
= smooth_method_assignment[self._lam_inter_.method_used] + smooth_method = smooth_method_assignment[self._lam_internal_.method_used] # then, the solution is computed for each series by means of a loop X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) - w_gen = get_weight_generator(weights=weights, n_series=X.shape[0]) + w_gen = get_weight_generator(weights=weights, num_series=X.shape[0]) for iter_i, (x_vect, wght) in enumerate(zip(X, w_gen)): X_smooth[iter_i], lam[iter_i] = smooth_method( rhs_b=x_vect, diff --git a/chemotools/utils/_whittaker_base/misc.py b/chemotools/utils/_whittaker_base/misc.py index d6a191ce..6a61d2ff 100644 --- a/chemotools/utils/_whittaker_base/misc.py +++ b/chemotools/utils/_whittaker_base/misc.py @@ -15,7 +15,7 @@ def get_weight_generator( weights: Any, - n_series: int, + num_series: int, ) -> Generator[Union[float, np.ndarray], None, None]: """ Generates a generator that yields the weights for each series in a series matrix @@ -32,12 +32,12 @@ def get_weight_generator( # Case 1: No weights if weights is None: - for _ in range(n_series): + for _ in range(num_series): yield 1.0 # Case 2: 2D weights elif weights.ndim == 2: - for idx in range(0, n_series): + for idx in range(0, num_series): yield weights[idx] # Case 3: Invalid weights diff --git a/chemotools/utils/_whittaker_base/solvers.py b/chemotools/utils/_whittaker_base/solvers.py index e8bd2f80..5a4bfe1c 100644 --- a/chemotools/utils/_whittaker_base/solvers.py +++ b/chemotools/utils/_whittaker_base/solvers.py @@ -12,17 +12,19 @@ import numpy as np from chemotools._runtime import PENTAPY_AVAILABLE -from chemotools.utils import _banded_linalg as bla -from chemotools.utils import _models +from chemotools.utils._banded_linalg import LAndUBandCounts, lu_banded, lu_solve_banded +from chemotools.utils._models import ( + BandedLUFactorization, + BandedPentapyFactorization, + BandedSolvers, +) if PENTAPY_AVAILABLE: import pentapy as pp ### Type Aliases ### -_Factorization = Union[ - 
_models.BandedLUFactorization, _models.BandedPentapyFactorization -] +_Factorization = Union[BandedLUFactorization, BandedPentapyFactorization] ### Functions ### @@ -59,9 +61,11 @@ def solve_pentapy( # NOTE: the solutions are first written into the rows of the solution matrix # because row-access is more efficient for C-contiguous arrays; # afterwards, the solution matrix is transposed - solution = np.empty(shape=(rhs_b_weighted.shape[1], rhs_b_weighted.shape[0])) + solution_matrix = np.empty( + shape=(rhs_b_weighted.shape[1], rhs_b_weighted.shape[0]) + ) for iter_j in range(0, rhs_b_weighted.shape[1]): - solution[iter_j, ::] = pp.solve( + solution_matrix[iter_j, ::] = pp.solve( mat=lhs_a_banded, rhs=rhs_b_weighted[::, iter_j], is_flat=True, @@ -69,14 +73,14 @@ def solve_pentapy( solver=1, ) - return solution.transpose() + return solution_matrix.transpose() def solve_ppivoted_lu( - l_and_u: bla.LAndUBandCounts, + l_and_u: LAndUBandCounts, lhs_a_banded: np.ndarray, rhs_b_weighted: np.ndarray, -) -> tuple[np.ndarray, _models.BandedLUFactorization]: +) -> tuple[np.ndarray, BandedLUFactorization]: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with a partially pivoted LU decomposition. 
This is the same as solving the linear system @@ -88,13 +92,13 @@ def solve_ppivoted_lu( """ - lub_factorization = bla.lu_banded( + lub_factorization = lu_banded( l_and_u=l_and_u, ab=lhs_a_banded, check_finite=False, ) return ( - bla.lu_solve_banded( + lu_solve_banded( lub_factorization=lub_factorization, b=rhs_b_weighted, check_finite=False, @@ -107,12 +111,12 @@ def solve_ppivoted_lu( def solve_normal_equations( lam: float, differences: int, - l_and_u: bla.LAndUBandCounts, - penalty_mat_banded: np.ndarray, + l_and_u: LAndUBandCounts, + penalty_matrix_banded: np.ndarray, rhs_b_weighted: np.ndarray, weights: Union[float, np.ndarray], pentapy_enabled: bool, -) -> tuple[np.ndarray, _models.BandedSolvers, _Factorization]: +) -> tuple[np.ndarray, BandedSolvers, _Factorization]: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` where ``W`` is a diagonal matrix with the weights ``w`` on the main diagonal and ``D`` is @@ -129,7 +133,7 @@ def solve_normal_equations( The order of the finite differences to use for the smoothing. l_and_u : LAndUBandCounts The number of sub- and super-diagonals of ``penalty_mat_banded``. - penalty_mat_banded : ndarray of shape (2 * differences + 1, m) + penalty_matrix_banded : ndarray of shape (2 * differences + 1, m) The penalty matrix ``D.T @ D`` in the banded storage format used for LAPACK's banded LU decomposition. 
b_weighted : ndarray of shape (m,) or (m, n) @@ -180,7 +184,7 @@ def solve_normal_equations( # the banded storage format for the LAPACK LU decomposition is computed by # scaling the penalty matrix with the penalty weight lambda and then adding the # diagonal matrix with the weights - lhs_a_banded = lam * penalty_mat_banded + lhs_a_banded = lam * penalty_matrix_banded lhs_a_banded[differences, ::] += weights # the linear system of equations is solved with the most efficient method @@ -193,8 +197,8 @@ def solve_normal_equations( if np.isfinite(x).all(): return ( x, - _models.BandedSolvers.PENTAPY, - _models.BandedPentapyFactorization(), + BandedSolvers.PENTAPY, + BandedPentapyFactorization(), ) # Case 2: LU decomposition (final fallback for pentapy) @@ -206,14 +210,14 @@ def solve_normal_equations( ) return ( x, - _models.BandedSolvers.PIVOTED_LU, + BandedSolvers.PIVOTED_LU, lub_factorization, ) except np.linalg.LinAlgError: - available_solvers = f"{_models.BandedSolvers.PIVOTED_LU}" + available_solvers = f"{BandedSolvers.PIVOTED_LU}" if pentapy_enabled: - available_solvers = f"{_models.BandedSolvers.PENTAPY}, {available_solvers}" + available_solvers = f"{BandedSolvers.PENTAPY}, {available_solvers}" raise RuntimeError( f"\nAll available solvers ({available_solvers}) failed to solve the " diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py index 7eca6c95..bbeeab64 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/test_for_utils/test_whittaker_base.py @@ -7,16 +7,18 @@ ### Imports ### from math import log -from typing import Any, Tuple, Type, Union +from typing import Any, Tuple, Union import numpy as np import pytest from chemotools.utils import _models -from chemotools.utils._whittaker_base.auto_lambda.shared import get_smooth_wrss +from chemotools.utils._whittaker_base.auto_lambda.shared import ( + smooth_weighted_sum_of_squared_residuals, +) from chemotools.utils._whittaker_base.initialisation 
import ( get_checked_lambda, - get_penalty_log_pseudo_det, + get_penalty_log_pseudo_determinant, ) from chemotools.utils._whittaker_base.main import WhittakerLikeSolver from chemotools.utils._whittaker_base.misc import get_weight_generator @@ -43,7 +45,7 @@ @pytest.mark.parametrize( - "combination", + "lam, expected_result", [ ( # Number 0 (fixed float) 100.0, @@ -159,36 +161,27 @@ ), ( # Number 10 (wrong length tuple) (100.0, 10_000.0), - ValueError, + ValueError("must be a tuple of three elements"), ), ( # Number 11 (wrong type) "error", - TypeError, + TypeError("must be an integer, a float, a tuple of"), ), ], ) def test_get_checked_lambda( - combination: Tuple[ - _LambdaSpecsOrFlawed, Union[ExpectedWhittakerSmoothLambda, Type[Exception]] - ] + lam: _LambdaSpecsOrFlawed, + expected_result: Union[ExpectedWhittakerSmoothLambda, Exception], ) -> None: """ Tests the function that casts a penalty weight lambda to the respective dataclass. - The ``combination`` parameter defines - - - the lambda specification to be used and - - the expected result (will be an exception if the input should be considered - invalid by the function). 
- """ - # the input parameters are unpacked - lam, expected_result = combination - # if the expected output is an exception, the test is run in a context manager - if not isinstance(expected_result, ExpectedWhittakerSmoothLambda): - with pytest.raises(expected_result): + if isinstance(expected_result, Exception): + error_catch_phrase = str(expected_result) + with pytest.raises(type(expected_result), match=error_catch_phrase): get_checked_lambda(lam=lam) return @@ -207,53 +200,49 @@ def test_get_checked_lambda( @pytest.mark.parametrize( - "combination", + "weights, expected_output", [ - (None, 1.0), # Number 0 - ( # Number 1 + (None, 1.0), # Number 0 (no weights) + ( # Number 1 (2D weights) np.ones(shape=(10, 1_000), dtype=np.float64), np.ones(shape=(1_000), dtype=np.float64), ), - ( # Number 2 + ( # Number 2 (2D weights out of bounds) np.ones(shape=(5, 1_000), dtype=np.float64), - IndexError, + IndexError("is out of bounds for axis 0 with size"), ), - ( # Number 3 + ( # Number 3 (1D weights) np.ones(shape=(1_000), dtype=np.float64), - ValueError, + ValueError("If provided as an Array, the weights must be a 2D-Array"), ), - ( # Number 4 + ( # Number 4 (3D weights) np.ones(shape=(1, 5, 1_000), dtype=np.float64), - ValueError, + ValueError("If provided as an Array, the weights must be a 2D-Array"), + ), + ( # Number 5 (wrong type) + "error", + TypeError("must either be None or a NumPy-2D-Array"), ), - ("error", TypeError), # Number 5 ], ) def test_weight_generator_identical_weights( - combination: Tuple[Any, Union[np.ndarray, float, Type[Exception]]] + weights: Any, + expected_output: Union[np.ndarray, float, Exception], ) -> None: """ Tests the weight generator when provided with weights that are identical for all signals. - The ``combination`` parameter defines - - - the weights to be used and - - the expected output at each iteration (will be an exception if the input should - be considered invalid by the function). 
- """ - # the input parameters are unpacked - weights, expected_output = combination - # the number of series is defined - n_series = 10 + num_series = 10 # if the expected output is an exception, the test is run in a context manager - if not isinstance(expected_output, (np.ndarray, float, int)): - with pytest.raises(expected_output): - for _ in get_weight_generator(weights=weights, n_series=n_series): + if isinstance(expected_output, Exception): + error_catch_phrase = str(expected_output) + with pytest.raises(type(expected_output), match=error_catch_phrase): + for _ in get_weight_generator(weights=weights, num_series=num_series): pass return @@ -261,14 +250,14 @@ def test_weight_generator_identical_weights( # otherwise, the output is compared to the expected output # Case 1: the expected output is a scalar if isinstance(expected_output, (float, int)): - for wght in get_weight_generator(weights=weights, n_series=n_series): + for wght in get_weight_generator(weights=weights, num_series=num_series): assert isinstance(wght, (float, int)) assert wght == expected_output return # Case 2: the expected output is an array - for wght in get_weight_generator(weights=weights, n_series=n_series): + for wght in get_weight_generator(weights=weights, num_series=num_series): assert isinstance(wght, np.ndarray) assert np.array_equal(wght, expected_output) @@ -288,54 +277,68 @@ def test_weight_generator_different_weights() -> None: [11.0, 12.0, 13.0, 14.0, 15.0], ] ) - weights_ref = weights.copy() + weights_reference = weights.copy() # the generator is tested - for idx, wght in enumerate(get_weight_generator(weights=weights, n_series=3)): - assert np.array_equal(wght, weights_ref[idx, ::]) + for index, wght in enumerate( + get_weight_generator(weights=weights, num_series=weights.shape[0]) + ): + assert np.array_equal(wght, weights_reference[index, ::]) -@pytest.mark.parametrize("combination", [(True, 244_9755_000.0), (False, 490_000.0)]) -def test_smooth_wrss(combination: Tuple[bool, 
float]) -> None: +@pytest.mark.parametrize( + "with_weights, weighted_residual_sum_of_squared_expected", + [ + (True, 244_9755_000.0), + (False, 490_000.0), + ], +) +def test_smooth_weighted_residual_sum_of_squares( + with_weights: bool, + weighted_residual_sum_of_squared_expected: float, +) -> None: """ Tests the weighted residual sum of squares calculation. - The ``combination`` parameter defines - - - whether weights are used (``True``) or not (``False``) and - - the expected weighted residual sum of squares. - """ - # the input parameters are unpacked - with_weights, wrss_expected = combination - # two series are generated where the difference between the elements is 7.0 np.random.seed(42) - n_data = 10_000 - a_signs = np.random.choice([-1.0, 1.0], size=(n_data,), replace=True) + num_data = 10_000 + a_signs = np.random.choice([-1.0, 1.0], size=(num_data,), replace=True) a_series = a_signs * 4.5 b_series = (-1.0) * a_signs * 2.5 # the weights are generated weights = ( - np.arange(start=0, stop=n_data, step=1.0, dtype=np.float64) + np.arange(start=0, stop=num_data, step=1.0, dtype=np.float64) if with_weights else 1.0 ) # the wrss is calculated ... - wrss = get_smooth_wrss(rhs_b=a_series, rhs_b_smooth=b_series, weights=weights) + weighted_sum_of_squared_residuals_chemotools = ( + smooth_weighted_sum_of_squared_residuals( + rhs_b=a_series, + rhs_b_smooth=b_series, + weights=weights, + ) + ) # ... 
and compared to the expected value with a very strict tolerance - assert np.isclose(wrss, wrss_expected, atol=1e-13, rtol=0.0) + assert np.isclose( + weighted_sum_of_squared_residuals_chemotools, + weighted_residual_sum_of_squared_expected, + atol=1e-13, + rtol=0.0, + ) # TODO: due to ill-conditioning, this is highly limited in the differences and number # of data points; in the future, this should be tackled by QR-decomposition for # extra numerical stability @pytest.mark.parametrize( - "differences_and_n_data_from_to", + "differences, num_data_from, num_data_to", [ (1, 0, 2_000), (1, 2_001, 4_000), @@ -350,17 +353,23 @@ def test_smooth_wrss(combination: Tuple[bool, float]) -> None: ], ) def test_penalty_log_pseudo_det_can_compute( - differences_and_n_data_from_to: Tuple[int, int, int] + differences: int, + num_data_from: int, + num_data_to: int, ) -> None: """ Tests the log pseudo-determinant of the penalty matrix for all the difference orders and number of data points. + The test is successful if the function does not raise an exception. 
""" - differences, n_data_from, n_data_to = differences_and_n_data_from_to - for nd in range(max(differences + 1, n_data_from), n_data_to + 1): - get_penalty_log_pseudo_det(n_data=nd, differences=differences, dtype=np.float64) + for num_data in range(max(differences + 1, num_data_from), num_data_to + 1): + get_penalty_log_pseudo_determinant( + num_data=num_data, + differences=differences, + dtype=np.float64, + ) # TODO: this test will not 100% reflect reality as intended; in the future this should @@ -368,7 +377,7 @@ def test_penalty_log_pseudo_det_can_compute( # right now, it is set to a number of data points that causes the intended # failure, but in the future, the condition number has to be used to detect # ill-conditioning -def test_penalty_log_pseudo_det_breaks_ill_conditioned() -> None: +def test_penalty_log_pseudo_determinant_breaks_ill_conditioned() -> None: """ Tests that the log pseudo-determinant of the penalty matrix breaks when the matrix is ill-conditioned. @@ -377,13 +386,18 @@ def test_penalty_log_pseudo_det_breaks_ill_conditioned() -> None: # the difference order and number of data points are set so high that the matrix # becomes ill-conditioned - n_data = 1_000 + num_data = 1_000 differences = 10 # the function is tested for breaking - with pytest.raises(RuntimeError): - get_penalty_log_pseudo_det( - n_data=n_data, differences=differences, dtype=np.float64 + with pytest.raises( + RuntimeError, + match="The pseudo-determinant of the penalty D.T @ D matrix is negative", + ): + get_penalty_log_pseudo_determinant( + num_data=num_data, + differences=differences, + dtype=np.float64, ) return @@ -410,23 +424,25 @@ def test_normal_condition_solve_breaks_ill_conditioned(with_pentapy: bool) -> No pytest.skip("Pentapy is not installed.") # a banded ill-conditioned matrix is created that has zeros on the diagonal - n_data = 10_000 + num_data = 10_000 differences = 2 - a_banded = np.ones(shape=(2 * differences + 1, n_data), dtype=np.float64) + a_banded = 
np.ones(shape=(2 * differences + 1, num_data), dtype=np.float64) a_banded[differences, :] = 0.0 # some further required variables are initialised lam = 1e100 - b_vect = np.ones(shape=(n_data,), dtype=np.float64) + b_vect = np.ones(shape=(num_data,), dtype=np.float64) weights = 0.0 # Test that the solver breaks - with pytest.raises(RuntimeError): + with pytest.raises( + RuntimeError, match="failed to solve the linear system of equations" + ): solve_normal_equations( lam=lam, differences=differences, l_and_u=(differences, differences), - penalty_mat_banded=a_banded, + penalty_matrix_banded=a_banded, rhs_b_weighted=b_vect, weights=weights, pentapy_enabled=with_pentapy, @@ -440,10 +456,13 @@ def test_whittakerlike_issues_warning_difference_order_too_high() -> None: """ - with pytest.warns(UserWarning): - whitt_base = WhittakerLikeSolver() - whitt_base._setup_for_fit( - n_data=500, + with pytest.warns( + UserWarning, + match="WARNING: With the current implementation, the numerical stability", + ): + whittaker_base = WhittakerLikeSolver() + whittaker_base._setup_for_fit( + num_data=500, differences=3, lam=_models.WhittakerSmoothLambda( bounds=(100.0, 10_000.0), @@ -470,10 +489,10 @@ def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( """ # the smoother is initialised ... - n_data = 500 - whitt_base = WhittakerLikeSolver() - whitt_base._setup_for_fit( - n_data=n_data, + num_data = 500 + whittaker_base = WhittakerLikeSolver() + whittaker_base._setup_for_fit( + num_data=num_data, differences=differences, lam=_models.WhittakerSmoothLambda( bounds=(100.0, 10_000.0), @@ -484,9 +503,12 @@ def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( # ... 
and the log marginal likelihood method is called without weights np.random.seed(42) - X = np.random.rand(n_data) - with pytest.raises(ValueError): - whitt_base._whittaker_solve( + X = np.random.rand(num_data) + with pytest.raises( + ValueError, + match="is only possible if weights are provided", + ): + whittaker_base._whittaker_solve( X=X, weights=None, use_same_w_for_all=same_weights_for_all, @@ -496,11 +518,11 @@ def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( @pytest.mark.parametrize("with_zero_weights", [True, False]) @pytest.mark.parametrize("same_weights_for_all", [True, False]) @pytest.mark.parametrize("differences", [1, 2]) -@pytest.mark.parametrize("n_series", [1, 5]) +@pytest.mark.parametrize("num_series", [1, 5]) def test_auto_lambda_log_marginal_likelihood( spectrum_whittaker_auto_lambda: np.ndarray, # noqa: F811 noise_level_whittaker_auto_lambda: np.ndarray, # noqa: F811 - n_series: int, + num_series: int, differences: int, same_weights_for_all: bool, with_zero_weights: bool, @@ -543,16 +565,16 @@ def test_auto_lambda_log_marginal_likelihood( # then, the weights are computed as the square of the inverse noise level ... weights = (1.0 / np.square(noise_level))[np.newaxis, ::] # ... 
and stacked as many times as required - weights = np.tile(weights, reps=(n_series, 1)) + weights = np.tile(weights, reps=(num_series, 1)) # then, the spectrum is repeated as many times as required - X = np.tile(spectrum_whittaker_auto_lambda[np.newaxis, ::], reps=(n_series, 1)) + X = np.tile(spectrum_whittaker_auto_lambda[np.newaxis, ::], reps=(num_series, 1)) # the smoothing is performed using the chemotools implementation lambda_bounds = (1e-15, 1e10) - whitt_base = WhittakerLikeSolver() - whitt_base._setup_for_fit( - n_data=X.shape[1], + whittaker_base = WhittakerLikeSolver() + whittaker_base._setup_for_fit( + num_data=X.shape[1], differences=differences, lam=_models.WhittakerSmoothLambda( bounds=lambda_bounds, @@ -560,7 +582,7 @@ def test_auto_lambda_log_marginal_likelihood( ), child_class_name="pytest_run", ) - _, lambda_opts = whitt_base._whittaker_solve( + _, lambda_opts = whittaker_base._whittaker_solve( X=X, weights=weights, use_same_w_for_all=same_weights_for_all, diff --git a/tests/test_for_utils/utils_funcs.py b/tests/test_for_utils/utils_funcs.py index fc1b9af5..006b5a00 100644 --- a/tests/test_for_utils/utils_funcs.py +++ b/tests/test_for_utils/utils_funcs.py @@ -1216,7 +1216,7 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: # the last multiplication is a matrix-vector resulting in another vector; # the other way around would result in another matrix followed by # a matrix-vector multiplication - weights_sum_of_squared_penalties = lam * ( + sum_of_squared_penalties = lam * ( smooth_solution @ (penalty_matrix @ smooth_solution) ) @@ -1225,7 +1225,7 @@ def logml_target_func(log_lam: Union[np.ndarray, float]) -> float: # and not minimised return 0.5 * ( weighted_sum_of_squared_residuals - + weights_sum_of_squared_penalties + + sum_of_squared_penalties - (b_vect.size - differences) * log_lam + log_determinant_lhs + logml_constant_term diff --git a/tests/test_functionality.py b/tests/test_functionality.py index e843216c..e6a0f82f 
100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -42,14 +42,14 @@ from tests.fixtures import spectrum -@pytest.mark.parametrize("n_samples", [1, 5]) +@pytest.mark.parametrize("num_series", [1, 5]) def test_air_pls( spectrum, reference_airpls, # noqa: F811 - n_samples: int, -): + num_series: int, +) -> None: # Arrange - reps = (n_samples, 1) + repetitions = (num_series, 1) air_pls = AirPls(lam=100, polynomial_order=1, nr_iterations=15) # Act @@ -57,7 +57,7 @@ def test_air_pls( # Assert assert np.allclose( - spectrum_corrected[0], np.tile(reference_airpls, reps=reps), atol=1e-7 + spectrum_corrected[0], np.tile(reference_airpls, reps=repetitions), atol=1e-7 ) @@ -78,10 +78,14 @@ def test_air_pls( # FIXME: working with such a high ``atol`` indicates that the reference is not up to # date anymore -@pytest.mark.parametrize("n_samples", [1, 5]) -def test_ar_pls(spectrum_arpls, reference_arpls, n_samples: int): # noqa: F811 +@pytest.mark.parametrize("num_series", [1, 5]) +def test_ar_pls( + spectrum_arpls, # noqa: F811 + reference_arpls, # noqa: F811 + num_series: int, +) -> None: # Arrange - reps = (n_samples, 1) + repetitions = (num_series, 1) arpls = ArPls(lam=1e2, differences=2, ratio=0.0001) reference = np.array(spectrum_arpls) - np.array(reference_arpls) @@ -89,7 +93,9 @@ def test_ar_pls(spectrum_arpls, reference_arpls, n_samples: int): # noqa: F811 spectrum_corrected = arpls.fit_transform(spectrum_arpls) # Assert - assert np.allclose(spectrum_corrected[0], np.tile(reference, reps=reps), atol=1e-4) + assert np.allclose( + spectrum_corrected[0], np.tile(reference, reps=repetitions), atol=1e-4 + ) # FIXME: Deactivated because it fails; Issue created: @@ -688,7 +694,9 @@ def test_range_cut_by_wavenumber_with_polars_dataframe(): # Arrange wavenumbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] spectrum = pl.DataFrame(np.array([[10, 12, 14, 16, 14, 12, 10, 12, 14, 16]])) - range_cut = RangeCut(start=2.5, end=7.9, 
wavenumbers=wavenumbers).set_output(transform='polars') + range_cut = RangeCut(start=2.5, end=7.9, wavenumbers=wavenumbers).set_output( + transform="polars" + ) # Act spectrum_corrected = range_cut.fit_transform(spectrum) @@ -813,25 +821,25 @@ def test_uniform_noise(): @pytest.mark.parametrize("same_weights_for_all", [True, False]) @pytest.mark.parametrize("with_weights", [True, False]) -@pytest.mark.parametrize("n_samples", [1, 5]) +@pytest.mark.parametrize("num_series", [1, 5]) def test_whittaker_smooth( spectrum, reference_whittaker, # noqa: F811 - n_samples: int, + num_series: int, with_weights: bool, same_weights_for_all: bool, -): +) -> None: # Arrange - reps = (n_samples, 1) + repetitions = (num_series, 1) whittaker_smooth = WhittakerSmooth() if with_weights and not same_weights_for_all: - weights = np.ones(shape=(n_samples, len(spectrum[0]))) + weights = np.ones(shape=(num_series, len(spectrum[0]))) elif with_weights and same_weights_for_all: weights = np.ones(shape=(len(spectrum[0]),)) else: weights = None - spectrum_to_fit_original = np.tile(spectrum, reps=reps) + spectrum_to_fit_original = np.tile(spectrum, reps=repetitions) spectrum_to_fit = spectrum_to_fit_original.copy() # Act @@ -843,15 +851,17 @@ def test_whittaker_smooth( # NOTE: the following test makes sure nothing was overwritten assert np.array_equal(spectrum_to_fit, spectrum_to_fit_original) assert np.allclose( - spectrum_corrected, np.tile(reference_whittaker, reps=reps), atol=1e-8 + spectrum_corrected, np.tile(reference_whittaker, reps=repetitions), atol=1e-8 ) @pytest.mark.parametrize("same_weights_for_all", [True, False]) @pytest.mark.parametrize("with_weights", [True, False]) -@pytest.mark.parametrize("n_samples", [1, 5]) +@pytest.mark.parametrize("num_series", [1, 5]) def test_whittaker_with_pentapy( - n_samples: int, with_weights: bool, same_weights_for_all: bool + num_series: int, + with_weights: bool, + same_weights_for_all: bool, ): # this test is skipped with a warning if pentapy is 
not installed if not PENTAPY_AVAILABLE: @@ -859,12 +869,12 @@ def test_whittaker_with_pentapy( # Arrange np.random.seed(42) - spectrum = np.random.rand(n_samples, 1000) + spectrum = np.random.rand(num_series, 1000) whittaker_smooth = WhittakerSmooth(lam=100.0, differences=2) weights = None if with_weights and not same_weights_for_all: - weights = np.ones(shape=(n_samples, len(spectrum[0]))) + weights = np.ones(shape=(num_series, len(spectrum[0]))) elif with_weights and same_weights_for_all: weights = np.ones(shape=(len(spectrum[0]),)) @@ -876,7 +886,7 @@ def test_whittaker_with_pentapy( # Assert with pentapy # NOTE: the weight is not correct since the test only checks the method solve_method = whittaker_smooth._solve( - lam=whittaker_smooth._lam_inter_.fixed_lambda, + lam=whittaker_smooth._lam_internal_.fixed_lambda, rhs_b_weighted=spectrum.transpose(), weights=1.0, )[1] @@ -891,7 +901,7 @@ def test_whittaker_with_pentapy( # Assert without pentapy # NOTE: the weight is not correct since the test only checks the method solve_method = whittaker_smooth._solve( - lam=whittaker_smooth._lam_inter_.fixed_lambda, + lam=whittaker_smooth._lam_internal_.fixed_lambda, rhs_b_weighted=spectrum.transpose(), weights=1.0, )[1] @@ -904,16 +914,16 @@ def test_whittaker_with_pentapy( ) @pytest.mark.parametrize("difference", [1, 2]) @pytest.mark.parametrize("fill_value", [-5.0, 0.0, 5.0]) -@pytest.mark.parametrize("size", [5_000]) +@pytest.mark.parametrize("num_data", [5_000]) def test_whittaker_constant_signal( - size: int, + num_data: int, fill_value: float, difference: int, log10_lam: float, ) -> None: # Arrange - spectrum = np.full(shape=(size,), fill_value=fill_value).reshape((1, -1)) + spectrum = np.full(shape=(num_data,), fill_value=fill_value).reshape((1, -1)) whittaker_smooth = WhittakerSmooth(lam=10.0**log10_lam, differences=difference) # Act @@ -924,6 +934,6 @@ def test_whittaker_constant_signal( assert np.allclose( spectrum_corrected[0], spectrum[0], - atol=size * 
np.finfo(np.float64).eps, # type: ignore + atol=num_data * np.finfo(np.float64).eps, # type: ignore rtol=1e-6, ) From 45b82242f1221856987a41308a58f479c0cda9e8 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 24 Jun 2024 22:31:06 +0200 Subject: [PATCH 115/118] style: [44] - added error message pattern matching to the tests for models and input check utility functions --- chemotools/utils/check_inputs.py | 4 +- tests/test_for_utils/test_check_inputs.py | 77 ++++++++++++----------- tests/test_for_utils/test_models.py | 69 +++++++++----------- 3 files changed, 75 insertions(+), 75 deletions(-) diff --git a/chemotools/utils/check_inputs.py b/chemotools/utils/check_inputs.py index 17489b59..1854b197 100644 --- a/chemotools/utils/check_inputs.py +++ b/chemotools/utils/check_inputs.py @@ -39,7 +39,9 @@ def check_weights( # now, the need to be checked for having the right shape weights_checked = check_array( - weights_checked, ensure_2d=True, force_all_finite=True + weights_checked, + ensure_2d=True, + force_all_finite=True, ) # afterwards, they are checked for having the right shape diff --git a/tests/test_for_utils/test_check_inputs.py b/tests/test_for_utils/test_check_inputs.py index 87b6a7a9..749e4399 100644 --- a/tests/test_for_utils/test_check_inputs.py +++ b/tests/test_for_utils/test_check_inputs.py @@ -5,7 +5,7 @@ ### Imports ### -from typing import Optional, Tuple, Type, Union +from typing import Optional, Tuple, Union import numpy as np import pytest @@ -16,7 +16,7 @@ @pytest.mark.parametrize( - "combination", + "weights, expected_result", [ ( # Number 0 (no weights; for all) None, @@ -36,41 +36,41 @@ ), ( # Number 4 (invalid 1D-weights with wrong column number; for all) np.array([1.0, 2.0, 3.0, 4.0]), - ValueError, + ValueError("Weights must have 3 columns, but they have"), ), ( # Number 5 (invalid 1D-weights with wrong column number; for all) np.array([1.0, 2.0]), - ValueError, + ValueError("Weights must have 3 columns, but they have"), ), ( # Number 6 (invalid 
2D-weights with wrong column number; for all) np.array([[1.0, 2.0, 3.0, 4.0]]), - ValueError, + ValueError("Weights must have 3 columns, but they have"), ), ( # Number 7 (invalid 2D-weights with wrong column number; for all) np.array([[1.0, 2.0]]), - ValueError, + ValueError("Weights must have 3 columns, but they have"), ), ( # Number 8 (invalid 2D-weights with wrong row number; individual) np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), - ValueError, + ValueError("Weights must have either 1 or 3 rows, but they have"), ), ( # Number 9 (invalid 2D-weights with wrong row number; individual) np.array( [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], [10.0, 11.0, 12.0]] ), - ValueError, + ValueError("Weights must have either 1 or 3 rows, but they have"), ), ( # Number 10 (invalid 2D-weights with wrong row and column number; # individual) np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), - ValueError, + ValueError("Weights must have either 1 or 3 rows, but they have"), ), ( # Number 10 (invalid 2D-weights with wrong row and column number; # individual) np.array([[1.0, 2.0], [3.0, 4.0]]), - ValueError, + ValueError("Weights must have either 1 or 3 rows, but they have"), ), ( # Number 11 (invalid 2D-weights with wrong row and column number; @@ -83,80 +83,79 @@ [13.0, 14.0, 15.0, 16.0], ] ), - ValueError, + ValueError("Weights must have either 1 or 3 rows, but they have"), ), ( # Number 12 (invalid 2D-weights with wrong row and column number; # individual) np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]), - ValueError, + ValueError("Weights must have either 1 or 3 rows, but they have"), ), ( # Number 13 (invalid 1D-weights with negative entry; for all) np.array([1.0, 2.0, -1_000.0]), - ValueError, + ValueError("Weights may not be negative, but"), ), ( # Number 14 (invalid 2D-weights with negative entry; for all) np.array([[1.0, 2.0, -1_000.0]]), - ValueError, + ValueError("Weights may not be negative, but"), ), ( # Number 15 (invalid 2D-weights with 
negative entry; individual) np.array([[1.0, 2.0, 3.0], [4.0, 5.0, -1_000.0], [7.0, 8.0, 9.0]]), - ValueError, + ValueError("Weights may not be negative, but"), ), ( # Number 16 (invalid 1D-weights with NaN entry; for all) np.array([1.0, 2.0, np.nan]), - ValueError, + ValueError("Input contains NaN"), ), ( # Number 17 (invalid 2D-weights with NaN entry; for all) np.array([[1.0, 2.0, np.nan]]), - ValueError, + ValueError("Input contains NaN"), ), ( # Number 18 (invalid 2D-weights with NaN entry; individual) np.array([[1.0, 2.0, 3.0], [4.0, 5.0, np.nan], [7.0, 8.0, 9.0]]), - ValueError, + ValueError("Input contains NaN"), ), ( # Number 19 (invalid 1D-weights with inf entry; for all) np.array([1.0, 2.0, np.inf]), - ValueError, + ValueError("Input contains infinity or a value too large"), ), ( # Number 20 (invalid 2D-weights with inf entry; for all) np.array([[1.0, 2.0, np.inf]]), - ValueError, + ValueError("Input contains infinity or a value too large"), ), ( # Number 21 (invalid 2D-weights with inf entry; individual) np.array([[1.0, 2.0, 3.0], [4.0, 5.0, np.inf], [7.0, 8.0, 9.0]]), - ValueError, + ValueError("Input contains infinity or a value too large"), ), ( # Number 22 (invalid 1D-weights with -inf entry; for all) np.array([1.0, 2.0, -np.inf]), - ValueError, + ValueError("Input contains infinity or a value too large"), ), ( # Number 23 (invalid 2D-weights with -inf entry; for all) np.array([[1.0, 2.0, -np.inf]]), - ValueError, + ValueError("Input contains infinity or a value too large"), ), ( # Number 24 (invalid 2D-weights with -inf entry; individual) np.array([[1.0, 2.0, 3.0], [4.0, 5.0, -np.inf], [7.0, 8.0, 9.0]]), - ValueError, + ValueError("Input contains infinity or a value too large"), ), ( # Number 25 (invalid 1D-weights with all zero entries; for all) np.array([0.0, 0.0, 0.0]), - ValueError, + ValueError("At least one weights needs to be > 0, but"), ), ( # Number 26 (invalid 2D-weights with all zero entries; for all) np.array([[0.0, 0.0, 0.0]]), - 
ValueError, + ValueError("At least one weights needs to be > 0, but"), ), ( # Number 27 (invalid 2D-weights with all zero entries; individual) np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]), - ValueError, + ValueError("At least one weights needs to be > 0, but"), ), ], ) def test_weight_checks( - combination: Tuple[ - Optional[np.ndarray], Union[Tuple[Optional[np.ndarray], bool], Type[Exception]] - ] + weights: Optional[np.ndarray], + expected_result: Union[Tuple[Optional[np.ndarray], bool], Exception], ) -> None: """ Tests the function :func:`chemotools.utils.check_inputs.check_weights` for different @@ -164,16 +163,22 @@ def test_weight_checks( """ - # the input parameters are unpacked ... - weights, expected_result = combination - # ... and the size of the matrix against which the weights are checked is set + # the size of the matrix against which the weights are checked is set n_samples, n_features = 3, 3 # if the expected output is an exception, the test is run in a context manager to # check if the respective exception is raised - if not isinstance(expected_result, tuple): - with pytest.raises(expected_result): - check_weights(weights=weights, n_samples=n_samples, n_features=n_features) + if isinstance(expected_result, Exception): + error_catch_phrase = str(expected_result) + with pytest.raises( + type(expected_result), + match=error_catch_phrase, + ): + check_weights( + weights=weights, + n_samples=n_samples, + n_features=n_features, + ) return diff --git a/tests/test_for_utils/test_models.py b/tests/test_for_utils/test_models.py index cfe67ad6..6baf8610 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/test_for_utils/test_models.py @@ -6,7 +6,7 @@ ### Imports ### from math import log -from typing import List, Tuple, Type, Union +from typing import List, Tuple, Union import pytest @@ -20,11 +20,6 @@ _LambdaValueNumericOrFlawed = Union[_LambdaValueNumeric, str] _WhittakerMethod = Union[str, _models.WhittakerSmoothMethods] 
_WhittakerMethodSequence = List[_WhittakerMethod] -_LambdaTestCombination = Tuple[ - _LambdaValueNumericOrFlawed, - _WhittakerMethodSequence, - Union[ExpectedWhittakerSmoothLambda, Type[Exception]], -] ### Constants ### @@ -48,7 +43,7 @@ @pytest.mark.parametrize( - "combination", + "lam, methods, expected", [ ( # Number 0 (fixed float; fixed method) 100.0, @@ -185,106 +180,104 @@ ( # Number 12 (fixed zero float; fixed method) 0.0, _FIXED_WHITTAKER_METHODS, - ValueError, + ValueError("has to be greater than or equal to the zero tolerance"), ), ( # Number 13 (fixed zero integer; fixed method) 0, _FIXED_WHITTAKER_METHODS, - ValueError, + ValueError("has to be greater than or equal to the zero tolerance"), ), ( # Number 14 (search space floats; fixed method) (100.0, 10_000.0), _FIXED_WHITTAKER_METHODS, - ValueError, + ValueError("for the penalty weight lambda are a search space"), ), ( # Number 15 (search space integers; fixed method) (100, 10_000), _FIXED_WHITTAKER_METHODS, - ValueError, + ValueError("for the penalty weight lambda are a search space"), ), ( # Number 16 (fixed float; automated method) 100.0, _aauto_whittaker_methods, - ValueError, + ValueError("was selected for a fixed penalty weight"), ), ( # Number 17 (fixed integer; automated method) 100, _aauto_whittaker_methods, - ValueError, + ValueError("was selected for a fixed penalty weight"), ), ( # Number 18 (search space floats with zero; all methods) (0.0, 100.0), _all_whittaker_methods, - ValueError, + ValueError("have to be greater than or equal to the zero tolerance"), ), ( # Number 19 (search space integers with zero; all methods) (0, 100), _all_whittaker_methods, - ValueError, + ValueError("have to be greater than or equal to the zero tolerance"), ), ( # Number 20 (flipped search space floats with zero; all methods) (100.0, 0.0), _all_whittaker_methods, - ValueError, + ValueError("have to be greater than or equal to the zero tolerance"), ), ( # Number 21 (flipped search space integer with zero; all 
methods) (100, 0), _all_whittaker_methods, - ValueError, + ValueError("have to be greater than or equal to the zero tolerance"), ), ( # Number 22 (all float zeros; all methods) (0.0, 0.0), _all_whittaker_methods, - ValueError, + ValueError("have to be greater than or equal to the zero tolerance"), ), ( # Number 23 (all float integers; all methods) (0, 0), _all_whittaker_methods, - ValueError, + ValueError("have to be greater than or equal to the zero tolerance"), ), ( # Number 24 (wrong type; all methods) "error", _all_whittaker_methods, - TypeError, + TypeError("have to be either a scalar or a tuple of two values"), ), ( # Number 25 (fixed float; wrong method) 100.0, "error", - ValueError, + ValueError("is not valid. Please choose one of the following"), ), ( # Number 26 (fixed integer; wrong method) 100, "error", - ValueError, + ValueError("is not valid. Please choose one of the following"), ), ], ) -def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> None: +def test_whittaker_smooth_lambda_model( + lam: _LambdaValueNumericOrFlawed, + methods: _WhittakerMethodSequence, + expected: Union[ExpectedWhittakerSmoothLambda, Exception], +) -> None: """ Tests the class :class:`WhittakerSmoothLambda` for the correct behavior of its ``__post_init__`` method. - The ``combination`` parameter defines - - - the lambda value(s) to be used, - - the method(s) to be used, and - - the expected result of the instantiation (will be an exception if the input - should be considered invalid by the dataclass). 
- """ - # the combination is unpacked - lambda_value, methods, expected_result = combination - # if the expected result is an exception, it is tested whether the correct exception # is raised - if not isinstance(expected_result, ExpectedWhittakerSmoothLambda): + if isinstance(expected, Exception): + error_catch_phrase = str(expected) for meth in methods: - with pytest.raises(expected_result): + with pytest.raises( + type(expected), + match=error_catch_phrase, + ): _models.WhittakerSmoothLambda( - bounds=lambda_value, # type: ignore + bounds=lam, # type: ignore method=meth, # type: ignore ) @@ -294,8 +287,8 @@ def test_whittaker_smooth_lambda_model(combination: _LambdaTestCombination) -> N # generated object is compared to the expected result for meth in methods: lambda_model = _models.WhittakerSmoothLambda( - bounds=lambda_value, # type: ignore + bounds=lam, # type: ignore method=meth, # type: ignore ) - expected_result.assert_is_equal_to(other=lambda_model) + expected.assert_is_equal_to(other=lambda_model) From 36cbf299ff586cbf1971fdf4e8211f3671ed2f1c Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 24 Jun 2024 23:25:58 +0200 Subject: [PATCH 116/118] refactor: [44] - renamed `test_for_utils` to `tests_for_utils` --- tests/fixtures.py | 2 +- .../__init__.py | 0 .../test_banded_linalg.py | 2 +- .../test_check_inputs.py | 0 .../test_finite_differences.py | 4 ++-- .../test_models.py | 2 +- .../test_whittaker_base.py | 4 ++-- .../utils_funcs.py | 18 +++++++++--------- .../utils_models.py | 2 +- 9 files changed, 17 insertions(+), 17 deletions(-) rename tests/{test_for_utils => tests_for_utils}/__init__.py (100%) rename tests/{test_for_utils => tests_for_utils}/test_banded_linalg.py (99%) rename tests/{test_for_utils => tests_for_utils}/test_check_inputs.py (100%) rename tests/{test_for_utils => tests_for_utils}/test_finite_differences.py (99%) rename tests/{test_for_utils => tests_for_utils}/test_models.py (99%) rename tests/{test_for_utils => 
tests_for_utils}/test_whittaker_base.py (99%) rename tests/{test_for_utils => tests_for_utils}/utils_funcs.py (98%) rename tests/{test_for_utils => tests_for_utils}/utils_models.py (97%) diff --git a/tests/fixtures.py b/tests/fixtures.py index c63f6637..757da973 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from tests.test_for_utils.utils_models import ( +from tests.tests_for_utils.utils_models import ( NoiseEstimationReference, RefDifferenceKernel, ) diff --git a/tests/test_for_utils/__init__.py b/tests/tests_for_utils/__init__.py similarity index 100% rename from tests/test_for_utils/__init__.py rename to tests/tests_for_utils/__init__.py diff --git a/tests/test_for_utils/test_banded_linalg.py b/tests/tests_for_utils/test_banded_linalg.py similarity index 99% rename from tests/test_for_utils/test_banded_linalg.py rename to tests/tests_for_utils/test_banded_linalg.py index cea367b3..049fc813 100644 --- a/tests/test_for_utils/test_banded_linalg.py +++ b/tests/tests_for_utils/test_banded_linalg.py @@ -19,7 +19,7 @@ lu_solve_banded, slogdet_lu_banded, ) -from tests.test_for_utils.utils_funcs import get_banded_slogdet +from tests.tests_for_utils.utils_funcs import get_banded_slogdet ### Constants ### diff --git a/tests/test_for_utils/test_check_inputs.py b/tests/tests_for_utils/test_check_inputs.py similarity index 100% rename from tests/test_for_utils/test_check_inputs.py rename to tests/tests_for_utils/test_check_inputs.py diff --git a/tests/test_for_utils/test_finite_differences.py b/tests/tests_for_utils/test_finite_differences.py similarity index 99% rename from tests/test_for_utils/test_finite_differences.py rename to tests/tests_for_utils/test_finite_differences.py index 2be19a22..ccc3ded7 100644 --- a/tests/test_for_utils/test_finite_differences.py +++ b/tests/tests_for_utils/test_finite_differences.py @@ -20,12 +20,12 @@ from tests.fixtures import noise_level_estimation_references # noqa: F401 from 
tests.fixtures import noise_level_estimation_signal # noqa: F401 from tests.fixtures import reference_finite_differences # noqa: F401 -from tests.test_for_utils.utils_funcs import ( +from tests.tests_for_utils.utils_funcs import ( conv_upper_cho_banded_storage_to_sparse, multiply_vect_with_squared_forward_finite_differences_original_first, multiply_vect_with_squared_forward_finite_differences_transpose_first, ) -from tests.test_for_utils.utils_models import ( +from tests.tests_for_utils.utils_models import ( NoiseEstimationReference, RefDifferenceKernel, ) diff --git a/tests/test_for_utils/test_models.py b/tests/tests_for_utils/test_models.py similarity index 99% rename from tests/test_for_utils/test_models.py rename to tests/tests_for_utils/test_models.py index 6baf8610..69f308ca 100644 --- a/tests/test_for_utils/test_models.py +++ b/tests/tests_for_utils/test_models.py @@ -11,7 +11,7 @@ import pytest from chemotools.utils import _models -from tests.test_for_utils.utils_models import ExpectedWhittakerSmoothLambda +from tests.tests_for_utils.utils_models import ExpectedWhittakerSmoothLambda ### Type aliases ### diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/tests_for_utils/test_whittaker_base.py similarity index 99% rename from tests/test_for_utils/test_whittaker_base.py rename to tests/tests_for_utils/test_whittaker_base.py index bbeeab64..0c0592a7 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/tests_for_utils/test_whittaker_base.py @@ -25,10 +25,10 @@ from chemotools.utils._whittaker_base.solvers import solve_normal_equations from tests.fixtures import noise_level_whittaker_auto_lambda # noqa: F401 from tests.fixtures import spectrum_whittaker_auto_lambda # noqa: F401 -from tests.test_for_utils.utils_funcs import ( +from tests.tests_for_utils.utils_funcs import ( find_whittaker_smooth_opt_lambda_log_marginal_likelihood, ) -from tests.test_for_utils.utils_models import ExpectedWhittakerSmoothLambda +from 
tests.tests_for_utils.utils_models import ExpectedWhittakerSmoothLambda ### Type Aliases ### diff --git a/tests/test_for_utils/utils_funcs.py b/tests/tests_for_utils/utils_funcs.py similarity index 98% rename from tests/test_for_utils/utils_funcs.py rename to tests/tests_for_utils/utils_funcs.py index 006b5a00..ddc5dcb0 100644 --- a/tests/test_for_utils/utils_funcs.py +++ b/tests/tests_for_utils/utils_funcs.py @@ -34,7 +34,7 @@ def float_is_bit_equal(value: float, reference: float) -> bool: Doctests -------- >>> # Imports - >>> from tests.test_for_utils.utils_funcs import float_is_bit_equal + >>> from tests.tests_for_utils.utils_funcs import float_is_bit_equal >>> # Test 1 >>> float_is_bit_equal(value=1.0, reference=1.0) @@ -76,7 +76,7 @@ def conv_upper_cho_banded_storage_to_sparse(ab: np.ndarray) -> csr_matrix: >>> # Imports >>> import numpy as np >>> from numpy import nan - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... conv_upper_cho_banded_storage_to_sparse, ... ) @@ -224,7 +224,7 @@ def conv_lu_banded_storage_to_sparse( >>> # Imports >>> import numpy as np >>> from numpy import nan - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... conv_lu_banded_storage_to_sparse, ... ) @@ -383,7 +383,7 @@ def multiply_vect_with_squared_forward_finite_differences_original_first( -------- >>> # Imports >>> import numpy as np - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... multiply_vect_with_squared_forward_finite_differences_original_first, ... ) @@ -525,7 +525,7 @@ def multiply_vect_with_squared_forward_finite_differences_transpose_first( -------- >>> # Imports >>> import numpy as np - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... multiply_vect_with_squared_forward_finite_differences_transpose_first, ... 
) @@ -654,7 +654,7 @@ def get_banded_slogdet(ab: np.ndarray) -> Tuple[float, float]: -------- >>> # Imports >>> import numpy as np - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... conv_upper_cho_banded_storage_to_sparse, ... get_banded_slogdet, ... ) @@ -781,7 +781,7 @@ def get_sparse_forward_finite_difference_matrix( Doctests -------- >>> # Imports - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... get_sparse_forward_finite_difference_matrix, ... ) @@ -894,7 +894,7 @@ def sparse_slogdet_from_superlu(splu: spla.SuperLU) -> Tuple[float, float]: >>> import numpy as np >>> import scipy.sparse as sprs - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... sparse_slogdet_from_superlu, ... ) @@ -1026,7 +1026,7 @@ def calc_whittaker_smooth_log_marginal_likelihood_const_term( ------- >>> # Imports >>> import numpy as np - >>> from tests.test_for_utils.utils_funcs import ( + >>> from tests.tests_for_utils.utils_funcs import ( ... calc_whittaker_smooth_log_marginal_likelihood_const_term, ... get_sparse_forward_finite_difference_matrix, ... 
) diff --git a/tests/test_for_utils/utils_models.py b/tests/tests_for_utils/utils_models.py similarity index 97% rename from tests/test_for_utils/utils_models.py rename to tests/tests_for_utils/utils_models.py index 1a4c85b1..9dbcfdec 100644 --- a/tests/test_for_utils/utils_models.py +++ b/tests/tests_for_utils/utils_models.py @@ -12,7 +12,7 @@ import numpy as np from chemotools.utils import _models -from tests.test_for_utils.utils_funcs import float_is_bit_equal +from tests.tests_for_utils.utils_funcs import float_is_bit_equal ### Dataclasses ### From e9052504943fa1de3ef22cf856352f9df0c45480 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 24 Jun 2024 23:28:20 +0200 Subject: [PATCH 117/118] feat: [44] added a guideline for proper use of `pytest` --- tests/README.md | 242 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 tests/README.md diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 00000000..2d318156 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,242 @@ +# ✅❌ Good practices for testing with ``pytest`` 🤔 + +## 1) ✍️ Type hint your tests +While this might seem tedious because tests are not the actual code after all, this helps catch errors early when writing tests, especially those one did not think of before. A good type checker will help here by underlining type mismatches in red. + +## 2) 🪛 Test your test utility functions +Some tests require utility functions to be written. Since a utility function is also code that can have bugs, it is important to test it as well.
+They can either be tested with a dedicated test + +```python +def utility_function(): + return 2.0 + +def test_utility_function(): + assert utility_function() == 2.0 +``` + +or via a doctest that will be included in ``chemotools``' test suite by ``pytest``. + +```python +def utility_function(): + """ + Doctests + -------- + >>> utility_function() + 2.0 + """ + + return 2.0 +``` + +## 3) 🦾🤖 Don't write the same test twice - use parametrization +If you have a test that is repeated with different inputs, use parametrization to avoid writing the same test twice.
+This will make your test suite more readable and maintainable. With the ``pytest.mark.parametrize`` decorator, you can run the same test with different inputs. In the following example, the test will run 5 times with the inputs 1, 4, 9, 16, and 25. + +```python +import pytest + +@pytest.mark.parametrize("input", [1, 4, 9, 16, 25]) +def test_is_square(input: int) -> None: + assert input ** 0.5 == int(input ** 0.5) +``` + +In case you want to test multiple input combinations, you can use multiple wrappings of `@pytest.mark.parametrize`. The next test will run 5 x 5 = 25 combinations of inputs. + +```python +import pytest + +@pytest.mark.parametrize("input_2", [1, 4, 9, 16, 25]) +@pytest.mark.parametrize("input_1", [1, 4, 9, 16, 25]) +def test_sum_is_positive( + input_1: int, + input_2: int +) -> None: + assert input_1 + input_2 > 0 +``` + +If you need multiple wrappings, but some combinations are not valid, you can use `pytest.skip` to skip the test. The following will run 5 x 5 = 25 tests, but will skip the test when both inputs are 1. + +```python +import pytest + +@pytest.mark.parametrize("input_2", [1, 4, 9, 16, 25]) +@pytest.mark.parametrize("input_1", [1, 4, 9, 16, 25]) +def test_sum_is_positive( + input_1: int, + input_2: int +) -> None: + if input_1 == 1 and input_2 == 1: + pytest.skip("This test is not valid") + + assert input_1 + input_2 > 0 +``` + +Finally, in case your test runs on multiple specific combinations of inputs and expected outputs, you can parametrize the full combination in a ``pytest.mark.parametrize`` decorator. + +```python +import pytest + + +@pytest.mark.parametrize( + "input_1, input_2, expected", + [ + (1, 2, 3), + (2, 3, 5), + (3, 4, 7), + ], +) +def test_sum_is_correct( + input_1: int, + input_2: int, + expected: int, +) -> None: + assert input_1 + input_2 == expected +``` + +## 4) 💣❌ Test your error handling thoroughly based on error messages +If your function raises an error, you should test that it raises the correct error. 
You can use the ``pytest.raises`` context manager to check that the function raises the expected error. + +```python +import pytest + +def divide(a: int, b: int) -> float: + return a / b + +def test_divide_by_zero_raises_error() -> None: + with pytest.raises(ZeroDivisionError): + divide(1, 0) +``` + +Note that in a parametrized test that mixes failing and passing inputs, it's crucial to put a ``return`` statement right after the ``pytest.raises`` block so that the rest of the test is not executed for the failing inputs as well. + +```python +from typing import List, Union + +import numpy as np +import pytest + + +def function_for_an_array(input: Union[List[int], np.ndarray]) -> np.ndarray: + if isinstance(input, list): + raise TypeError("Input must be a numpy array") + + # Do something with the input + return input + +@pytest.mark.parametrize( + "input", + [ + [1, 2, 3], + np.array([1, 2, 3]), + ], +) +def test_function_for_an_array( + input: Union[List[int], np.ndarray], +) -> None: + if isinstance(input, list): + with pytest.raises(TypeError): + function_for_an_array(input) + + return # without this, the following code would still be executed and fail + + result = function_for_an_array(np.array(input)) + assert result is not None + +``` + +However, this is not reliable enough for functions that can raise the same exception type in different contexts. In this case, you can use the ``match`` argument of ``pytest.raises`` to check the error message. The next function to test raises a ``ValueError`` when either ``a`` or ``b`` is negative. + +```python +import pytest + +def sum_non_negative_values(a: int, b: int) -> int: + if a < 0: + raise ValueError("a must be non-negative") + + if b < 0: + raise ValueError("b must be non-negative") + + return a + b +``` + +Now, the following test will pass, but the case of ``b`` being negative is never actually tested. 
+ +```python +@pytest.mark.parametrize( + "a, b, expected", + [ + (-1, 1, ValueError()), + (-1, -1, ValueError()), + ], +) +def test_sum_non_negative_values_raises_error( + a: int, + b: int, + expected: Exception, +) -> None: + with pytest.raises(type(expected)): + sum_non_negative_values(a, b) +``` + +``b`` being negative is never hit because ``a`` is negative in both tests. Yet, the ``ValueError`` is still properly raised. Such a situation can be avoided by using the ``match`` argument of ``pytest.raises`` to catch and check the error message. + +```python +@pytest.mark.parametrize( + "a, b, expected", + [ + (-1, 1, ValueError("a must be non-negative")), + (-1, -1, ValueError("b must be non-negative")), # this test will fail + ], +) +def test_sum_non_negative_values_raises_error( + a: int, + b: int, + expected: Exception, +) -> None: + error_catch_phrase = str(expected) + with pytest.raises(type(expected), match=error_catch_phrase): + sum_non_negative_values(a, b) +``` + +Due to the enhanced test, the test will now fail with the following output: + +```bash + with pytest.raises(ValueError, match=expected): +> sum_non_negative_values(a, b) +E AssertionError: Regex pattern did not match. +E Regex: 'b must be non-negative' +E Input: 'a must be non-negative' +``` + +Of course, the same principles apply for warnings that can be caught with ``pytest.warns``. + +```python +import pytest + +def function_that_warns() -> None: + import warnings + warnings.warn("This is a warning", UserWarning) + +def test_function_that_warns() -> None: + with pytest.warns(UserWarning, match="This is a warning"): + function_that_warns() + + return +``` + +## 5) 🧪🧫 Test edge cases +Edge cases are the limits of the input space. They are often the source of bugs in code.
+Let's say your function starts to misbehave when the input is 0. You should write a test +for that case. + +```python +import pytest + +def divide(a: int, b: int) -> float: + return a / b + +def test_divide_by_zero() -> None: + with pytest.raises(ZeroDivisionError): + divide(1, 0) +``` \ No newline at end of file From 3826871b6400da667fbb34bfbe723e831892fa40 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 24 Jun 2024 23:31:49 +0200 Subject: [PATCH 118/118] feat: [44] added line on why tests for error handling are mandatory --- tests/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/README.md b/tests/README.md index 2d318156..67343e7d 100644 --- a/tests/README.md +++ b/tests/README.md @@ -95,6 +95,7 @@ def test_sum_is_correct( ``` ## 4) 💣❌ Test your error handling thoroughly based on error messages +Functions that have error handling which is not properly covered by the tests should be classified as not tested at all because all kinds of unexpected behavior can occur.
If your function raises an error, you should test that it raises the correct error. You can use the ``pytest.raises`` context manager to check that the function raises the expected error. ```python