From d6e72cba3c4193ae7985f2a6ffb28ef7af262815 Mon Sep 17 00:00:00 2001 From: Collin Leiber Date: Thu, 28 May 2026 17:35:23 +0300 Subject: [PATCH 1/2] Fix bug in SpecialK algorithm. Fix bugs mentioned in #107 --- clustpy/deep/_utils.py | 8 +-- .../neural_networks/_abstract_autoencoder.py | 1 + .../convolutional_autoencoder.py | 35 +++++++++++-- .../feedforward_autoencoder.py | 16 ++++-- .../tests/test_convolutional_autoencoder.py | 25 +++++++++ .../tests/test_feedforward_autoencoder.py | 4 ++ clustpy/deep/tests/test_utils.py | 29 +++++++++++ clustpy/partition/specialk.py | 51 +++++++++++-------- clustpy/partition/tests/test_specialk.py | 2 +- 9 files changed, 136 insertions(+), 35 deletions(-) diff --git a/clustpy/deep/_utils.py b/clustpy/deep/_utils.py index 8fc0039..b3e1aa9 100644 --- a/clustpy/deep/_utils.py +++ b/clustpy/deep/_utils.py @@ -185,7 +185,7 @@ def encode_batchwise(dataloader: torch.utils.data.DataLoader, neural_network: to if type(embedded_data) is tuple: embedded_data = embedded_data[0] if embeddings_numpy is None: - embeddings_numpy = np.zeros((len(dataloader.dataset), embedded_data.shape[1]), dtype=float) + embeddings_numpy = np.zeros([len(dataloader.dataset)] + list(embedded_data.shape[1:]), dtype=float) embeddings_numpy[batch[0]] = embedded_data.detach().cpu().numpy() return embeddings_numpy @@ -218,7 +218,7 @@ def decode_batchwise(dataloader: torch.utils.data.DataLoader, neural_network: to else: decoded_data = neural_network.decode(embedded_data) if decodings_numpy is None: - decodings_numpy = np.zeros((len(dataloader.dataset), decoded_data.shape[1]), dtype=float) + decodings_numpy = np.zeros([len(dataloader.dataset)] + list(decoded_data.shape[1:]), dtype=float) decodings_numpy[batch[0]] = decoded_data.detach().cpu().numpy() return decodings_numpy @@ -255,8 +255,8 @@ def encode_decode_batchwise(dataloader: torch.utils.data.DataLoader, neural_netw else: decoded_data = neural_network.decode(embedded_data) if embeddings_numpy is None: - embeddings_numpy = np.zeros((len(dataloader.dataset), embedded_data.shape[1]), dtype=float) - decodings_numpy = np.zeros((len(dataloader.dataset), decoded_data.shape[1]), dtype=float) + embeddings_numpy = np.zeros([len(dataloader.dataset)] + list(embedded_data.shape[1:]), dtype=float) + decodings_numpy = np.zeros([len(dataloader.dataset)] + list(decoded_data.shape[1:]), dtype=float) embeddings_numpy[batch[0]] = embedded_data.detach().cpu().numpy() decodings_numpy[batch[0]] = decoded_data.detach().cpu().numpy() return embeddings_numpy, decodings_numpy diff --git a/clustpy/deep/neural_networks/_abstract_autoencoder.py b/clustpy/deep/neural_networks/_abstract_autoencoder.py index 33c9d36..b255718 100644 --- a/clustpy/deep/neural_networks/_abstract_autoencoder.py +++ b/clustpy/deep/neural_networks/_abstract_autoencoder.py @@ -424,6 +424,7 @@ def load_parameters(self, path: str | Path) -> '_AbstractAutoencoder': this instance of the autoencoder """ self.load_state_dict(torch.load(path, weights_only=True, map_location=get_device_from_module(self))) + self.eval() self.fitted = True return self diff --git a/clustpy/deep/neural_networks/convolutional_autoencoder.py b/clustpy/deep/neural_networks/convolutional_autoencoder.py index d17cd73..1c48840 100644 --- a/clustpy/deep/neural_networks/convolutional_autoencoder.py +++ b/clustpy/deep/neural_networks/convolutional_autoencoder.py @@ -68,7 +68,23 @@ class ConvolutionalAutoencoder(_AbstractAutoencoder): indicates whether the autoencoder is already fitted work_on_copy : bool indicates whether deep clustering algorithms should work on a copy of the original autoencoder - + + Examples + ---------- + >>> from clustpy.deep.neural_networks import ConvolutionalAutoencoder + >>> from clustpy.data import load_usps + >>> from clustpy.utils import plot_image + >>> import torchvision + >>> dataset = load_usps() + >>> X = dataset.images + >>> X = X / 255. + >>> X = X.reshape(-1, 1, X.shape[1], X.shape[1]) + >>> X = np.tile(X, (1, 3, 1, 1)) + >>> X = torchvision.transforms.Resize((32, 32))(torch.from_numpy(X).float()).numpy() + >>> cae = ConvolutionalAutoencoder(X.shape[2], [512, 10]).fit(data=X[:500], n_epochs=100) + >>> Z = cae.decode(cae.encode(torch.from_numpy(X[0]).float())).detach().numpy() + >>> plot_image(Z, image_shape=(16, 16), min_value=0, max_value=1) + References ---------- He, Kaiming, et al. "Deep residual learning for image recognition." @@ -87,6 +103,8 @@ def __init__(self, input_height: int, fc_layers: list, conv_encoder_name: str = work_on_copy: bool = True, random_state: np.random.RandomState | int = None, **fc_kwargs): super().__init__(work_on_copy, random_state) self.allow_nd_input = True + if input_height % 32 != 0: + raise ValueError(f"Input_height has to be a multiple of 32. Your input: {input_height}") self.input_height = input_height # Check if layers match @@ -141,8 +159,14 @@ def encode(self, x: torch.Tensor) -> torch.Tensor: embedded : torch.Tensor the embedded data point with dimensionality embedding_size """ - embedded = self.conv_encoder(x) + x_adj = x.reshape(1, x.shape[0], x.shape[1] ,x.shape[2]) if x.ndim == 3 else x + if x_adj.shape[1:] != (3, self.input_height, self.input_height): + raise ValueError("Input layer of the encoder ({0}) does not match shape of the input sample ({1})".format((3, self.input_height, self.input_height), + x_adj.shape[1:])) + embedded = self.conv_encoder(x_adj) embedded = self.fc_encoder(embedded) + if x.ndim == 3: + embedded = embedded[0] return embedded def decode(self, embedded: torch.Tensor) -> torch.Tensor: @@ -159,6 +183,11 @@ def decode(self, embedded: torch.Tensor) -> torch.Tensor: decoded : torch.Tensor returns the reconstruction of embedded """ - decoded = self.fc_decoder(embedded) + embedded_adj = embedded.reshape((1, -1)) if embedded.ndim == 1 else embedded + if embedded_adj.shape[1] != self.fc_decoder.layers[0]: + raise ValueError("Input layer of the decoder does not match input sample") + decoded = self.fc_decoder(embedded_adj) decoded = self.conv_decoder(decoded) + if embedded.ndim == 1: + decoded = decoded[0] return decoded diff --git a/clustpy/deep/neural_networks/feedforward_autoencoder.py b/clustpy/deep/neural_networks/feedforward_autoencoder.py index c51704b..14c5f5c 100644 --- a/clustpy/deep/neural_networks/feedforward_autoencoder.py +++ b/clustpy/deep/neural_networks/feedforward_autoencoder.py @@ -95,10 +95,13 @@ def encode(self, x: torch.Tensor) -> torch.Tensor: embedded : torch.Tensor the embedded data point with dimensionality embedding_size """ - if x.shape[1] != self.encoder.layers[0]: + x_adj = x.reshape((1, -1)) if x.ndim == 1 else x + if x_adj.shape[1] != self.encoder.layers[0]: raise ValueError("Input layer of the encoder ({0}) does not match input sample ({1})".format(self.encoder.layers[0], - x.shape[1])) - embedded = self.encoder(x) + x_adj.shape[1])) + embedded = self.encoder(x_adj) + if x.ndim == 1: + embedded = embedded[0] return embedded def decode(self, embedded: torch.Tensor) -> torch.Tensor: @@ -115,7 +118,10 @@ def decode(self, embedded: torch.Tensor) -> torch.Tensor: decoded : torch.Tensor returns the reconstruction of embedded """ - if embedded.shape[1] != self.decoder.layers[0]: + embedded_adj = embedded.reshape((1, -1)) if embedded.ndim == 1 else embedded + if embedded_adj.shape[1] != self.decoder.layers[0]: raise ValueError("Input layer of the decoder does not match input sample") - decoded = self.decoder(embedded) + decoded = self.decoder(embedded_adj) + if embedded.ndim == 1: + decoded = decoded[0] return decoded diff --git a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py index adcfe96..22259b7 100644 --- a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py +++ b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py @@ -2,6 +2,7 @@ from clustpy.deep import DCN import torch import numpy as np +import pytest def test_convolutional_autoencoder_resnet18(): @@ -13,9 +14,13 @@ def test_convolutional_autoencoder_resnet18(): # Test encoding embedded = autoencoder.encode(data_batch) assert embedded.shape == (batch_size, embedding_dim) + embedded_solo = autoencoder.encode(data_batch[0]) + assert embedded_solo.shape == (embedding_dim, ) # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, 3, 32, 32) + decoded_solo = autoencoder.decoded(embedded[0]) + assert decoded_solo.shape == (3, 32, 32) # Test forwarding forwarded = autoencoder.forward(data_batch) assert torch.equal(decoded, forwarded) @@ -34,9 +39,13 @@ def test_convolutional_autoencoder_resnet_50(): # Test encoding embedded = autoencoder.encode(data_batch) assert embedded.shape == (batch_size, embedding_dim) + embedded_solo = autoencoder.encode(data_batch[0]) + assert embedded_solo.shape == (embedding_dim, ) # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, 3, 32, 32) + decoded_solo = autoencoder.decoded(embedded[0]) + assert decoded_solo.shape == (3, 32, 32) # Test forwarding forwarded = autoencoder.forward(data_batch) assert torch.equal(decoded, forwarded) @@ -52,9 +61,13 @@ def test_mixed_convolutional_autoencoder(): # Test encoding embedded = autoencoder.encode(data_batch) assert embedded.shape == (batch_size, embedding_dim) + embedded_solo = autoencoder.encode(data_batch[0]) + assert embedded_solo.shape == (embedding_dim, ) # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, 3, 32, 32) + decoded_solo = autoencoder.decoded(embedded[0]) + assert decoded_solo.shape == (3, 32, 32) # Test forwarding forwarded = autoencoder.forward(data_batch) assert torch.equal(decoded, forwarded) @@ -71,3 +84,15 @@ def test_convolutional_autoencoder_in_deep_clustering(): assert dcn.labels_.shape == (100,) X_embed = dcn.transform(data) assert X_embed.shape == (data.shape[0], dcn.embedding_size) + + +def test_convolutional_autoencoder_errors(): + with pytest.raises(ValueError): + # Wrong input height (must be 32 x X) + ConvolutionalAutoencoder(16, [512, 10]) + with pytest.raises(ValueError): + # Wrong fc_layers for resnet 18 + ConvolutionalAutoencoder(32, conv_encoder_name="resnet18", fc_layers=[2048, 10]) + with pytest.raises(ValueError): + # Wrong fc_layers for resnet 50 + ConvolutionalAutoencoder(32, conv_encoder_name="resnet50", fc_layers=[512, 10]) diff --git a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py index f3fed9a..e71fd09 100644 --- a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py +++ b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py @@ -22,9 +22,13 @@ def test_feedforward_autoencoder(): # Test encoding embedded = autoencoder.encode(data_batch) assert embedded.shape == (batch_size, embedding_dim) + embedded_solo = autoencoder.encode(data_batch[0]) + assert embedded_solo.shape == (embedding_dim, ) # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, data.shape[1]) + decoded_solo = autoencoder.decoded(embedded[0]) + assert decoded_solo.shape == (data.shape[1], ) # Test forwarding forwarded = autoencoder.forward(data_batch) assert torch.equal(decoded, forwarded) diff --git a/clustpy/deep/tests/test_utils.py b/clustpy/deep/tests/test_utils.py index e760938..f73d655 100644 --- a/clustpy/deep/tests/test_utils.py +++ b/clustpy/deep/tests/test_utils.py @@ -8,6 +8,7 @@ from sklearn.mixture import GaussianMixture from clustpy.partition import XMeans from clustpy.deep.tests._helpers_for_tests import _get_dc_test_data +from clustpy.deep.neural_networks import ConvolutionalAutoencoder def test_mean_squared_error(): @@ -71,6 +72,15 @@ def test_encode_batchwise(): desired = np.sum(data, axis=1).reshape((-1, 1)) desired = np.tile(desired, embedding_size) assert np.allclose(encoded, desired, atol=1e-5) + # Test for Conv + X_images = np.array([[[[11] * 32] * 32, [[12] * 32] * 32, [[13] * 32] * 32], + [[[10] * 32] * 32, [[20] * 32] * 32, [[30] * 32] * 32], + [[[10] * 32] * 32, [[40] * 32] * 32, [[70] * 32] * 32], + [[[1] * 32] * 32, [[1] * 32] * 32, [[1] * 32] * 32]]) + dataloader_images = _get_test_dataloader(X_images, 2, False, False) + autoencoder_images = ConvolutionalAutoencoder(32, [512, 10]) + encoded_images = encode_batchwise(dataloader_images, autoencoder_images) + assert encoded_images.shape == (4, 10) def test_predict_batchwise(): @@ -95,6 +105,15 @@ def test_decode_batchwise(): autoencoder = _TestAutoencoder(data.shape[1], embedding_size) decoded = decode_batchwise(dataloader, autoencoder) assert data.shape == decoded.shape + # Test for Conv + X_images = np.array([[[[11] * 32] * 32, [[12] * 32] * 32, [[13] * 32] * 32], + [[[10] * 32] * 32, [[20] * 32] * 32, [[30] * 32] * 32], + [[[10] * 32] * 32, [[40] * 32] * 32, [[70] * 32] * 32], + [[[1] * 32] * 32, [[1] * 32] * 32, [[1] * 32] * 32]]) + dataloader_images = _get_test_dataloader(X_images, 2, False, False) + autoencoder_images = ConvolutionalAutoencoder(32, [512, 10]) + decoded_images = decode_batchwise(dataloader_images, autoencoder_images) + assert X_images.shape == decoded_images.shape def test_encode_decode_batchwise(): @@ -109,6 +128,16 @@ def test_encode_decode_batchwise(): desired = np.tile(desired, embedding_size) assert np.allclose(encoded, desired, atol=1e-5) assert data.shape == decoded.shape + # Test for Conv + X_images = np.array([[[[11] * 32] * 32, [[12] * 32] * 32, [[13] * 32] * 32], + [[[10] * 32] * 32, [[20] * 32] * 32, [[30] * 32] * 32], + [[[10] * 32] * 32, [[40] * 32] * 32, [[70] * 32] * 32], + [[[1] * 32] * 32, [[1] * 32] * 32, [[1] * 32] * 32]]) + dataloader_images = _get_test_dataloader(X_images, 2, False, False) + autoencoder_images = ConvolutionalAutoencoder(32, [512, 10]) + encoded_images, decoded_images = encode_decode_batchwise(dataloader_images, autoencoder_images) + assert encoded_images.shape == (4, 10) + assert X_images.shape == decoded_images.shape def test_int_to_one_hot(): diff --git a/clustpy/partition/specialk.py b/clustpy/partition/specialk.py index ebdf2ed..efad033 100644 --- a/clustpy/partition/specialk.py +++ b/clustpy/partition/specialk.py @@ -7,15 +7,16 @@ """ import numpy as np -from scipy.spatial.distance import pdist, squareform from sklearn.neighbors import radius_neighbors_graph, kneighbors_graph from sklearn.cluster import KMeans from sklearn.base import BaseEstimator, ClusterMixin from clustpy.utils.checks import check_parameters import scipy +from sklearn.neighbors import NearestNeighbors -def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_matrix: str, n_neighbors: int, +def _specialk(X: np.ndarray, significance: float, n_dimensions: int, + similarity_matrix: str | np.ndarray | scipy.sparse.csr_matrix, n_neighbors: int, percentage: float, n_cluster_pairs_to_consider: int, max_n_clusters: int, random_state: np.random.RandomState, debug: bool) -> (int, np.ndarray): """ @@ -29,7 +30,7 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_ Threshold to decide if the samples originate from a single distribution or two distributions n_dimensions : int Dimensionality of the embedding - similarity_matrix : str + similarity_matrix : str | np.ndarray | scipy.sparse.csr_matrix Defines the similarity matrix to use. Can be one of the following strings or a numpy array / scipy sparse csr matrix. If 'NAM', a neighborhood adjacency matrix is used. If 'SAM' a symmetrically normalized adjacency matrix is used @@ -56,7 +57,7 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_ The labels as identified by DipMeans, """ assert significance >= 0 and significance <= 1, "significance must be a value in the range [0, 1]" - assert percentage >= 0 and percentage <= 1, "percentage must be a value in the range [0, 1]" + assert percentage > 0 and percentage <= 1, "percentage must be a value in the range (0, 1]" if type(similarity_matrix) is str and similarity_matrix == 'NAM': final_similarity_matrix = _get_neighborhood_adjacency_matrix(X, percentage, n_neighbors) elif type(similarity_matrix) is str and similarity_matrix == 'SAM': @@ -73,7 +74,8 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_ # Initial values n_clusters = 2 stop_search = False - best_labels = np.zeros(X.shape[0]) + best_labels = np.zeros(X.shape[0], dtype=np.int32) + log_significance = np.log(significance) while n_clusters <= max_n_clusters: if debug: print("=== n_clusters={0} ===".format(n_clusters)) @@ -105,11 +107,12 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_ ids_in_cluster_1 = ids_in_each_cluster[c1] ids_in_cluster_2 = ids_in_each_cluster[c2] # Calculate bound - t_total = _zz_top_bound(D, ids_in_cluster_1, ids_in_cluster_2, debug) + t_total = _log_zz_top_bound(D, ids_in_cluster_1, ids_in_cluster_2, debug) if debug: print("ZZ top:", t_total) - if t_total > significance: + if t_total > log_significance: # Stop execution -> return n_clusters - 1 + n_clusters = n_clusters - 1 stop_search = True break if stop_search: @@ -120,13 +123,13 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_ n_clusters += 1 # Return number of clusters and labels if debug: - print("Final n_clusters={0}".format(n_clusters - 1)) - return n_clusters - 1, best_labels + print("Final n_clusters={0}".format(n_clusters)) + return n_clusters, best_labels -def _zz_top_bound(D: np.ndarray, ids_in_cluster_1: np.ndarray, ids_in_cluster_2: np.ndarray, debug: bool) -> float: +def _log_zz_top_bound(D: np.ndarray, ids_in_cluster_1: np.ndarray, ids_in_cluster_2: np.ndarray, debug: bool) -> float: """ - Calculate the ZZ Top bound + Calculate the log ZZ Top bound. Parameters ---------- @@ -154,7 +157,8 @@ def _zz_top_bound(D: np.ndarray, ids_in_cluster_1: np.ndarray, ids_in_cluster_2: t = max(t1, t2) - sigma2 * Dj.shape[1] if debug: print("sigma={0} / t={1}".format(sigma2, t)) - t_total = Dj.shape[0] * np.exp(-0.5 * t ** 2 / (Dj.shape[1] * sigma2 + t / 3)) + t_total = np.log(Dj.shape[0]) - 0.5 * t ** 2 / (Dj.shape[1] * sigma2 + t / 3) + t_total = min(t_total, 0.0) return t_total @@ -179,14 +183,15 @@ def _get_neighborhood_adjacency_matrix(X: np.ndarray, percentage: float = 0.99, The resulting similarity matrix """ # Get pairwise distances - dist_matrix = squareform(pdist(X, 'euclidean')) - # Get kNN distances (+1 because self is not included in n_neighbors) - knn_distances = np.sort(dist_matrix, axis=1)[:, n_neighbors + 1] + neighbors = NearestNeighbors(n_neighbors=n_neighbors) + neighbors.fit(X) + knn_distances, _ = neighbors.kneighbors() # Get knn dist so that more than 'percentage' points have 'n_neighbors' neighbors - knn_dist_sorted = np.sort(knn_distances) - eps = knn_dist_sorted[int((X.shape[0] - 1) * percentage)] + knn_dist_sorted = np.sort(knn_distances[:, -1]) + percentage_idx = int(np.ceil(X.shape[0] * percentage)) + eps = knn_dist_sorted[percentage_idx - 1] # Get neighbor graph - similarity_matrix = radius_neighbors_graph(X, radius=eps) + similarity_matrix = radius_neighbors_graph(X, radius=eps, mode="distance", include_self=False) return similarity_matrix @@ -232,7 +237,7 @@ class SpecialK(ClusterMixin, BaseEstimator): Threshold to decide if the samples originate from a single distribution or two distributions (default: 0.01) n_dimensions : int Dimensionality of the embedding (default: 200) - similarity_matrix : str + similarity_matrix : str | np.ndarray | scipy.sparse.csr_matrix Defines the similarity matrix to use. Can be one of the following strings or a numpy array / scipy sparse csr matrix. If 'NAM', a neighborhood adjacency matrix is used. If 'SAM' a symmetrically normalized adjacency matrix is used (default: 'NAM') @@ -267,7 +272,8 @@ class SpecialK(ClusterMixin, BaseEstimator): Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2019, Würzburg, Germany, September 16–20, 2019, Proceedings, Part I. Springer International Publishing, 2020. """ - def __init__(self, significance: float = 0.01, n_dimensions: int = 200, similarity_matrix: str = 'NAM', + def __init__(self, significance: float = 0.01, n_dimensions: int = 200, + similarity_matrix: str | np.ndarray | scipy.sparse.csr_matrix = 'NAM', n_neighbors: int = 5, percentage: float = 0.99, n_cluster_pairs_to_consider: int = 10, max_n_clusters: int = np.inf, random_state: np.random.RandomState | int = None, debug: bool = False): self.significance = significance @@ -298,10 +304,11 @@ def fit(self, X: np.ndarray, y: np.ndarray = None) -> 'SpecialK': this instance of the SpecialK algorithm """ X, _, random_state = check_parameters(X=X, y=y, random_state=self.random_state) - n_clusters, labels = _specialk(X, self.significance, self.n_dimensions, self.similarity_matrix, + n_dimensions = min(self.n_dimensions, X.shape[0] - 1) + n_clusters, labels = _specialk(X, self.significance, n_dimensions, self.similarity_matrix, self.n_neighbors, self.percentage, self.n_cluster_pairs_to_consider, self.max_n_clusters, random_state, self.debug) self.n_clusters_ = n_clusters self.labels_ = labels - self.features_in_ = X.shape[1] + self.n_features_in_ = X.shape[1] return self diff --git a/clustpy/partition/tests/test_specialk.py b/clustpy/partition/tests/test_specialk.py index 202231e..49a0a45 100644 --- a/clustpy/partition/tests/test_specialk.py +++ b/clustpy/partition/tests/test_specialk.py @@ -5,7 +5,7 @@ from clustpy.utils.checks import check_clustpy_estimator import pytest -@pytest.mark.skip(reason="There seems to be a non-trivial error. Needs to be fixed.") +#@pytest.mark.skip(reason="There seems to be a non-trivial error. Needs to be fixed.") def test_specialk_estimator(): check_clustpy_estimator(SpecialK(), ("check_complex_data")) From 5f42ca9399f293ba4278d735f09f6ae88542d124 Mon Sep 17 00:00:00 2001 From: Collin Leiber Date: Fri, 29 May 2026 10:33:40 +0300 Subject: [PATCH 2/2] fix spelling error in conv ae test --- .../tests/test_convolutional_autoencoder.py | 9 ++++++--- .../tests/test_feedforward_autoencoder.py | 2 +- clustpy/partition/tests/test_specialk.py | 1 - 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py index 22259b7..4333552 100644 --- a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py +++ b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py @@ -11,6 +11,7 @@ def test_convolutional_autoencoder_resnet18(): data_batch = torch.Tensor(data[:batch_size]) embedding_dim = 10 autoencoder = ConvolutionalAutoencoder(32, [512, embedding_dim]) + autoencoder.eval() # Test encoding embedded = autoencoder.encode(data_batch) assert embedded.shape == (batch_size, embedding_dim) @@ -19,7 +20,7 @@ def test_convolutional_autoencoder_resnet18(): # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, 3, 32, 32) - decoded_solo = autoencoder.decoded(embedded[0]) + decoded_solo = autoencoder.decode(embedded[0]) assert decoded_solo.shape == (3, 32, 32) # Test forwarding forwarded = autoencoder.forward(data_batch) @@ -36,6 +37,7 @@ def test_convolutional_autoencoder_resnet_50(): data_batch = torch.Tensor(data[:batch_size]) embedding_dim = 10 autoencoder = ConvolutionalAutoencoder(32, [2048, embedding_dim], conv_encoder_name="resnet50") + autoencoder.eval() # Test encoding embedded = autoencoder.encode(data_batch) assert embedded.shape == (batch_size, embedding_dim) @@ -44,7 +46,7 @@ def test_convolutional_autoencoder_resnet_50(): # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, 3, 32, 32) - decoded_solo = autoencoder.decoded(embedded[0]) + decoded_solo = autoencoder.decode(embedded[0]) assert decoded_solo.shape == (3, 32, 32) # Test forwarding forwarded = autoencoder.forward(data_batch) @@ -58,6 +60,7 @@ def test_mixed_convolutional_autoencoder(): embedding_dim = 10 autoencoder = ConvolutionalAutoencoder(32, [2048, embedding_dim], fc_decoder_layers=[embedding_dim, 512], conv_encoder_name="resnet50", conv_decoder_name="resnet18") + autoencoder.eval() # Test encoding embedded = autoencoder.encode(data_batch) assert embedded.shape == (batch_size, embedding_dim) @@ -66,7 +69,7 @@ def test_mixed_convolutional_autoencoder(): # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, 3, 32, 32) - decoded_solo = autoencoder.decoded(embedded[0]) + decoded_solo = autoencoder.decode(embedded[0]) assert decoded_solo.shape == (3, 32, 32) # Test forwarding forwarded = autoencoder.forward(data_batch) diff --git a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py index e71fd09..1c675c2 100644 --- a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py +++ b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py @@ -27,7 +27,7 @@ def test_feedforward_autoencoder(): # Test decoding decoded = autoencoder.decode(embedded) assert decoded.shape == (batch_size, data.shape[1]) - decoded_solo = autoencoder.decoded(embedded[0]) + decoded_solo = autoencoder.decode(embedded[0]) assert decoded_solo.shape == (data.shape[1], ) # Test forwarding forwarded = autoencoder.forward(data_batch) diff --git a/clustpy/partition/tests/test_specialk.py b/clustpy/partition/tests/test_specialk.py index 49a0a45..4201e00 100644 --- a/clustpy/partition/tests/test_specialk.py +++ b/clustpy/partition/tests/test_specialk.py @@ -5,7 +5,6 @@ from clustpy.utils.checks import check_clustpy_estimator import pytest -#@pytest.mark.skip(reason="There seems to be a non-trivial error. Needs to be fixed.") def test_specialk_estimator(): check_clustpy_estimator(SpecialK(), ("check_complex_data"))