From d6e72cba3c4193ae7985f2a6ffb28ef7af262815 Mon Sep 17 00:00:00 2001
From: Collin Leiber <collin.leiber@aalto.fi>
Date: Thu, 28 May 2026 17:35:23 +0300
Subject: [PATCH 1/2] Fix bug in SpecialK algorithm. Fix bugs mentioned in #107

---
 clustpy/deep/_utils.py                        |  8 +--
 .../neural_networks/_abstract_autoencoder.py  |  1 +
 .../convolutional_autoencoder.py              | 35 +++++++++++--
 .../feedforward_autoencoder.py                | 16 ++++--
 .../tests/test_convolutional_autoencoder.py   | 25 +++++++++
 .../tests/test_feedforward_autoencoder.py     |  4 ++
 clustpy/deep/tests/test_utils.py              | 29 +++++++++++
 clustpy/partition/specialk.py                 | 51 +++++++++++--------
 clustpy/partition/tests/test_specialk.py      |  2 +-
 9 files changed, 136 insertions(+), 35 deletions(-)

diff --git a/clustpy/deep/_utils.py b/clustpy/deep/_utils.py
index 8fc0039..b3e1aa9 100644
--- a/clustpy/deep/_utils.py
+++ b/clustpy/deep/_utils.py
@@ -185,7 +185,7 @@ def encode_batchwise(dataloader: torch.utils.data.DataLoader, neural_network: to
         if type(embedded_data) is tuple:
             embedded_data = embedded_data[0]
         if embeddings_numpy is None:
-            embeddings_numpy = np.zeros((len(dataloader.dataset), embedded_data.shape[1]), dtype=float)
+            embeddings_numpy = np.zeros([len(dataloader.dataset)] + list(embedded_data.shape[1:]), dtype=float)
         embeddings_numpy[batch[0]] = embedded_data.detach().cpu().numpy()
     return embeddings_numpy
 
@@ -218,7 +218,7 @@ def decode_batchwise(dataloader: torch.utils.data.DataLoader, neural_network: to
         else:
             decoded_data = neural_network.decode(embedded_data)
         if decodings_numpy is None:
-            decodings_numpy = np.zeros((len(dataloader.dataset), decoded_data.shape[1]), dtype=float)
+            decodings_numpy = np.zeros([len(dataloader.dataset)] + list(decoded_data.shape[1:]), dtype=float)
         decodings_numpy[batch[0]] = decoded_data.detach().cpu().numpy()
     return decodings_numpy
 
@@ -255,8 +255,8 @@ def encode_decode_batchwise(dataloader: torch.utils.data.DataLoader, neural_netw
         else:
             decoded_data = neural_network.decode(embedded_data)
         if embeddings_numpy is None:
-            embeddings_numpy = np.zeros((len(dataloader.dataset), embedded_data.shape[1]), dtype=float)
-            decodings_numpy = np.zeros((len(dataloader.dataset), decoded_data.shape[1]), dtype=float)
+            embeddings_numpy = np.zeros([len(dataloader.dataset)] + list(embedded_data.shape[1:]), dtype=float)
+            decodings_numpy = np.zeros([len(dataloader.dataset)] + list(decoded_data.shape[1:]), dtype=float)
         embeddings_numpy[batch[0]] = embedded_data.detach().cpu().numpy()
         decodings_numpy[batch[0]] = decoded_data.detach().cpu().numpy()
     return embeddings_numpy, decodings_numpy
diff --git a/clustpy/deep/neural_networks/_abstract_autoencoder.py b/clustpy/deep/neural_networks/_abstract_autoencoder.py
index 33c9d36..b255718 100644
--- a/clustpy/deep/neural_networks/_abstract_autoencoder.py
+++ b/clustpy/deep/neural_networks/_abstract_autoencoder.py
@@ -424,6 +424,7 @@ def load_parameters(self, path: str | Path) -> '_AbstractAutoencoder':
             this instance of the autoencoder
         """
         self.load_state_dict(torch.load(path, weights_only=True, map_location=get_device_from_module(self)))
+        self.eval()
         self.fitted = True
         return self
 
diff --git a/clustpy/deep/neural_networks/convolutional_autoencoder.py b/clustpy/deep/neural_networks/convolutional_autoencoder.py
index d17cd73..1c48840 100644
--- a/clustpy/deep/neural_networks/convolutional_autoencoder.py
+++ b/clustpy/deep/neural_networks/convolutional_autoencoder.py
@@ -68,7 +68,23 @@ class ConvolutionalAutoencoder(_AbstractAutoencoder):
         indicates whether the autoencoder is already fitted
     work_on_copy : bool
         indicates whether deep clustering algorithms should work on a copy of the original autoencoder
-        
+
+    Examples
+    ----------
+    >>> import numpy as np
+    >>> import torch
+    >>> import torchvision
+    >>> from clustpy.deep.neural_networks import ConvolutionalAutoencoder
+    >>> from clustpy.data import load_usps
+    >>> from clustpy.utils import plot_image
+    >>> X = load_usps().images / 255.
+    >>> X = X.reshape(-1, 1, X.shape[1], X.shape[1])
+    >>> X = np.tile(X, (1, 3, 1, 1))
+    >>> X = torchvision.transforms.Resize((32, 32))(torch.from_numpy(X).float()).numpy()
+    >>> cae = ConvolutionalAutoencoder(X.shape[2], [512, 10]).fit(data=X[:500], n_epochs=100)
+    >>> Z = cae.decode(cae.encode(torch.from_numpy(X[0]).float())).detach().numpy()
+    >>> plot_image(Z, image_shape=(32, 32), min_value=0, max_value=1)
+
     References
     ----------
     He, Kaiming, et al. "Deep residual learning for image recognition."
@@ -87,6 +103,8 @@ def __init__(self, input_height: int, fc_layers: list, conv_encoder_name: str =
                  work_on_copy: bool = True, random_state: np.random.RandomState | int = None, **fc_kwargs):
         super().__init__(work_on_copy, random_state)
         self.allow_nd_input = True
+        if input_height % 32 != 0:
+            raise ValueError(f"Input_height has to be a multiple of 32. Your input: {input_height}")
         self.input_height = input_height
 
         # Check if layers match
@@ -141,8 +159,14 @@ def encode(self, x: torch.Tensor) -> torch.Tensor:
         embedded : torch.Tensor
             the embedded data point with dimensionality embedding_size
         """
-        embedded = self.conv_encoder(x)
+        x_adj = x.reshape(1, x.shape[0], x.shape[1] ,x.shape[2]) if x.ndim == 3 else x
+        if x_adj.shape[1:] != (3, self.input_height, self.input_height):
+            raise ValueError("Input layer of the encoder ({0}) does not match shape of the input sample ({1})".format((3, self.input_height, self.input_height),
+                                                                                            x_adj.shape[1:]))
+        embedded = self.conv_encoder(x_adj)
         embedded = self.fc_encoder(embedded)
+        if x.ndim == 3:
+            embedded = embedded[0]
         return embedded
 
     def decode(self, embedded: torch.Tensor) -> torch.Tensor:
@@ -159,6 +183,11 @@ def decode(self, embedded: torch.Tensor) -> torch.Tensor:
         decoded : torch.Tensor
             returns the reconstruction of embedded
         """
-        decoded = self.fc_decoder(embedded)
+        embedded_adj = embedded.reshape((1, -1)) if embedded.ndim == 1 else embedded
+        if embedded_adj.shape[1] != self.fc_decoder.layers[0]:
+            raise ValueError("Input layer of the decoder does not match input sample")
+        decoded = self.fc_decoder(embedded_adj)
         decoded = self.conv_decoder(decoded)
+        if embedded.ndim == 1:
+            decoded = decoded[0]
         return decoded
diff --git a/clustpy/deep/neural_networks/feedforward_autoencoder.py b/clustpy/deep/neural_networks/feedforward_autoencoder.py
index c51704b..14c5f5c 100644
--- a/clustpy/deep/neural_networks/feedforward_autoencoder.py
+++ b/clustpy/deep/neural_networks/feedforward_autoencoder.py
@@ -95,10 +95,13 @@ def encode(self, x: torch.Tensor) -> torch.Tensor:
         embedded : torch.Tensor
             the embedded data point with dimensionality embedding_size
         """
-        if x.shape[1] != self.encoder.layers[0]:
+        x_adj = x.reshape((1, -1)) if x.ndim == 1 else x
+        if x_adj.shape[1] != self.encoder.layers[0]:
             raise ValueError("Input layer of the encoder ({0}) does not match input sample ({1})".format(self.encoder.layers[0],
-                                                                                            x.shape[1]))
-        embedded = self.encoder(x)
+                                                                                            x_adj.shape[1]))
+        embedded = self.encoder(x_adj)
+        if x.ndim == 1:
+            embedded = embedded[0]
         return embedded
 
     def decode(self, embedded: torch.Tensor) -> torch.Tensor:
@@ -115,7 +118,10 @@ def decode(self, embedded: torch.Tensor) -> torch.Tensor:
         decoded : torch.Tensor
             returns the reconstruction of embedded
         """
-        if embedded.shape[1] != self.decoder.layers[0]:
+        embedded_adj = embedded.reshape((1, -1)) if embedded.ndim == 1 else embedded
+        if embedded_adj.shape[1] != self.decoder.layers[0]:
             raise ValueError("Input layer of the decoder does not match input sample")
-        decoded = self.decoder(embedded)
+        decoded = self.decoder(embedded_adj)
+        if embedded.ndim == 1:
+            decoded = decoded[0]
         return decoded
diff --git a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py
index adcfe96..22259b7 100644
--- a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py
+++ b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py
@@ -2,6 +2,7 @@
 from clustpy.deep import DCN
 import torch
 import numpy as np
+import pytest
 
 
 def test_convolutional_autoencoder_resnet18():
@@ -13,9 +14,13 @@ def test_convolutional_autoencoder_resnet18():
     # Test encoding
     embedded = autoencoder.encode(data_batch)
     assert embedded.shape == (batch_size, embedding_dim)
+    embedded_solo = autoencoder.encode(data_batch[0])
+    assert embedded_solo.shape == (embedding_dim, )
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, 3, 32, 32)
+    decoded_solo = autoencoder.decoded(embedded[0])
+    assert decoded_solo.shape == (3, 32, 32)
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
     assert torch.equal(decoded, forwarded)
@@ -34,9 +39,13 @@ def test_convolutional_autoencoder_resnet_50():
     # Test encoding
     embedded = autoencoder.encode(data_batch)
     assert embedded.shape == (batch_size, embedding_dim)
+    embedded_solo = autoencoder.encode(data_batch[0])
+    assert embedded_solo.shape == (embedding_dim, )
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, 3, 32, 32)
+    decoded_solo = autoencoder.decoded(embedded[0])
+    assert decoded_solo.shape == (3, 32, 32)
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
     assert torch.equal(decoded, forwarded)
@@ -52,9 +61,13 @@ def test_mixed_convolutional_autoencoder():
     # Test encoding
     embedded = autoencoder.encode(data_batch)
     assert embedded.shape == (batch_size, embedding_dim)
+    embedded_solo = autoencoder.encode(data_batch[0])
+    assert embedded_solo.shape == (embedding_dim, )
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, 3, 32, 32)
+    decoded_solo = autoencoder.decoded(embedded[0])
+    assert decoded_solo.shape == (3, 32, 32)
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
     assert torch.equal(decoded, forwarded)
@@ -71,3 +84,15 @@ def test_convolutional_autoencoder_in_deep_clustering():
     assert dcn.labels_.shape == (100,)
     X_embed = dcn.transform(data)
     assert X_embed.shape == (data.shape[0], dcn.embedding_size)
+
+
+def test_convolutional_autoencoder_errors():
+    with pytest.raises(ValueError):
+        # Invalid input height: 16 is not a multiple of 32
+        ConvolutionalAutoencoder(16, [512, 10])
+    with pytest.raises(ValueError):
+        # Mismatched fc_layers for resnet18: first layer is 2048, the resnet18 test uses 512
+        ConvolutionalAutoencoder(32, conv_encoder_name="resnet18", fc_layers=[2048, 10])
+    with pytest.raises(ValueError):
+        # Mismatched fc_layers for resnet50: first layer is 512, the resnet50 test uses 2048
+        ConvolutionalAutoencoder(32, conv_encoder_name="resnet50", fc_layers=[512, 10])
diff --git a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py
index f3fed9a..e71fd09 100644
--- a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py
+++ b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py
@@ -22,9 +22,13 @@ def test_feedforward_autoencoder():
     # Test encoding
     embedded = autoencoder.encode(data_batch)
     assert embedded.shape == (batch_size, embedding_dim)
+    embedded_solo = autoencoder.encode(data_batch[0])
+    assert embedded_solo.shape == (embedding_dim, )
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, data.shape[1])
+    decoded_solo = autoencoder.decoded(embedded[0])
+    assert decoded_solo.shape == (data.shape[1], )
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
     assert torch.equal(decoded, forwarded)
diff --git a/clustpy/deep/tests/test_utils.py b/clustpy/deep/tests/test_utils.py
index e760938..f73d655 100644
--- a/clustpy/deep/tests/test_utils.py
+++ b/clustpy/deep/tests/test_utils.py
@@ -8,6 +8,7 @@
 from sklearn.mixture import GaussianMixture
 from clustpy.partition import XMeans
 from clustpy.deep.tests._helpers_for_tests import _get_dc_test_data
+from clustpy.deep.neural_networks import ConvolutionalAutoencoder
 
 
 def test_mean_squared_error():
@@ -71,6 +72,15 @@ def test_encode_batchwise():
     desired = np.sum(data, axis=1).reshape((-1, 1))
     desired = np.tile(desired, embedding_size)
     assert np.allclose(encoded, desired, atol=1e-5)
+    # Test for Conv
+    X_images = np.array([[[[11] * 32] * 32, [[12] * 32] * 32, [[13] * 32] * 32],
+                         [[[10] * 32] * 32, [[20] * 32] * 32, [[30] * 32] * 32],
+                         [[[10] * 32] * 32, [[40] * 32] * 32, [[70] * 32] * 32],
+                         [[[1] * 32] * 32, [[1] * 32] * 32, [[1] * 32] * 32]])
+    dataloader_images = _get_test_dataloader(X_images, 2, False, False)
+    autoencoder_images = ConvolutionalAutoencoder(32, [512, 10])
+    encoded_images = encode_batchwise(dataloader_images, autoencoder_images)
+    assert encoded_images.shape == (4, 10)
 
 
 def test_predict_batchwise():
@@ -95,6 +105,15 @@ def test_decode_batchwise():
     autoencoder = _TestAutoencoder(data.shape[1], embedding_size)
     decoded = decode_batchwise(dataloader, autoencoder)
     assert data.shape == decoded.shape
+    # Test for Conv
+    X_images = np.array([[[[11] * 32] * 32, [[12] * 32] * 32, [[13] * 32] * 32],
+                         [[[10] * 32] * 32, [[20] * 32] * 32, [[30] * 32] * 32],
+                         [[[10] * 32] * 32, [[40] * 32] * 32, [[70] * 32] * 32],
+                         [[[1] * 32] * 32, [[1] * 32] * 32, [[1] * 32] * 32]])
+    dataloader_images = _get_test_dataloader(X_images, 2, False, False)
+    autoencoder_images = ConvolutionalAutoencoder(32, [512, 10])
+    decoded_images = decode_batchwise(dataloader_images, autoencoder_images)
+    assert X_images.shape == decoded_images.shape
 
 
 def test_encode_decode_batchwise():
@@ -109,6 +128,16 @@ def test_encode_decode_batchwise():
     desired = np.tile(desired, embedding_size)
     assert np.allclose(encoded, desired, atol=1e-5)
     assert data.shape == decoded.shape
+    # Test for Conv
+    X_images = np.array([[[[11] * 32] * 32, [[12] * 32] * 32, [[13] * 32] * 32],
+                         [[[10] * 32] * 32, [[20] * 32] * 32, [[30] * 32] * 32],
+                         [[[10] * 32] * 32, [[40] * 32] * 32, [[70] * 32] * 32],
+                         [[[1] * 32] * 32, [[1] * 32] * 32, [[1] * 32] * 32]])
+    dataloader_images = _get_test_dataloader(X_images, 2, False, False)
+    autoencoder_images = ConvolutionalAutoencoder(32, [512, 10])
+    encoded_images, decoded_images = encode_decode_batchwise(dataloader_images, autoencoder_images)
+    assert encoded_images.shape == (4, 10)
+    assert X_images.shape == decoded_images.shape
 
 
 def test_int_to_one_hot():
diff --git a/clustpy/partition/specialk.py b/clustpy/partition/specialk.py
index ebdf2ed..efad033 100644
--- a/clustpy/partition/specialk.py
+++ b/clustpy/partition/specialk.py
@@ -7,15 +7,16 @@
 """
 
 import numpy as np
-from scipy.spatial.distance import pdist, squareform
 from sklearn.neighbors import radius_neighbors_graph, kneighbors_graph
 from sklearn.cluster import KMeans
 from sklearn.base import BaseEstimator, ClusterMixin
 from clustpy.utils.checks import check_parameters
 import scipy
+from sklearn.neighbors import NearestNeighbors
 
 
-def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_matrix: str, n_neighbors: int,
+def _specialk(X: np.ndarray, significance: float, n_dimensions: int,
+              similarity_matrix: str | np.ndarray | scipy.sparse.csr_matrix, n_neighbors: int,
               percentage: float, n_cluster_pairs_to_consider: int, max_n_clusters: int,
               random_state: np.random.RandomState, debug: bool) -> (int, np.ndarray):
     """
@@ -29,7 +30,7 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_
         Threshold to decide if the samples originate from a single distribution or two distributions
     n_dimensions : int
         Dimensionality of the embedding
-    similarity_matrix : str
+    similarity_matrix : str | np.ndarray | scipy.sparse.csr_matrix
         Defines the similarity matrix to use. Can be one of the following strings or a numpy array / scipy sparse csr matrix.
         If 'NAM', a neighborhood adjacency matrix is used.
         If 'SAM' a symmetrically normalized adjacency matrix is used
@@ -56,7 +57,7 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_
         The labels as identified by DipMeans,
     """
     assert significance >= 0 and significance <= 1, "significance must be a value in the range [0, 1]"
-    assert percentage >= 0 and percentage <= 1, "percentage must be a value in the range [0, 1]"
+    assert percentage > 0 and percentage <= 1, "percentage must be a value in the range (0, 1]"
     if type(similarity_matrix) is str and similarity_matrix == 'NAM':
         final_similarity_matrix = _get_neighborhood_adjacency_matrix(X, percentage, n_neighbors)
     elif type(similarity_matrix) is str and similarity_matrix == 'SAM':
@@ -73,7 +74,8 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_
     # Initial values
     n_clusters = 2
     stop_search = False
-    best_labels = np.zeros(X.shape[0])
+    best_labels = np.zeros(X.shape[0], dtype=np.int32)
+    log_significance = np.log(significance)
     while n_clusters <= max_n_clusters:
         if debug:
             print("=== n_clusters={0} ===".format(n_clusters))
@@ -105,11 +107,12 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_
             ids_in_cluster_1 = ids_in_each_cluster[c1]
             ids_in_cluster_2 = ids_in_each_cluster[c2]
             # Calculate bound
-            t_total = _zz_top_bound(D, ids_in_cluster_1, ids_in_cluster_2, debug)
+            t_total = _log_zz_top_bound(D, ids_in_cluster_1, ids_in_cluster_2, debug)
             if debug:
                 print("ZZ top:", t_total)
-            if t_total > significance:
+            if t_total > log_significance:
                 # Stop execution -> return n_clusters - 1
+                n_clusters = n_clusters - 1
                 stop_search = True
                 break
         if stop_search:
@@ -120,13 +123,13 @@ def _specialk(X: np.ndarray, significance: float, n_dimensions: int, similarity_
             n_clusters += 1
     # Return number of clusters and labels
     if debug:
-        print("Final n_clusters={0}".format(n_clusters - 1))
-    return n_clusters - 1, best_labels
+        print("Final n_clusters={0}".format(n_clusters))
+    return n_clusters, best_labels
 
 
-def _zz_top_bound(D: np.ndarray, ids_in_cluster_1: np.ndarray, ids_in_cluster_2: np.ndarray, debug: bool) -> float:
+def _log_zz_top_bound(D: np.ndarray, ids_in_cluster_1: np.ndarray, ids_in_cluster_2: np.ndarray, debug: bool) -> float:
     """
-    Calculate the ZZ Top bound
+    Calculate the log ZZ Top bound.
 
     Parameters
     ----------
@@ -154,7 +157,8 @@ def _zz_top_bound(D: np.ndarray, ids_in_cluster_1: np.ndarray, ids_in_cluster_2:
     t = max(t1, t2) - sigma2 * Dj.shape[1]
     if debug:
         print("sigma={0} / t={1}".format(sigma2, t))
-    t_total = Dj.shape[0] * np.exp(-0.5 * t ** 2 / (Dj.shape[1] * sigma2 + t / 3))
+    t_total = np.log(Dj.shape[0]) - 0.5 * t ** 2 / (Dj.shape[1] * sigma2 + t / 3)
+    t_total = min(t_total, 0.0)
     return t_total
 
 
@@ -179,14 +183,15 @@ def _get_neighborhood_adjacency_matrix(X: np.ndarray, percentage: float = 0.99,
         The resulting similarity matrix
     """
-    # Get pairwise distances
-    dist_matrix = squareform(pdist(X, 'euclidean'))
-    # Get kNN distances (+1 because self is not included in n_neighbors)
-    knn_distances = np.sort(dist_matrix, axis=1)[:, n_neighbors + 1]
+    # Get kNN distances (kneighbors() without a query does not count a point as its own neighbor)
+    neighbors = NearestNeighbors(n_neighbors=n_neighbors)
+    neighbors.fit(X)
+    knn_distances, _ = neighbors.kneighbors()
     # Get knn dist so that more than 'percentage' points have 'n_neighbors' neighbors
-    knn_dist_sorted = np.sort(knn_distances)
-    eps = knn_dist_sorted[int((X.shape[0] - 1) * percentage)]
+    knn_dist_sorted = np.sort(knn_distances[:, -1])
+    percentage_idx = int(np.ceil(X.shape[0] * percentage))
+    eps = knn_dist_sorted[percentage_idx - 1]
     # Get neighbor graph
-    similarity_matrix = radius_neighbors_graph(X, radius=eps)
+    similarity_matrix = radius_neighbors_graph(X, radius=eps, mode="distance", include_self=False)
     return similarity_matrix
 
 
@@ -232,7 +237,7 @@ class SpecialK(ClusterMixin, BaseEstimator):
         Threshold to decide if the samples originate from a single distribution or two distributions (default: 0.01)
     n_dimensions : int
         Dimensionality of the embedding (default: 200)
-    similarity_matrix : str
+    similarity_matrix : str | np.ndarray | scipy.sparse.csr_matrix
         Defines the similarity matrix to use. Can be one of the following strings or a numpy array / scipy sparse csr matrix.
         If 'NAM', a neighborhood adjacency matrix is used.
         If 'SAM' a symmetrically normalized adjacency matrix is used (default: 'NAM')
@@ -267,7 +272,8 @@ class SpecialK(ClusterMixin, BaseEstimator):
     Machine Learning and Knowledge Discovery in Databases: European Conference, ECML PKDD 2019, Würzburg, Germany, September 16–20, 2019, Proceedings, Part I. Springer International Publishing, 2020.
     """
 
-    def __init__(self, significance: float = 0.01, n_dimensions: int = 200, similarity_matrix: str = 'NAM',
+    def __init__(self, significance: float = 0.01, n_dimensions: int = 200,
+                 similarity_matrix: str | np.ndarray | scipy.sparse.csr_matrix = 'NAM',
                  n_neighbors: int = 5, percentage: float = 0.99, n_cluster_pairs_to_consider: int = 10,
                  max_n_clusters: int = np.inf, random_state: np.random.RandomState | int = None, debug: bool = False):
         self.significance = significance
@@ -298,10 +304,11 @@ def fit(self, X: np.ndarray, y: np.ndarray = None) -> 'SpecialK':
             this instance of the SpecialK algorithm
         """
         X, _, random_state = check_parameters(X=X, y=y, random_state=self.random_state)
-        n_clusters, labels = _specialk(X, self.significance, self.n_dimensions, self.similarity_matrix,
+        n_dimensions = min(self.n_dimensions, X.shape[0] - 1)
+        n_clusters, labels = _specialk(X, self.significance, n_dimensions, self.similarity_matrix,
                                        self.n_neighbors, self.percentage, self.n_cluster_pairs_to_consider,
                                        self.max_n_clusters, random_state, self.debug)
         self.n_clusters_ = n_clusters
         self.labels_ = labels
-        self.features_in_ = X.shape[1]
+        self.n_features_in_ = X.shape[1]
         return self
diff --git a/clustpy/partition/tests/test_specialk.py b/clustpy/partition/tests/test_specialk.py
index 202231e..49a0a45 100644
--- a/clustpy/partition/tests/test_specialk.py
+++ b/clustpy/partition/tests/test_specialk.py
@@ -5,7 +5,7 @@
 from clustpy.utils.checks import check_clustpy_estimator
 import pytest
 
-@pytest.mark.skip(reason="There seems to be a non-trivial error. Needs to be fixed.")
+#@pytest.mark.skip(reason="There seems to be a non-trivial error. Needs to be fixed.")
 def test_specialk_estimator():
     check_clustpy_estimator(SpecialK(), ("check_complex_data"))
 

From 5f42ca9399f293ba4278d735f09f6ae88542d124 Mon Sep 17 00:00:00 2001
From: Collin Leiber <collin.leiber@aalto.fi>
Date: Fri, 29 May 2026 10:33:40 +0300
Subject: [PATCH 2/2] fix spelling error in conv ae test

---
 .../tests/test_convolutional_autoencoder.py              | 9 ++++++---
 .../tests/test_feedforward_autoencoder.py                | 2 +-
 clustpy/partition/tests/test_specialk.py                 | 1 -
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py
index 22259b7..4333552 100644
--- a/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py
+++ b/clustpy/deep/neural_networks/tests/test_convolutional_autoencoder.py
@@ -11,6 +11,7 @@ def test_convolutional_autoencoder_resnet18():
     data_batch = torch.Tensor(data[:batch_size])
     embedding_dim = 10
     autoencoder = ConvolutionalAutoencoder(32, [512, embedding_dim])
+    autoencoder.eval()
     # Test encoding
     embedded = autoencoder.encode(data_batch)
     assert embedded.shape == (batch_size, embedding_dim)
@@ -19,7 +20,7 @@ def test_convolutional_autoencoder_resnet18():
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, 3, 32, 32)
-    decoded_solo = autoencoder.decoded(embedded[0])
+    decoded_solo = autoencoder.decode(embedded[0])
     assert decoded_solo.shape == (3, 32, 32)
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
@@ -36,6 +37,7 @@ def test_convolutional_autoencoder_resnet_50():
     data_batch = torch.Tensor(data[:batch_size])
     embedding_dim = 10
     autoencoder = ConvolutionalAutoencoder(32, [2048, embedding_dim], conv_encoder_name="resnet50")
+    autoencoder.eval()
     # Test encoding
     embedded = autoencoder.encode(data_batch)
     assert embedded.shape == (batch_size, embedding_dim)
@@ -44,7 +46,7 @@ def test_convolutional_autoencoder_resnet_50():
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, 3, 32, 32)
-    decoded_solo = autoencoder.decoded(embedded[0])
+    decoded_solo = autoencoder.decode(embedded[0])
     assert decoded_solo.shape == (3, 32, 32)
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
@@ -58,6 +60,7 @@ def test_mixed_convolutional_autoencoder():
     embedding_dim = 10
     autoencoder = ConvolutionalAutoencoder(32, [2048, embedding_dim], fc_decoder_layers=[embedding_dim, 512],
                                            conv_encoder_name="resnet50", conv_decoder_name="resnet18")
+    autoencoder.eval()
     # Test encoding
     embedded = autoencoder.encode(data_batch)
     assert embedded.shape == (batch_size, embedding_dim)
@@ -66,7 +69,7 @@ def test_mixed_convolutional_autoencoder():
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, 3, 32, 32)
-    decoded_solo = autoencoder.decoded(embedded[0])
+    decoded_solo = autoencoder.decode(embedded[0])
     assert decoded_solo.shape == (3, 32, 32)
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
diff --git a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py
index e71fd09..1c675c2 100644
--- a/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py
+++ b/clustpy/deep/neural_networks/tests/test_feedforward_autoencoder.py
@@ -27,7 +27,7 @@ def test_feedforward_autoencoder():
     # Test decoding
     decoded = autoencoder.decode(embedded)
     assert decoded.shape == (batch_size, data.shape[1])
-    decoded_solo = autoencoder.decoded(embedded[0])
+    decoded_solo = autoencoder.decode(embedded[0])
     assert decoded_solo.shape == (data.shape[1], )
     # Test forwarding
     forwarded = autoencoder.forward(data_batch)
diff --git a/clustpy/partition/tests/test_specialk.py b/clustpy/partition/tests/test_specialk.py
index 49a0a45..4201e00 100644
--- a/clustpy/partition/tests/test_specialk.py
+++ b/clustpy/partition/tests/test_specialk.py
@@ -5,7 +5,6 @@
 from clustpy.utils.checks import check_clustpy_estimator
 import pytest
 
-#@pytest.mark.skip(reason="There seems to be a non-trivial error. Needs to be fixed.")
 def test_specialk_estimator():
     check_clustpy_estimator(SpecialK(), ("check_complex_data"))