From e9c659b85813baae0efbdfc06d973808c5f37c56 Mon Sep 17 00:00:00 2001
From: Pascal Weber <pascal.weber@univie.ac.at>
Date: Sun, 11 Jan 2026 13:53:59 +0100
Subject: [PATCH 1/5] Added SHADE

---
 clustpy/deep/__init__.py                |   2 +
 clustpy/deep/dcdist/__init__.py         |  27 +
 clustpy/deep/dcdist/dctree.py           | 857 ++++++++++++++++++++++++
 clustpy/deep/dcdist/dctree_clusterer.py | 228 +++++++
 clustpy/deep/shade.py                   | 508 ++++++++++++++
 5 files changed, 1622 insertions(+)
 create mode 100644 clustpy/deep/dcdist/__init__.py
 create mode 100644 clustpy/deep/dcdist/dctree.py
 create mode 100644 clustpy/deep/dcdist/dctree_clusterer.py
 create mode 100644 clustpy/deep/shade.py

diff --git a/clustpy/deep/__init__.py b/clustpy/deep/__init__.py
index a5b72d0..10561d8 100644
--- a/clustpy/deep/__init__.py
+++ b/clustpy/deep/__init__.py
@@ -9,6 +9,7 @@
 from .aec import AEC
 from .deepect import DeepECT
 from .den import DEN
+from .shade import SHADE
 from ._data_utils import get_dataloader, get_default_augmented_dataloaders
 from ._train_utils import get_trained_network, get_neural_network
 from ._utils import encode_batchwise, decode_batchwise, encode_decode_batchwise, predict_batchwise, detect_device, \
@@ -28,6 +29,7 @@
            'DipEncoder',
            'DeepECT',
            'DEN',
+           'SHADE',
            'get_dataloader',
            'get_default_augmented_dataloaders',
            'get_neural_network'
diff --git a/clustpy/deep/dcdist/__init__.py b/clustpy/deep/dcdist/__init__.py
new file mode 100644
index 0000000..113a855
--- /dev/null
+++ b/clustpy/deep/dcdist/__init__.py
@@ -0,0 +1,27 @@
+from .dctree import (
+    DCTree,
+    calculate_reachability_distance,
+    serialize as serialize_DCTree,
+    serialize_compressed as serialize_DCTree_compressed,
+    save as save_DCTree,
+    deserialize as deserialize_DCTree,
+    deserialize_compressed as deserialize_DCTree_compressed,
+    load as load_DCTree,
+)
+
+
+from .dctree_clusterer import DCTree_Clusterer
+
+__all__ = [
+    ###  DCTree  ###
+    "DCTree",
+    "calculate_reachability_distance",
+    "serialize_DCTree",
+    "serialize_DCTree_compressed",
+    "save_DCTree",
+    "deserialize_DCTree",
+    "deserialize_DCTree_compressed",
+    "load_DCTree",
+    ###  DCTree_Cluster  ###
+    "DCTree_Clusterer",
+]
diff --git a/clustpy/deep/dcdist/dctree.py b/clustpy/deep/dcdist/dctree.py
new file mode 100644
index 0000000..e6299ab
--- /dev/null
+++ b/clustpy/deep/dcdist/dctree.py
@@ -0,0 +1,857 @@
+"""
+@authors:
+Pascal Weber
+"""
+
+# Implementation of the dc-distance with a DCTree by
+# - Author: Pascal Weber
+# - Source: https://github.com/pasiweber/SHADE
+
+# Paper: Connecting the Dots -- Density-Connectivity Distance unifies DBSCAN, k-Center and Spectral Clustering
+# Authors: Anna Beer, Andrew Draganov, Ellen Hohma, Philipp Jahn, Christian M.M. Frey, and Ira Assent
+# Link: https://doi.org/10.1145/3580305.3599283
+
+
+from __future__ import annotations
+
+import gzip
+import numpy as np
+import sys
+
+from collections import deque
+from multiprocessing import cpu_count
+from multiprocessing.pool import ThreadPool
+from sklearn.metrics import pairwise_distances
+from typing import List, Literal, Optional, Sequence, Tuple, Union
+
+
+class DCTree:
+    """
+    The DCTree computes the dc_distances and stores them in a tree structure.
+
+    By using the euler tour of the tree and a sparse table for range minimum queries,
+    the lca-elements in the tree (i.e. the dc_distances) can be computed in O(1) time.
+
+    The function `dc_dist(i, j)` returns the dc_distance between `points[i]` and `points[j]`
+    in O(1) time.
+    The function `dc_distance(X, Y=None, access_method="tree")` returns a dc_distance matrix
+    with the dc_distance from each pair of `points[X]` and `points[Y]`.
+    See the section Functions for more details.
+
+    The DCTree provides `serialize` and `serialize_compressed` functions to serialize the
+    DCTree. With `deserialize` or `deserialize_compressed` the DCTree can be deserialized again.
+
+    The DCTree provides `save` and `load` functions to save / load the DCTree to / from disk.
+
+
+    Parameters
+    ----------
+    points : np.ndarray
+        points, of which the dc_distances should be computed of.
+
+    min_points : int, optional
+        min_points parameter used for the computation of the dc_distances, by default 5.
+
+    access_method: str
+        Default access_method for dc_distances, by default "tree".
+
+    no_fastindex: bool = False
+        No fast index structure is constructed if this parameter is set to `True`.
+        Disables the functions `dc_dist` and `dc_distances` with `access_method = "dc_dist"`.
+        Only useful if you only use the `dc_distance` function with `access_method = "tree"`,
+        e.g. for calculating all pair dc_distances with `dc_dists = dc_tree.dc_distances()`.
+
+    use_less_memory: bool = False
+        Don't precalculate the reachability distance. Saves quadratic RAM usage, but also needs
+        double the time for computing.
+
+
+    Functions
+    ---------
+    DCTree[x] or DCTree[i,j] or DCTree[X,Y]:
+        Returns the _DCNode of given index if `arg` is an integer or a Sequence.
+
+        If DCTree[i,j] is used, the dc_dist of `points[i]` and `points[j]` is returned
+        (i and j are integer).
+
+        If DCTree[X,Y] is used, the dc_dist matrix between each pair of `points[X]` and
+        `points[Y]` is returned (X and Y are np.ndarray or Sequences).
+
+    dc_dist : (self, i: int, j: int) -> distance
+        returns the dc_distance between points[i] and points[j] in O(1) time.
+
+    dc_distances : (self, X = None, Y = None, access_method = `self.accesss_method`) -> np.ndarray
+        Computes the dc_distance matrix between each pair of points[X] and points[Y].
+        If X=None, X=range(n) is used.
+        If Y=None, Y=X is used.
+
+        `access_method` (by default `self.accesss_method`):
+            - "tree":    Traverses the tree in O(n) time (n = len(points)),
+                         no matter the size of X / Y.
+
+            - "dc_dist": Uses the dc_dist function and needs O(k*l) time
+                         (k = len(X), l = len(Y))).
+
+            "dc_dist" is often faster if X and Y are smaller than ~10% of n = len(points).
+
+        Returns dc_dists: ndarray of shape (n_samples_X, n_samples_Y)
+
+
+    serialize : (dc_tree: DCTree) -> str
+        Serializes the DCTree `dc_tree` to a string.
+
+    serialize_compressed : (dc_tree: DCTree) -> bytes
+        Serializes the DCTree `dc_tree` to a compressed byte array.
+
+    save : (dc_tree: DCTree, file_path: str) -> save to disk
+        Saves the DCTree `dc_tree` to disk at `file_path`.
+
+
+    deserialize : (data: str, access_method = "tree", no_fastindex = False, n_jobs = None) -> DCTree
+        Deserializes a string `str` to a DCTree.
+
+    deserialize_compressed : (data: bytes, access_method = "tree", no_fastindex = False, n_jobs = None) -> DCTree
+        Deserializes a compressed byte array `bytes` to a DCTree.
+
+    load : (file_path: str, access_method = "tree", no_fastindex = False, n_jobs = None) -> DCTree
+        Loads a DCTree from disk at `file_path`.
+
+
+    Examples
+    --------
+    >>> import dcdist
+    >>> points = np.array([[1,6],[2,6],[6,2],[14,17],[123,3246],[52,8323],[265,73]])
+    >>> dc_tree = dcdist.DCTree(points, 5)
+    >>> print(dc_tree.dc_dist(2,5))
+    >>> print(dc_tree.dc_distances(range(len(points))))
+    >>> print(dc_tree.dc_distances([0,1], [2,3]))
+
+    >>> s = dcdist.serialize(dc_tree)
+    >>> dc_tree_new = dcdist.deserialize(s)
+
+    >>> b = dcdist.serialize_compressed(dc_tree)
+    >>> dc_tree_new = dcdist.deserialize_compressed(b)
+
+    >>> dcdist.save(dc_tree, "./data.dctree")
+    >>> dc_tree_new = dcdist.load("./data.dctree")
+
+
+    References
+    ----------
+    Anna Beer, Andrew Draganov, Ellen Hohma, Philipp Jahn, Christian M.M. Frey, and Ira Assent.
+    "Connecting the Dots -- Density-Connectivity Distance unifies DBSCAN, k-Center and Spectral
+    Clustering." KDD '23. 2023.
+    """
+
+    n: int  # len(points)
+    min_points: int
+    root: _DCNode
+
+    euler: List[_DCNode]
+    level: List[int]
+    f_occur: List[int]
+    level_table: _SparseTable
+
+    access_method: str
+    no_fastindex: bool
+    n_jobs: int
+    no_gil: bool
+
+    def __init__(
+        self,
+        points: np.ndarray,
+        min_points: int = 5,
+        min_points_mr: Optional[int] = None,
+        access_method: str = "tree",
+        no_fastindex: bool = False,
+        use_less_memory: bool = False,
+        n_jobs: Optional[int] = None,
+        precomputed=False,
+    ):
+        self.n = points.shape[0]
+        self.min_points = min_points
+        self.access_method = access_method
+        self.no_fastindex = no_fastindex
+
+        if min_points_mr is None:
+            min_points_mr = min_points
+
+        if n_jobs is None or n_jobs == 0:
+            self.n_jobs = cpu_count()
+        elif n_jobs < 0:
+            self.n_jobs = max(cpu_count() + 1 + n_jobs, 1)
+        else:
+            self.n_jobs = n_jobs
+        self.no_gil = sys.flags.nogil if hasattr(sys.flags, "nogil") else False
+
+        if use_less_memory and not precomputed:
+            mst_edges = self._get_mst_edges(points, use_less_memory=True)
+        else:
+            if not precomputed:
+                reach_dists = calculate_reachability_distance(points, min_points_mr)
+            else:
+                reach_dists = points
+            mst_edges = self._get_mst_edges(reach_dists)
+            if not precomputed:
+                del reach_dists
+        mst_edges.sort(order="dist")
+        self.root = self._build_tree(mst_edges)
+        del mst_edges
+
+        if not self.no_fastindex:
+            self._init_fast_index()
+
+    def _init_fast_index(self):
+        n_nodes = 2 * self.n - 1
+        self.euler = [self.root] * (2 * n_nodes - 1)
+        self.level = [0] * (2 * n_nodes - 1)
+        self.f_occur = [-1] * n_nodes
+        self._euler_tour(self.root)
+        self.level_table = _SparseTable(self.level)
+
+    def __getitem__(
+        self,
+        arg: Union[
+            Union[int, slice, Sequence[int], np.ndarray],
+            Union[
+                Tuple[int, int],
+                Tuple[int, np.ndarray, Sequence[int]],
+                Tuple[np.ndarray, Sequence[int], int],
+                Tuple[np.ndarray, Sequence[int], np.ndarray, Sequence[int]],
+            ],
+        ],
+    ) -> Union[Union[_DCNode, List[_DCNode]], Union[float, np.ndarray]]:
+        """
+        Returns the _DCNode of given index if `arg` is an integer or a Sequence.
+
+        If DCTree[i,j] is used, the dc_dist of `points[i]` and `points[j]` is returned
+        (i and j are integer).
+
+        If DCTree[X,Y] is used, the dc_dist matrix between each pair of `points[X]` and
+        `points[Y]` is returned (X and Y are np.ndarray or Sequences).
+        """
+
+        index_error_msg = f"`{arg}` needs to be an integer, Sequence, np.ndarray, tuple of integer, or tuple of np.ndarray / Sequence!"
+
+        if isinstance(arg, tuple):
+            if len(arg) != 2:
+                raise IndexError(index_error_msg)
+            (i, j) = arg
+            if isinstance(i, int) and isinstance(j, int):
+                return self.dc_dist(i, j)
+            if isinstance(i, int) and isinstance(j, (np.ndarray, Sequence)):
+                if isinstance(j, np.ndarray):
+                    j = j.flatten()
+                return self.dc_distances([i], j)
+            if isinstance(i, (np.ndarray, Sequence)) and isinstance(j, int):
+                if isinstance(i, np.ndarray):
+                    i = i.flatten()
+                return self.dc_distances(i, [j])
+            elif isinstance(i, (np.ndarray, Sequence)) and isinstance(j, (np.ndarray, Sequence)):
+                if isinstance(i, np.ndarray):
+                    i = i.flatten()
+                if isinstance(j, np.ndarray):
+                    j = j.flatten()
+                return self.dc_distances(i, j)
+            else:
+                raise IndexError(index_error_msg)
+
+        elif isinstance(arg, int):
+            return self.euler[self.f_occur[arg]]
+        elif isinstance(arg, (Sequence, np.ndarray)):
+            return [self.euler[self.f_occur[i]] for i in arg]
+        elif isinstance(arg, slice):
+            return [self.euler[i] for i in self.f_occur[arg]]
+
+        else:
+            raise IndexError(index_error_msg)
+
+    def __repr__(self):
+        if self.root is None:
+            return ""
+
+        # pointer_right = "└──"
+        # pointer_left = "├──" if self.root.right else "└──"
+        pointer_right = "   "
+        pointer_left = "   " if self.root.right else "   "
+        return (
+            f"{self.root}"
+            f"{self.__repr__help(self.root.left, pointer_left, '', self.root.right is not None)}"
+            f"{self.__repr__help(self.root.right, pointer_right, '', False)}"
+        )
+
+    def __repr__help(self, node: Optional[_DCNode], pointer: str, padding: str, has_right_sibling: bool):
+        if node is None:
+            return ""
+
+        # padding_for_both = padding + ("|  " if has_right_sibling else "   ")
+        # pointer_right = "└──"
+        # pointer_left = "├──" if node.right else "└──"
+        padding_for_both = padding + ("   " if has_right_sibling else "   ")
+        pointer_right = "   "
+        pointer_left = "   " if node.right else "   "
+        return (
+            f"\n   {padding.replace('|', ' ')}// #region"
+            f"\n{padding}{pointer}{node}"
+            f"{self.__repr__help(node.left, pointer_left, padding_for_both, node.right is not None)}"
+            f"{self.__repr__help(node.right, pointer_right, padding_for_both, False)}"
+            f"\n   {padding.replace('|', ' ')}// #endregion"
+        )
+
+    def dc_dist(self, i: int, j: int) -> float:
+        """Returns the dc_distance from points[i] to points[j] in O(1) time."""
+        if i == j:
+            return 0
+        return self._lca(i, j).dist
+
+    def _lca(self, i: int, j: int) -> _DCNode:
+        first_i = self.f_occur[i]
+        first_j = self.f_occur[j]
+        if first_i > first_j:
+            first_i, first_j = first_j, first_i
+        return self.euler[self.level_table.query(first_i, first_j)]
+
+    def dc_distances(
+        self,
+        X: Union[Sequence[int], np.ndarray, None] = None,
+        Y: Union[Sequence[int], np.ndarray, None] = None,
+        access_method: Optional[str] = None,
+    ) -> np.ndarray:
+        """
+        Computes the dc_distance matrix between each pair of points[X] and points[Y].
+        If X=None, X=range(n) is used.
+        If Y=None, Y=X is used.
+
+        `access_method` (by default `self.access_method`):
+            - "tree":    traverses the tree in O(n) time (n = len(points)),
+                         no matter the size of X / Y.
+            - "dc_dist": uses the dc_dist function and needs O(k*l) time
+                         (k = len(X), l = len(Y))).
+
+            "dc_dist" is often faster if X and Y are smaller than ~10% of n = len(points).
+
+        Returns dc_dists: ndarray of shape (n_samples_X, n_samples_Y)
+        """
+
+        if X is None:
+            X = range(self.n)
+
+        if Y is None:
+            Y = X
+
+        if access_method is None:
+            access_method = self.access_method
+
+        if access_method == "dc_dist" and self.no_fastindex:
+            print("No fastindex computed. Fallback to access_method: `tree`.")
+            access_method = "tree"
+
+        if access_method == "dc_dist" and not self.no_fastindex:
+            dc_dists = np.zeros((len(X), len(Y)))
+
+            for i in range(len(X)):
+                for j in range(i + 1 if X is Y else 0, len(Y)):
+                    if X[i] != Y[j]:
+                        dc_dists[i, j] = self.dc_dist(X[i], Y[j])
+            if X is Y:
+                dc_dists = dc_dists + dc_dists.T
+            return dc_dists
+
+        elif access_method == "tree":
+            dc_dists = np.zeros((len(X), len(Y)))
+
+            idx_rev_X = np.full(self.n, -1)
+            idx_rev_X[X] = range(len(X))
+
+            if X is Y:
+                idx_rev_Y = idx_rev_X
+            else:
+                idx_rev_Y = np.full(self.n, -1)
+                idx_rev_Y[Y] = range(len(Y))
+
+            inodes = self.get_internal_nodes()
+            inodes_start = 0
+            inodes_end = len(inodes)
+
+            n_jobs = self.n_jobs if self.no_gil else 4
+            pool = ThreadPool(n_jobs)
+            n_inodes = self.n - 1
+            chunk_size = int(np.ceil(n_inodes / n_jobs))
+            start_points = range(inodes_start, inodes_end, chunk_size)
+
+            def func(start):
+                for i in range(start, min(start + chunk_size, inodes_end)):
+                    inode = inodes[i]
+                    # inode always has left and right node
+                    i_leaves = inode.left.leaves
+                    j_leaves = inode.right.leaves
+
+                    (i_, j_) = (idx_rev_X[i_leaves], idx_rev_Y[j_leaves])
+                    (i_, j_) = (i_[i_ != -1], j_[j_ != -1])
+                    dc_dists[(i_[:, np.newaxis], j_[np.newaxis, :])] = inode.dist
+
+                    if X is Y:
+                        dc_dists[(j_[:, np.newaxis], i_[np.newaxis, :])] = inode.dist
+                    else:
+                        (i_, j_) = (idx_rev_Y[i_leaves], idx_rev_X[j_leaves])
+                        (i_, j_) = (i_[i_ != -1], j_[j_ != -1])
+                        dc_dists[(j_[:, np.newaxis], i_[np.newaxis, :])] = inode.dist
+
+            pool.map(func, start_points)
+            return dc_dists
+
+        raise ValueError(f"'{access_method}' is no valid `access_method`")
+
+    def _get_mst_edges(self, dist_matrix: np.ndarray, use_less_memory: bool = False) -> np.ndarray:
+        """Prim's algorithm to build up the minimum spanning tree in O(n^2) time."""
+        # dist_matrix are the points, if use_less_memory=True
+        n = self.n
+        nodes_min_dist = np.full(n, np.inf)
+        parent = np.full(n, 0)
+        not_in_mst = np.full(n, True)
+        u = 0  # Start node
+        nodes_min_dist[u] = 0
+        not_in_mst[u] = False
+        mst_edges = np.empty((n - 1), dtype=([("i", int), ("j", int), ("dist", float)]))
+
+        if use_less_memory:
+            reach_dists = np.empty((dist_matrix.shape[0]))
+            for i in range(dist_matrix.shape[0]):
+                eucl_dists = np.linalg.norm(dist_matrix - dist_matrix[i], axis=1)
+                reach_dists[i] = np.max(np.partition(eucl_dists, self.min_points)[: self.min_points])
+
+        for i in range(n - 1):
+            if use_less_memory:
+                dist_u = np.linalg.norm(dist_matrix - dist_matrix[u], axis=1)  # Euclidean distances
+                dist_u = np.maximum(dist_u, reach_dists[u])  # reachability distance of i
+                dist_u = np.maximum(dist_u, reach_dists)  # reachability distance of all points
+
+                v = np.where(not_in_mst & (dist_u < nodes_min_dist))[0]
+                nodes_min_dist[v] = dist_u[v]
+            else:
+                v = np.where(not_in_mst & (dist_matrix[u] < nodes_min_dist))[0]
+                nodes_min_dist[v] = dist_matrix[u, v]
+            parent[v] = u
+
+            arg = np.argmin(nodes_min_dist[not_in_mst])
+            u = np.arange(n)[not_in_mst][arg]
+            mst_edges[i] = (parent[u], u, nodes_min_dist[u])
+            not_in_mst[u] = False
+
+        return mst_edges
+
+    def _build_tree(self, mst_edges: np.ndarray) -> _DCNode:
+        """Kruskal's algorithm to build up the DCTree with the precomputed
+        and sorted mst_edges in O(n) time."""
+        union_find = _UnionFind(self.n)
+        node = _DCNode(id=0, dist=0.0, leaves=[0])
+        idx = self.n
+        k = len(mst_edges)
+        for i, j, dist in mst_edges:
+            i_root = union_find.find(i)
+            j_root = union_find.find(j)
+            node = _DCNode(
+                id=idx,
+                dist=dist,
+                k=-1 if (len(i_root.leaves) + len(j_root.leaves)) < self.min_points else k,
+                left=i_root,
+                right=j_root,
+                leaves=i_root.leaves + j_root.leaves,
+            )
+            i_root.parent = node
+            j_root.parent = node
+            union_find.union(i, j, node)
+            idx += 1
+            if not node.k == -1:
+                k -= 1
+        return node
+
+    def _euler_tour(self, root: _DCNode):
+        """Euler tour to get the euler, level, and f_occur lists in O(n) time."""
+        DOWN, UP = 0, 1
+        stack: deque[Tuple[_DCNode, int, Literal[0, 1]]] = deque([(root, 0, DOWN)])  # (node, level, DOWN / UP)
+
+        pos = 0
+        while len(stack):
+            (node, level, status) = stack.pop()
+
+            if status == DOWN:
+                if self.f_occur[node.id] == -1:
+                    self.f_occur[node.id] = pos
+
+                self.euler[pos] = node
+                self.level[pos] = level
+                pos += 1
+
+                if node.right:
+                    stack.append((node, level, UP))
+                    stack.append((node.right, level + 1, DOWN))
+                if node.left:
+                    stack.append((node, level, UP))
+                    stack.append((node.left, level + 1, DOWN))
+
+            elif status == UP:
+                self.euler[pos] = node
+                self.level[pos] = level
+                pos += 1
+
+    def get_internal_nodes(self):
+        nodes = []
+        stack: deque[_DCNode] = deque([self.root])
+
+        while len(stack):
+            node = stack.pop()
+
+            if node and node.left and node.right:
+                nodes.append(node)
+
+            if node.left:
+                stack.append(node.left)
+
+            if node.right:
+                stack.append(node.right)
+        return nodes
+
+    def traverse_until_k(self, k):
+        from queue import PriorityQueue
+
+        result_nodes = set([self.root])
+        if k == 1:
+            return result_nodes
+
+        stack = PriorityQueue()
+        stack.put((-self.root.dist, self.root, self.root))
+        while not stack.empty():
+            _, node, parent_node = stack.get()
+
+            if node is None:
+                return
+
+            if node.k >= 0:
+                if node.left.k != -1:
+                    if node.left.k != -1 and node.right.k != -1:
+                        result_nodes.discard(parent_node)
+                        result_nodes.discard(node)
+                        result_nodes.add(node.left)
+                    if node.parent.left.k == -1 or node.parent.right.k == -1:
+                        stack.put((-node.left.dist, node.left, parent_node))
+                    else:
+                        stack.put((-node.left.dist, node.left, node))
+                if node.right.k != -1:
+                    if node.left.k != -1 and node.right.k != -1:
+                        result_nodes.discard(parent_node)
+                        result_nodes.discard(node)
+                        result_nodes.add(node.right)
+                    if node.parent.left.k == -1 or node.parent.right.k == -1:
+                        stack.put((-node.right.dist, node.right, parent_node))
+                    else:
+                        stack.put((-node.right.dist, node.right, node))
+
+            if len(result_nodes) >= k:
+                break
+
+        return result_nodes
+
+    def get_k_center(self, k):
+        nodes = self.traverse_until_k(k)
+        labels = np.full(self.n, -1)
+        for i, node in enumerate(nodes):
+            labels[np.array(node.leaves)] = i
+        return labels
+
+    def get_eps_for_k(self, k, eps=-3e-12):
+        nodes = self.traverse_until_k(k)
+        min_eps = np.inf
+        for node in nodes:
+            min_eps = min(min_eps, node.parent.dist)
+        return min_eps + eps
+
+
+def _serialize(root: _DCNode):
+    tree_list = []
+    stack: deque[Optional[_DCNode]] = deque([root])
+    while len(stack):
+        node = stack.pop()
+        if node is None:
+            tree_list.append("/")
+        elif node.left is None and node.right is None:
+            tree_list.append(f"{node.id}|{node.dist}")
+        else:
+            tree_list.append(f"'{node.id}|{node.dist}")
+            stack.append(node.right)
+            stack.append(node.left)
+    return ",".join(tree_list)
+
+
+def serialize(dc_tree: DCTree) -> str:
+    """Serializes the DCTree `dc_tree` to a string."""
+
+    sep = "<\1dc_tree>"
+    # Tree structure, min_points, n_jobs, no_gil
+    return _serialize(dc_tree.root) + sep + str(dc_tree.min_points)
+
+
+def serialize_compressed(dc_tree: DCTree) -> bytes:
+    """Serializes the DCTree `dc_tree` to a gzip-compressed byte array."""
+    data = serialize(dc_tree)
+    byte_data = bytes(data, "utf-8")
+    return gzip.compress(byte_data)
+
+
+def save(dc_tree: DCTree, file_path: str) -> None:
+    """Saves the DCTree `dc_tree` to disk at `file_path`."""
+    byte_data = serialize_compressed(dc_tree)
+    with open(file_path, "wb") as file:
+        file.write(byte_data)
+
+
+def _deserialize(data: List[str]) -> _DCNode:
+    id, dist = data[0].split("|")
+    root = _DCNode(id=int(id[1:]), dist=float(dist), leaves=[])
+
+    DOWN, UP = 0, 1
+    stack: deque[Literal[0, 1]] = deque([UP, DOWN, DOWN])
+    nodes: deque[_DCNode] = deque([root])
+    res: deque[Optional[_DCNode]] = deque([])
+
+    pos = 0
+    while len(stack):
+        status = stack.pop()
+
+        if status == DOWN:
+            pos += 1
+            if data[pos] == "/":
+                res.append(None)
+                continue
+            id, dist = data[pos].split("|")
+            if id[0] != "'":
+                res.append(_DCNode(id=int(id), dist=0, leaves=[int(id)]))
+                continue
+            inode = _DCNode(id=int(id[1:]), dist=float(dist), leaves=[])
+            nodes.append(inode)
+            stack.extend([UP, DOWN, DOWN])
+
+        elif status == UP:
+            node = nodes.pop()
+            node.right = res.pop()
+            node.left = res.pop()
+            res.append(node)
+            node.leaves = node.left.leaves + node.right.leaves
+    return root
+
+
+def deserialize(
+    data: str,
+    access_method: Optional[str] = None,
+    no_fastindex: bool = False,
+    n_jobs: Optional[int] = None,
+) -> DCTree:
+    """Deserializes a string `str` to a DCTree."""
+    dc_tree = object.__new__(DCTree)
+
+    sep = "<\1dc_tree>"
+    # Tree structure, min_points, n_jobs, no_gil
+    (tree_data, min_points) = data.split(sep)
+    dc_tree.min_points = int(min_points)
+    dc_tree.access_method = access_method if access_method is not None else "tree"
+    dc_tree.n_jobs = n_jobs if n_jobs else cpu_count()
+    dc_tree.no_gil = sys.flags.nogil if hasattr(sys.flags, "nogil") else False
+
+    tree_data_list = tree_data.split(",")
+    dc_tree.root = _deserialize(tree_data_list)
+    dc_tree.n = (len(tree_data_list) + 1) // 2
+
+    dc_tree.no_fastindex = no_fastindex
+    if not dc_tree.no_fastindex:
+        dc_tree._init_fast_index()
+    return dc_tree
+
+
+def deserialize_compressed(
+    compressed_data: bytes,
+    access_method: Optional[str] = None,
+    no_fastindex: bool = False,
+    n_jobs: Optional[int] = None,
+) -> DCTree:
+    """Deserializes a compressed byte array `bytes` to a DCTree."""
+    byte_data = gzip.decompress(compressed_data)
+    data = str(byte_data, "utf-8")
+    return deserialize(data, access_method, no_fastindex, n_jobs=n_jobs)
+
+
+def load(
+    file_path: str,
+    access_method: Optional[str] = None,
+    no_fastindex: bool = False,
+    n_jobs: Optional[int] = None,
+) -> DCTree:
+    """Loads a DCTree from disk at `file_path`."""
+    file = open(file_path, "rb")
+    byte_data = file.read()
+    file.close()
+    return deserialize_compressed(byte_data, access_method, no_fastindex, n_jobs=n_jobs)
+
+
+def calculate_reachability_distance(
+    points: np.ndarray, min_points: int = 5, n_jobs: Optional[int] = None
+) -> np.ndarray:
+    """
+    Calculates the reachability distance of points using the min_points threshold in O(n^2) time.
+
+    Raises a ValueError if min_points is larger than the number of points.
+    """
+
+    if min_points > points.shape[0]:
+        raise ValueError(f"Min points ({min_points}) can't exceed the size of the dataset ({points.shape[0]})")
+
+    eucl_dists = pairwise_distances(points, metric="euclidean", n_jobs=n_jobs)
+
+    if min_points > 1:
+        # Get reachability for each point with respect to min_points parameter
+        reach_dists = np.empty((points.shape[0]))
+        for i in range(points.shape[0]):
+            reach_dists[i] = np.max(np.partition(eucl_dists[i], min_points)[:min_points])
+        np.maximum(eucl_dists, reach_dists[np.newaxis, :], eucl_dists)
+        np.maximum(eucl_dists, reach_dists[:, np.newaxis], eucl_dists)
+        np.fill_diagonal(eucl_dists, 0)
+    return eucl_dists
+
+
+class _DCNode:
+    id: int
+    dist: float
+    leaves: List[int]
+    left: Optional[_DCNode]
+    right: Optional[_DCNode]
+    parent: _DCNode
+    k: int
+
+    def __init__(
+        self,
+        id: int,
+        dist: float,
+        leaves: List[int],
+        left: Optional[_DCNode] = None,
+        right: Optional[_DCNode] = None,
+        parent=None,
+        k=-1,
+    ):
+        self.id = id
+        self.dist = dist
+        self.leaves = leaves
+        self.left = left
+        self.right = right
+        if not parent:
+            self.parent = self
+        else:
+            self.parent = parent
+        self.k = k
+
+    def __repr__(self):
+        return f"DCNode #{self.id} ({self.dist}) - {self.k}"
+
+    def __lt__(self, other):
+        return self.dist < other.dist
+
+
+class _UnionFind:
+    """
+    UnionFind structure which provides the functions `find` and `union` with amortized
+    time complexity of O(α(n)), where α(n) is the inverse Ackermann function.
+    """
+
+    root: List[_DCNode]
+    parent: List[int]
+    rank: List[int]
+
+    def __init__(self, n: int):
+        self.root = [_DCNode(id=i, dist=0, leaves=[i]) for i in range(n)]
+        self.parent = list(range(n))
+        self.rank = [1] * n
+
+    def find(self, x: int) -> _DCNode:
+        """Finds the representative node of x."""
+        return self.root[self._find(x)]
+
+    def _find(self, x: int) -> int:
+        """Finds the representative of x."""
+        parent = self.parent[x]
+        if parent != x:
+            parent = self._find(parent)
+            self.parent[x] = parent
+        return parent
+
+    def union(self, x: int, y: int, node: _DCNode):
+        """Union the set which contains x with the set which contains y."""
+        xset = self._find(x)
+        yset = self._find(y)
+        if xset == yset:
+            return
+
+        # Put smaller ranked item under bigger ranked item if ranks are different
+        if self.rank[xset] < self.rank[yset]:
+            self.parent[xset] = yset
+            self.root[yset] = node
+        elif self.rank[xset] > self.rank[yset]:
+            self.parent[yset] = xset
+            self.root[xset] = node
+
+        # If ranks are same, then move y under x (doesn't matter which one goes where)
+        # and increment rank of x's tree
+        else:
+            self.parent[yset] = xset
+            self.root[xset] = node
+            self.rank[xset] = self.rank[xset] + 1
+
+
+class _SparseTable:
+    """
+    SparseTable structure which provides the function `query` which finds the index of the
+    minimum value within the range [l,r] in the array `arr`.
+    Needs O(n * logn) time and storage to precompute and O(1) for the query.
+    """
+
+    # arr: List[int]
+    arr: np.ndarray
+    # sparse_table: List[List[int]]
+    sparse_table: np.ndarray
+    pow_2: List[int]
+    log_2: List[int]
+
+    def __init__(self, arr: List[int]):
+        self.arr = np.asarray(arr, dtype=np.int64)
+        n = len(arr)
+        log_n = n.bit_length() - 1
+        # self.sparse_table = [[-1] * log_n for _ in range(n - 1)]
+        self.sparse_table = np.full((n, log_n), -1, dtype=np.int64)
+
+        self.pow_2 = [1 << i for i in range(log_n + 1)]
+        self.log_2 = [i.bit_length() - 1 for i in range(n)]
+
+        # for i in range(0, n - 1):
+        #     self.sparse_table[i][0] = i if self.arr[i] < self.arr[i + 1] else i + 1
+        idx = np.arange(n - 1, dtype=np.int64)
+        self.sparse_table[: n - 1, 0] = np.where(self.arr[idx] < self.arr[idx + 1], idx, idx + 1)
+
+        for j in range(1, log_n):
+            step = self.pow_2[j]  # 2**j
+            limit = n - self.pow_2[j + 1] + 1  # number of start positions
+            left_idx = self.sparse_table[:limit, j - 1]  # L for i = 0..limit‑1
+            right_idx = self.sparse_table[step : step + limit, j - 1]  # R for i = 0..limit‑1
+            choose_left = self.arr[left_idx] <= self.arr[right_idx]
+
+            self.sparse_table[:limit, j] = np.where(choose_left, left_idx, right_idx)
+
+            # for i in range(n - self.pow_2[j + 1] + 1):
+            #     L = self.sparse_table[i][j - 1]
+            #     R = self.sparse_table[i + step][j - 1]
+            #     self.sparse_table[i][j] = L if arr[L] <= arr[R] else R
+
+    def query(self, l: int, r: int) -> int:
+        # assert L > R, f"L value ({L}) needs to be larger than R value ({R})"
+        if l == r:
+            return l
+        k = self.log_2[r - l + 1]
+        l_k = self.sparse_table[l][k - 1]
+        r_k = self.sparse_table[r - self.pow_2[k] + 1][k - 1]
+        return l_k if self.arr[l_k] <= self.arr[r_k] else r_k
diff --git a/clustpy/deep/dcdist/dctree_clusterer.py b/clustpy/deep/dcdist/dctree_clusterer.py
new file mode 100644
index 0000000..e89b7c8
--- /dev/null
+++ b/clustpy/deep/dcdist/dctree_clusterer.py
@@ -0,0 +1,228 @@
+"""
+@authors:
+Pascal Weber
+"""
+
+# - Author: Pascal Weber
+# - Source: https://github.com/pasiweber/SHADE
+
+from __future__ import annotations
+
+import numpy as np
+
+from collections import deque
+from .dctree import DCTree, _DCNode as _DCDist_DCNode
+from typing import List, Literal, Optional, Sequence, Tuple, Union
+
+from sklearn.base import ClusterMixin, BaseEstimator
+
+
+class DCTree_Clusterer(ClusterMixin, BaseEstimator):
+    """
+    Parameters
+    ----------
+    points : np.ndarray
+        points, of which the dc_distances should be computed of.
+
+    min_points : int, optional
+        min_points parameter used for the computation of the dc_distances, by default 5.
+    """
+
+    min_points: int
+    increase_inter_cluster_distance: bool
+
+    dc_tree: DCTree
+    labels_: np.ndarray
+
+    def __init__(
+        self,
+        min_points: int = 5,
+        increase_inter_cluster_distance: bool = True,
+    ):
+        self.min_points = min_points
+        self.increase_inter_cluster_distance = increase_inter_cluster_distance
+
+    def fit(self, X, y=None):
+        self.dc_tree = DCTree(X, min_points=self.min_points)
+        self.labels_ = self.clustering(self.dc_tree)
+        self.n_clusters = len(np.where(np.unique(self.labels_) >= 0)[0])
+
+        if self.increase_inter_cluster_distance:
+            self._increase_inter_cluster_distance(self.dc_tree.root)
+
+        return self
+
+    def _condense_tree(self, node: Optional[_DCDist_DCNode]) -> Optional[_DCNode]:
+        if node is None:
+            return None
+
+        if node.left is None:
+            right = self._condense_tree(node.right)
+            if right is not None:
+                right.id = node.id
+                right.leaves = node.leaves
+            return right
+
+        if node.right is None:
+            left = self._condense_tree(node.left)
+            if left is not None:
+                left.id = node.id
+                left.leaves = node.leaves
+            return left
+
+        left_size = len(node.left.leaves)
+        right_size = len(node.right.leaves)
+
+        if right_size >= self.min_points and left_size >= self.min_points:
+            node_left = self._condense_tree(node.left)
+            node_right = self._condense_tree(node.right)
+            if node_left is None and node_right is None:
+                return _DCNode(node.id, node.dist, node.leaves, node_left, node_right)
+            if node_left is None:
+                return node_right
+            if node_right is None:
+                return node_left
+            return _DCNode(node.id, node.dist, node.leaves, node_left, node_right)
+
+        if right_size < self.min_points and left_size < self.min_points:
+            return None
+
+        if right_size < self.min_points:
+            left = self._condense_tree(node.left)
+            if left is not None:
+                left.id = node.id
+                left.leaves = node.leaves
+            return left
+
+        if left_size < self.min_points:
+            right = self._condense_tree(node.right)
+            if right is not None:
+                right.id = node.id
+                right.leaves = node.leaves
+            return right
+
+    def _calculate_stability(self, node: _DCNode):
+        if node.left is None and node.right is None:
+            return
+
+        if node.left is not None:
+            self._calculate_stability(node.left)
+            node.left.stability = (1.0 / node.left.dist - 1.0 / node.dist) * len(node.left.leaves)
+
+        if node.right is not None:
+            self._calculate_stability(node.right)
+            node.right.stability = (1.0 / node.right.dist - 1.0 / node.dist) * len(
+                node.right.leaves
+            )
+
+    def _stable_node_flagging(self, node: Optional[_DCNode]):
+        if node is None:
+            return
+
+        node.is_stable = False
+
+        if node.left is None and node.right is None:
+            node.is_stable = True
+            return
+
+        if node.left is None:
+            self._stable_node_flagging(node.right)
+            if node.stability < node.right.stability:
+                node.stability = node.right.stability
+                node.is_stable = False
+                return
+            else:
+                node.is_stable = True
+                return
+
+        if node.right is None:
+            self._stable_node_flagging(node.left)
+            if node.stability < node.left.stability:
+                node.stability = node.left.stability
+                node.is_stable = False
+                return
+            else:
+                node.is_stable = True
+                return
+
+        self._stable_node_flagging(node.right)
+        self._stable_node_flagging(node.left)
+
+        if node.stability < node.left.stability + node.right.stability:
+            node.stability = node.left.stability + node.right.stability
+            node.is_stable = False
+            return
+        else:
+            # Node is a true cluster
+            node.is_stable = True
+            return
+
+    def _get_stable_clusters(self, node: Optional[_DCNode]) -> list[_DCNode]:
+        if node is None:
+            return []
+
+        if node.is_stable:
+            self.dc_tree[node.id].is_stable = True
+            return [node]
+
+        else:
+            return self._get_stable_clusters(node.left) + self._get_stable_clusters(node.right)
+
+    def _increase_inter_cluster_distance(self, root: _DCDist_DCNode):
+        queue = deque()
+        queue.append(root)
+        while len(queue):
+            node = queue.popleft()
+
+            if node.left is None and node.right is None:
+                continue
+
+            if hasattr(node, "is_stable") and node.is_stable:
+                continue
+
+            node.dist += max(
+                node.left.dist * len(node.left.leaves), node.right.dist * len(node.right.leaves)
+            )
+            # node.dist += max(
+            #     (node.dist - node.left.dist) * len(node.left.leaves), (node.dist - node.right.dist) * len(node.right.leaves)
+            # )
+            queue.append(node.left)
+            queue.append(node.right)
+
+    def stable_nodes(self, dc_tree: DCTree) -> list[_DCNode]:
+        new_root = self._condense_tree(dc_tree.root)
+        if new_root is None:
+            return []
+
+        self._calculate_stability(new_root)
+        new_root.stability = 0
+
+        self._stable_node_flagging(new_root)
+        new_root.is_stable = False
+
+        stable_nodes = self._get_stable_clusters(new_root)
+
+        stable_nodes_ = []
+        for node in stable_nodes:
+            if len(node.leaves) >= self.min_points:
+                stable_nodes_.append(node)
+
+        return stable_nodes_
+
+    def clustering(self, dc_tree: DCTree):
+        stable_nodes = self.stable_nodes(dc_tree)
+
+        labels = np.full(dc_tree.n, -1)
+
+        idx = 0
+        for i in range(len(stable_nodes)):
+            labels[stable_nodes[i].leaves] = idx
+            idx += 1
+        return labels
+
+
+class _DCNode(_DCDist_DCNode):
+    is_stable: Optional[Union[Literal[True], Literal[False]]]
+    stability: Optional[float]
+    left: _DCNode
+    right: _DCNode
diff --git a/clustpy/deep/shade.py b/clustpy/deep/shade.py
new file mode 100644
index 0000000..28a389b
--- /dev/null
+++ b/clustpy/deep/shade.py
@@ -0,0 +1,508 @@
+"""
+@authors:
+Pascal Weber
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import torch
+import sys
+
+from .dcdist import DCTree, DCTree_Clusterer
+from clustpy.deep._abstract_deep_clustering_algo import _AbstractDeepClusteringAlgo
+from clustpy.deep.neural_networks._abstract_autoencoder import _AbstractAutoencoder
+from clustpy.deep.neural_networks import FeedforwardAutoencoder
+from clustpy.deep._data_utils import get_dataloader
+from clustpy.deep._train_utils import get_trained_network
+from clustpy.deep._utils import (
+    detect_device,
+    set_torch_seed,
+    squared_euclidean_distance,
+    encode_batchwise,
+    run_initial_clustering,
+)
+from sklearn.utils import check_random_state
+from tqdm import tqdm
+from typing import Callable, Optional, Tuple
+
+import matplotlib.pyplot as plt
+
+
+sys.setrecursionlimit(1000000000)
+
+
+class SHADE(_AbstractDeepClusteringAlgo):
+    """
+    A neural network (autoencoder AE) will be trained with the reconstruction loss and the d_dc loss function.
+    Afterward, KMeans or HDBSCAN identifies the initial clusters.
+
+    Parameters
+    ----------
+    batch_size : int
+        Size of the data batches. (default: 500)
+    embedding_size : int
+        Size of the embedding within the neural_network. (default: 10)
+    neural_network : torch.nn.Module
+        The input neural_network. If None a new Autoencoder model will be created. (default: None)
+    optimizer_params : dict
+        Parameters of the optimizer for the clustering procedure.
+        Can also include the learning rate. (default: {"lr": 1e-3})
+    optimizer_class : torch.optim.Optimizer
+        The optimizer class. (default: torch.optim.Adam)
+    loss_fn : torch.nn.modules.loss._Loss
+        Loss function for the reconstruction. (default: torch.nn.MSELoss())
+    custom_dataloaders : tuple
+        Tuple consisting of a trainloader (random order) at the first and
+        a test loader (non-random order) at the second position.
+        If None, the default dataloaders will be used. (default: None)
+    random_state : np.random.RandomState
+        Use a fixed random state to get a repeatable solution.
+        Can also be of type int. (default: None)
+    device : torch.device
+        If device is None then it will set to cuda if it is available. (default: None)
+
+    Attributes
+    ----------
+    labels_ : np.ndarray
+        The final labels
+    neural_network : torch.nn.Module
+        The final neural_network
+
+    Examples
+    --------
+    >>> from clustpy.data import create_subspace_data
+    >>> data, labels = create_subspace_data(1500, subspace_features=(3, 50), random_state=1)
+    >>> shade = SHADE()
+    >>> shade.fit(data)
+
+    References
+    ----------
+    SHADE: Deep Density-based Clustering
+    Anna Beer; Pascal Weber; Lukas Miklautz; Collin Leiber; Walid Durani; Christian Böhm
+    IEEE International Conference on Data Mining (ICDM), Abu Dhabi, United Arab Emirates, 2024, pp. 675-680, doi: 10.1109/ICDM59182.2024.
+    """
+
+    batch_size: int
+    neural_network: Optional[torch.nn.Module]
+    min_points: int
+    use_complete_dc_tree: bool
+    use_matrix_dc_distance: bool
+    increase_inter_cluster_distance: bool
+    pretrain_epochs: int
+    pretrain_optimizer_params: dict
+    clustering_epochs: int
+    clustering_optimizer_params: dict
+    embedding_size: int
+    optimizer_params: dict
+    optimizer_class: torch.optim.Optimizer
+    loss_fn: torch.nn.modules.loss._Loss
+    custom_dataloaders = Optional[Tuple[Callable, Callable]]
+    random_state: np.random.RandomState
+    device: torch.device
+    cluster_algorithm: ClusterMixin
+    cluster_algorithm_params: dict
+    degree_of_reconstruction: float
+    degree_of_density_preservation: float
+
+    n_clusters: Optional[int]
+    labels_: np.ndarray
+
+    def __init__(
+        self,
+        batch_size: int = 500,
+        neural_network: Optional[torch.nn.Module] = None,
+        min_points: int = 5,
+        use_complete_dc_tree: bool = True,
+        use_matrix_dc_distance: bool = True,
+        increase_inter_cluster_distance: bool = False,
+        pretrain_epochs: int = 0,
+        pretrain_optimizer_params: dict = {"lr": 1e-3},
+        clustering_epochs: int = 100,
+        clustering_optimizer_params: dict = {"lr": 1e-3},
+        embedding_size: int = 10,
+        optimizer_class: torch.optim.Optimizer = torch.optim.Adam,
+        loss_fn: torch.nn.modules.loss._Loss = torch.nn.MSELoss(),
+        custom_dataloaders: Optional[Tuple[Callable, Callable]] = None,
+        random_state: Optional[np.random.RandomState] = None,
+        device: Optional[torch.device] = None,
+        n_clusters: Optional[int] = None,
+        cluster_algorithm: Optional[ClusterMixin] = DCTree_Clusterer,
+        cluster_algorithm_params: dict = {},
+        degree_of_reconstruction: float = 1.0,
+        degree_of_density_preservation: float = 1.0,
+    ):
+        self.batch_size = batch_size
+        self.min_points = min_points
+        self.use_complete_dc_tree = use_complete_dc_tree
+        self.dc_tree = None
+        self.use_matrix_dc_distance = use_matrix_dc_distance
+        self.increase_inter_cluster_distance = increase_inter_cluster_distance
+        self.pretrain_epochs = pretrain_epochs
+        self.pretrain_optimizer_params = pretrain_optimizer_params
+        self.clustering_epochs = clustering_epochs
+        self.clustering_optimizer_params = clustering_optimizer_params
+        self.embedding_size = embedding_size
+        self.neural_network = neural_network
+        self.optimizer_class = optimizer_class
+        self.loss_fn = loss_fn
+        self.custom_dataloaders = custom_dataloaders
+        self.random_state = check_random_state(random_state)
+        set_torch_seed(self.random_state)
+        self.device = detect_device(device)
+        self.n_clusters = n_clusters
+        self.cluster_algorithm = cluster_algorithm
+        self.cluster_algorithm_params = cluster_algorithm_params
+        self.degree_of_reconstruction = degree_of_reconstruction
+        self.degree_of_density_preservation = degree_of_density_preservation
+
+    def fit(self, X, y=None) -> SHADE:
+        """
+        Cluster the input dataset with the SHADE algorithm.
+        The resulting cluster labels will be stored in the `labels_` attribute.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            The given data set.
+        y : np.ndarray
+            The labels. (can be ignored)
+        dc_distances : Optional[Union[np.ndarray, DCTree, SampleDCTree]]
+            dc_distances of X.
+
+        Returns
+        -------
+        self : SHADE
+            This instance of the SHADE algorithm.
+        """
+
+        # Create Dataloader
+        if self.custom_dataloaders is None:
+            trainloader = get_dataloader(
+                X,
+                self.batch_size,
+                drop_last=False,
+                shuffle=True,
+            )
+            testloader = get_dataloader(
+                X,
+                self.batch_size,
+                drop_last=False,
+                shuffle=False,
+            )
+        else:
+            trainloader, testloader = self.custom_dataloaders
+            if trainloader.batch_size != self.batch_size:
+                self.batch_size = trainloader.batch_size
+
+        # Create dc_tree
+        if self.dc_tree is None and self.use_complete_dc_tree:
+            print("Build DCTree of the complete dataset")
+            self.dc_tree = DCTree(X, min_points=self.min_points)
+
+        # Create and pretrain Autoencoder
+        if self.neural_network is None:
+            architecture = [X.shape[1], 512, 256, 128, self.embedding_size]
+            self.neural_network = FeedforwardAutoencoder(architecture)
+            self.neural_network = self.neural_network.to(self.device)
+
+        if not self.neural_network.fitted:
+            self.neural_network = get_trained_network(
+                trainloader=trainloader,
+                data=X,
+                n_epochs=self.pretrain_epochs,
+                batch_size=self.batch_size,
+                optimizer_params=self.pretrain_optimizer_params,
+                optimizer_class=self.optimizer_class,
+                device=self.device,
+                ssl_loss_fn=self.loss_fn,
+                embedding_size=self.embedding_size,
+                neural_network=self.neural_network,
+                random_state=self.random_state,
+            )
+            self.neural_network.fitted = False
+
+        # Setup SHADE Module
+        self.shade_module = _SHADE_Module(
+            autoencoder=self.neural_network,
+            n_epochs=self.clustering_epochs,
+            min_points=self.min_points,
+            dc_tree=self.dc_tree,
+            use_matrix_dc_distance=self.use_matrix_dc_distance,
+            increase_inter_cluster_distance=self.increase_inter_cluster_distance,
+            degree_of_reconstruction=self.degree_of_reconstruction,
+            degree_of_density_preservation=self.degree_of_density_preservation,
+            optimizer_class=self.optimizer_class,
+            optimizer_params=self.clustering_optimizer_params,
+            device=self.device,
+        )
+        if not self.neural_network.fitted:
+            print("Start training with clustering loss.")
+            self.shade_module.fit(
+                X,
+                trainloader=trainloader,
+                loss_fn=self.loss_fn,
+                testloader=testloader,
+            )
+            self.neural_network.fitted = True
+
+        embedding = encode_batchwise(testloader, self.neural_network)
+
+        (
+            self.n_clusters,
+            self.labels_,
+            self.cluster_centers_,
+            _,
+        ) = run_initial_clustering(
+            X=embedding,
+            n_clusters=self.n_clusters,
+            clustering_class=self.cluster_algorithm,
+            clustering_params=self.cluster_algorithm_params,
+            random_state=self.random_state,
+        )
+        return self
+
+    def predict(self, X: np.ndarray) -> np.ndarray:
+        """
+        Predicts the labels of the input data.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            The input data.
+        n_clusters : int
+            Number of clusters. Can be None if a corresponding initial_clustering_class is given,
+            e.g. DBSCAN / HDBSCAN.
+        cluster_algorithm : Clusterer, optional
+            Clustering algorithm which should be used on the learned embedding. (default: KMeans)
+        cluster_params : dict
+            Clustering parameters for `cluster_algorithm`.
+
+        Returns
+        -------
+        predicted_labels : np.ndarray
+            The predicted labels.
+        """
+
+        dataloader = get_dataloader(
+            X,
+            self.batch_size,
+            drop_last=False,
+            shuffle=False,
+        )
+
+        embedding = encode_batchwise(dataloader, self.neural_network)
+
+        (
+            self.n_clusters,
+            self.labels_,
+            self.cluster_centers_,
+            _,
+        ) = run_initial_clustering(
+            X=embedding,
+            n_clusters=self.n_clusters,
+            clustering_class=self.cluster_algorithm,
+            clustering_params=self.cluster_algorithm_params,
+            random_state=self.random_state,
+        )
+        return self.labels_
+
+    def encode(self, X: np.ndarray) -> np.ndarray:
+        """
+        Embedds the input data with the learned SHADE Autoencoder.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            The input data.
+
+        Returns
+        -------
+        np.ndarray
+            Embedded input data.
+        """
+
+        dataloader = get_dataloader(
+            X,
+            self.batch_size,
+            drop_last=False,
+            shuffle=False,
+        )
+
+        embedding = encode_batchwise(dataloader, self.neural_network)
+        return embedding
+
+
+class _SHADE_Module(_AbstractAutoencoder):
+    """
+    The SHADE Autoencoder.
+    """
+
+    autoencoder: torch.nn.Module
+    dc_tree: Optional[DCTree]
+    use_matrix_dc_distance: bool
+    n_epochs: int
+    min_points: int
+    increase_inter_cluster_distance: bool
+    degree_of_reconstruction: float
+    degree_of_density_preservation: float
+    optimizer: torch.optim.Optimizer
+    device: torch.device
+
+    def __init__(
+        self,
+        autoencoder: torch.nn.Module,
+        n_epochs=100,
+        min_points: int = 5,
+        dc_tree: Optional[DCTree] = None,
+        use_matrix_dc_distance: bool = True,
+        increase_inter_cluster_distance: bool = False,
+        degree_of_reconstruction: float = 1.0,
+        degree_of_density_preservation: float = 1.0,
+        optimizer_class: torch.optim.Optimizer = torch.optim.Adam,
+        optimizer_params: dict = {},
+        use_tqdm = False,
+        device: Optional[torch.device] = None,
+    ):
+        super().__init__()
+
+        self.autoencoder = autoencoder
+        self.n_epochs = n_epochs
+        self.min_points = min_points
+        self.dc_tree = dc_tree
+        self.use_matrix_dc_distance = use_matrix_dc_distance
+        self.increase_inter_cluster_distance = increase_inter_cluster_distance
+        self.degree_of_reconstruction = degree_of_reconstruction
+        self.degree_of_density_preservation = degree_of_density_preservation
+        self.optimizer = optimizer_class(list(autoencoder.parameters()), **optimizer_params)
+        self.use_tqdm = use_tqdm
+        self.device = detect_device(device)
+
+    def encode(self, x: torch.Tensor) -> torch.Tensor:
+        return self.autoencoder.encode(x)
+
+    def decode(self, embedded: torch.Tensor) -> torch.Tensor:
+        return self.autoencoder.decode(embedded)
+
+    def fit(
+        self,
+        X,
+        trainloader: torch.utils.data.DataLoader,
+        loss_fn: torch.nn.modules.loss._Loss,
+        testloader,
+    ) -> _SHADE_Module:
+        """
+        Trains the autoencoder in place.
+
+        Parameters
+        ----------
+        trainloader : torch.utils.data.DataLoader
+            Dataloader to be used for training.
+        dc_distances : Union[np.ndarray, DCTree, SampleDCTree]
+            dc_distances of the data of the dataloader.
+        n_epochs : int
+            Number of epochs for the clustering procedure.
+        optimizer : torch.optim.Optimizer
+            The optimizer for training.
+        loss_fn : torch.nn.modules.loss._Loss
+            Loss function for the reconstruction.
+        device : torch.device
+            Device to be trained on.
+
+        Returns
+        -------
+        self : _SHADE_Module
+            This instance of the _SHADE_Module.
+        """
+
+        if self.dc_tree is not None and self.use_matrix_dc_distance:
+            print("Compute dc_distance matrix.")
+            self.matrix_dc_distance = self.dc_tree.dc_distances()
+
+        self.train()
+        for epoch_i in tqdm(range(self.n_epochs), file=sys.stdout, desc="Epoch", disable=not self.use_tqdm):
+            loss_rec_sum = []
+            loss_dens_sum = []
+            # Update Network
+            for batch in trainloader:
+                if len(batch[0]) <= self.min_points:
+                    continue
+
+                loss_rec, loss_dens = self._loss(X, batch, loss_fn)
+                loss = (
+                    self.degree_of_reconstruction * loss_rec
+                    + self.degree_of_density_preservation * loss_dens
+                )
+                loss_rec_sum.append(loss_rec.cpu().detach().numpy())
+                loss_dens_sum.append(loss_dens.cpu().detach().numpy())
+                # Backward pass - update weights
+                self.optimizer.zero_grad()
+                loss.backward()
+                self.optimizer.step()
+
+        self.autoencoder.eval()
+        self.eval()
+        return self
+
+    def _loss(
+        self,
+        X,
+        batch: list,
+        loss_fn: torch.nn.modules.loss._Loss,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Calculate the autoencoder reconstruction + d_dc loss.
+
+        Parameters
+        ----------
+        batch : list
+            The minibatch.
+        loss_fn : torch.nn.modules.loss._Loss
+            Loss function for the reconstruction.
+        device : torch.device
+            Device to be trained on.
+
+        Returns
+        -------loss
+        loss : torch.Tensor
+            The final SHADE loss.
+        """
+
+        # Reconstrucion
+        batch_data = batch[1].to(self.device)
+        emb_data = self.encode(batch_data)
+        reconstructed = self.decode(emb_data)
+        loss_rec = loss_fn(reconstructed, batch_data)
+
+        # Density loss
+        if self.dc_tree is None:
+            # Batch-wise DCTree
+            if self.increase_inter_cluster_distance:
+                dc_clusterer = DCTree_Clusterer(
+                    min_points=self.min_points,
+                    increase_inter_cluster_distance=self.increase_inter_cluster_distance,
+                )
+                dc_clusterer.fit_predict(X[batch[0]])
+                batch_dc_dists = torch.tensor(
+                    dc_clusterer.dc_tree.dc_distances(), device=self.device
+                )
+            else:
+                batch_dc_dists = torch.tensor(
+                    DCTree(X[batch[0]], min_points=self.min_points).dc_distances(),
+                    device=self.device,
+                )
+        else:
+            # DCTree of all data points X
+            if self.use_matrix_dc_distance:
+                batch_dc_dists = torch.tensor(
+                    self.matrix_dc_distance[np.ix_(batch[0], batch[0])], device=self.device
+                )
+            else:
+                batch_dc_dists = torch.tensor(
+                    self.dc_tree.dc_distances(batch[0], batch[0]), device=self.device
+                )
+
+        batch_eucl_dists = squared_euclidean_distance(emb_data, emb_data)
+        loss_dens = (batch_eucl_dists - batch_dc_dists).pow(2).mean()
+
+        return loss_rec, loss_dens

From 1448438ec3d391421c33a96605f59640e570d6b2 Mon Sep 17 00:00:00 2001
From: Collin Leiber <collin.leiber@aalto.fi>
Date: Thu, 21 May 2026 13:25:59 +0300
Subject: [PATCH 2/5] Shorten code and better align it to ClustPy standards.
 Add tests

---
 README.md                                     |   1 +
 .../deep/_abstract_deep_clustering_algo.py    |   5 -
 clustpy/deep/_train_utils.py                  |   2 +-
 clustpy/deep/dcdist/__init__.py               |  27 -
 clustpy/deep/dcdist/dctree.py                 | 857 ------------------
 clustpy/deep/dcdist/dctree_clusterer.py       | 228 -----
 clustpy/deep/shade.py                         | 554 +++++------
 clustpy/deep/tests/test_shade.py              |  25 +
 clustpy/hierarchical/__init__.py              |   4 +-
 clustpy/hierarchical/dctree_clusterer.py      | 153 ++++
 .../tests/test_dctree_clusterer.py            |  17 +
 clustpy/utils/__init__.py                     |   6 +-
 clustpy/utils/dctree.py                       | 607 +++++++++++++
 clustpy/utils/tests/test_dctree.py            | 162 ++++
 14 files changed, 1184 insertions(+), 1464 deletions(-)
 delete mode 100644 clustpy/deep/dcdist/__init__.py
 delete mode 100644 clustpy/deep/dcdist/dctree.py
 delete mode 100644 clustpy/deep/dcdist/dctree_clusterer.py
 create mode 100644 clustpy/deep/tests/test_shade.py
 create mode 100644 clustpy/hierarchical/dctree_clusterer.py
 create mode 100644 clustpy/hierarchical/tests/test_dctree_clusterer.py
 create mode 100644 clustpy/utils/dctree.py
 create mode 100644 clustpy/utils/tests/test_dctree.py

diff --git a/README.md b/README.md
index 67135ed..1b818f7 100644
--- a/README.md
+++ b/README.md
@@ -132,6 +132,7 @@ Remove clustpy via pip to avoid ambiguities during development, e.g., when chang
 | ENRC       | [Deep Embedded Non-Redundant Clustering](https://ojs.aaai.org/index.php/AAAI/article/view/5961) | AAAI 2020                        | [Link](https://gitlab.cs.univie.ac.at/lukas/enrcpublic) (Python + PyTorch)    | [Link](https://clustpy.readthedocs.io/en/latest/clustpy.deep.html#clustpy.deep.enrc.ENRC)             |
 | IDEC       | [Improved Deep Embedded Clustering with Local Structure Preservation](https://www.ijcai.org/proceedings/2017/243) | IJCAI 2017                       | [Link](https://github.com/XifengGuo/IDEC) (Python + Keras)                    | [Link](https://clustpy.readthedocs.io/en/latest/clustpy.deep.html#clustpy.deep.dec.IDEC)              |
 | N2D        | [N2d:(not too) deep clustering via clustering the local manifold of an autoencoded embedding](https://ieeexplore.ieee.org/document/9413131) | ICPR 2021                        | [Link](https://github.com/XifengGuo/IDEC) (Python + Keras)                    | [Link](https://clustpy.readthedocs.io/en/latest/clustpy.deep.html#clustpy.deep.ddc_n2d.N2D)           |
+| SHADE | [SHADE: Deep Density-based Clustering](https://ieeexplore.ieee.org/abstract/document/10884477) | ICDM 2024 | [Link](https://github.com/pasiweber/SHADE) (Python + PyTorch) | [Link](https://clustpy.readthedocs.io/en/latest/clustpy.deep.html#clustpy.deep.shade.SHADE) |
 | VaDE       | [Variational Deep Embedding: An Unsupervised and Generative Approach to Clustering](https://www.ijcai.org/proceedings/2017/0273) | IJCAI 2017                       | [Link](https://github.com/slim1017/VaDE) (Python + Keras)                     | [Link](https://clustpy.readthedocs.io/en/latest/clustpy.deep.html#clustpy.deep.vade.VaDE)             |
 
 #### Neural Networks
diff --git a/clustpy/deep/_abstract_deep_clustering_algo.py b/clustpy/deep/_abstract_deep_clustering_algo.py
index c776ff6..e7a5a8d 100644
--- a/clustpy/deep/_abstract_deep_clustering_algo.py
+++ b/clustpy/deep/_abstract_deep_clustering_algo.py
@@ -100,11 +100,6 @@ def transform(self, X: np.ndarray) -> np.ndarray:
         """
         check_is_fitted(self, ["labels_", "neural_network_trained_", "n_features_in_"])
         X, _, _ = check_parameters(X, allow_size_1=True, allow_nd=self.neural_network_trained_.allow_nd_input, estimator_obj=self)
-        if X.shape[1] != self.n_features_in_:
-            raise ValueError(
-            f"X has {X.shape[1]} features, but {self.__class__.__name__} "
-            f"is expecting {self.n_features_in_} features as input."
-        )
         X_embed = self.neural_network_trained_.transform(X)
         return X_embed
 
diff --git a/clustpy/deep/_train_utils.py b/clustpy/deep/_train_utils.py
index e776d96..79bd84c 100644
--- a/clustpy/deep/_train_utils.py
+++ b/clustpy/deep/_train_utils.py
@@ -166,7 +166,7 @@ def get_trained_network(trainloader: torch.utils.data.DataLoader = None, data: n
     neural_network = get_neural_network(input_dim, embedding_size, neural_network, neural_network_class,
                                          neural_network_params, neural_network_weights, device, random_state)
     # Start training
-    if not neural_network.fitted:
+    if not neural_network.fitted and n_epochs > 0:
         print("Neural network is not fitted yet, will be pretrained.")
         # Pretrain neural network
         optimizer_params = {"lr": 1e-3} if optimizer_params is None else optimizer_params
diff --git a/clustpy/deep/dcdist/__init__.py b/clustpy/deep/dcdist/__init__.py
deleted file mode 100644
index 113a855..0000000
--- a/clustpy/deep/dcdist/__init__.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from .dctree import (
-    DCTree,
-    calculate_reachability_distance,
-    serialize as serialize_DCTree,
-    serialize_compressed as serialize_DCTree_compressed,
-    save as save_DCTree,
-    deserialize as deserialize_DCTree,
-    deserialize_compressed as deserialize_DCTree_compressed,
-    load as load_DCTree,
-)
-
-
-from .dctree_clusterer import DCTree_Clusterer
-
-__all__ = [
-    ###  DCTree  ###
-    "DCTree",
-    "calculate_reachability_distance",
-    "serialize_DCTree",
-    "serialize_DCTree_compressed",
-    "save_DCTree",
-    "deserialize_DCTree",
-    "deserialize_DCTree_compressed",
-    "load_DCTree",
-    ###  DCTree_Cluster  ###
-    "DCTree_Clusterer",
-]
diff --git a/clustpy/deep/dcdist/dctree.py b/clustpy/deep/dcdist/dctree.py
deleted file mode 100644
index e6299ab..0000000
--- a/clustpy/deep/dcdist/dctree.py
+++ /dev/null
@@ -1,857 +0,0 @@
-"""
-@authors:
-Pascal Weber
-"""
-
-# Implementation of the dc-distance with a DCTree by
-# - Author: Pascal Weber
-# - Source: https://github.com/pasiweber/SHADE
-
-# Paper: Connecting the Dots -- Density-Connectivity Distance unifies DBSCAN, k-Center and Spectral Clustering
-# Authors: Anna Beer, Andrew Draganov, Ellen Hohma, Philipp Jahn, Christian M.M. Frey, and Ira Assent
-# Link: https://doi.org/10.1145/3580305.3599283
-
-
-from __future__ import annotations
-
-import gzip
-import numpy as np
-import sys
-
-from collections import deque
-from multiprocessing import cpu_count
-from multiprocessing.pool import ThreadPool
-from sklearn.metrics import pairwise_distances
-from typing import List, Literal, Optional, Sequence, Tuple, Union
-
-
-class DCTree:
-    """
-    The DCTree computes the dc_distances and stores them in a tree structure.
-
-    By using the euler tour of the tree and a sparse table for range minimum queries,
-    the lca-elements in the tree (i.e. the dc_distances) can be computed in O(1) time.
-
-    The function `dc_dist(i, j)` returns the dc_distance between `points[i]` and `points[j]`
-    in O(1) time.
-    The function `dc_distance(X, Y=None, access_method="tree")` returns a dc_distance matrix
-    with the dc_distance from each pair of `points[X]` and `points[Y]`.
-    See the section Functions for more details.
-
-    The DCTree provides `serialize` and `serialize_compressed` functions to serialize the
-    DCTree. With `deserialize` or `deserialize_compressed` the DCTree can be deserialized again.
-
-    The DCTree provides `save` and `load` functions to save / load the DCTree to / from disk.
-
-
-    Parameters
-    ----------
-    points : np.ndarray
-        points, of which the dc_distances should be computed of.
-
-    min_points : int, optional
-        min_points parameter used for the computation of the dc_distances, by default 5.
-
-    access_method: str
-        Default access_method for dc_distances, by default "tree".
-
-    no_fastindex: bool = False
-        No fast index structure is constructed if this parameter is set to `True`.
-        Disables the functions `dc_dist` and `dc_distances` with `access_method = "dc_dist"`.
-        Only useful if you only use the `dc_distance` function with `access_method = "tree"`,
-        e.g. for calculating all pair dc_distances with `dc_dists = dc_tree.dc_distances()`.
-
-    use_less_memory: bool = False
-        Don't precalculate the reachability distance. Saves quadratic RAM usage, but also needs
-        double the time for computing.
-
-
-    Functions
-    ---------
-    DCTree[x] or DCTree[i,j] or DCTree[X,Y]:
-        Returns the _DCNode of given index if `arg` is an integer or a Sequence.
-
-        If DCTree[i,j] is used, the dc_dist of `points[i]` and `points[j]` is returned
-        (i and j are integer).
-
-        If DCTree[X,Y] is used, the dc_dist matrix between each pair of `points[X]` and
-        `points[Y]` is returned (X and Y are np.ndarray or Sequences).
-
-    dc_dist : (self, i: int, j: int) -> distance
-        returns the dc_distance between points[i] and points[j] in O(1) time.
-
-    dc_distances : (self, X = None, Y = None, access_method = `self.accesss_method`) -> np.ndarray
-        Computes the dc_distance matrix between each pair of points[X] and points[Y].
-        If X=None, X=range(n) is used.
-        If Y=None, Y=X is used.
-
-        `access_method` (by default `self.accesss_method`):
-            - "tree":    Traverses the tree in O(n) time (n = len(points)),
-                         no matter the size of X / Y.
-
-            - "dc_dist": Uses the dc_dist function and needs O(k*l) time
-                         (k = len(X), l = len(Y))).
-
-            "dc_dist" is often faster if X and Y are smaller than ~10% of n = len(points).
-
-        Returns dc_dists: ndarray of shape (n_samples_X, n_samples_Y)
-
-
-    serialize : (dc_tree: DCTree) -> str
-        Serializes the DCTree `dc_tree` to a string.
-
-    serialize_compressed : (dc_tree: DCTree) -> bytes
-        Serializes the DCTree `dc_tree` to a compressed byte array.
-
-    save : (dc_tree: DCTree, file_path: str) -> save to disk
-        Saves the DCTree `dc_tree` to disk at `file_path`.
-
-
-    deserialize : (data: str, access_method = "tree", no_fastindex = False, n_jobs = None) -> DCTree
-        Deserializes a string `str` to a DCTree.
-
-    deserialize_compressed : (data: bytes, access_method = "tree", no_fastindex = False, n_jobs = None) -> DCTree
-        Deserializes a compressed byte array `bytes` to a DCTree.
-
-    load : (file_path: str, access_method = "tree", no_fastindex = False, n_jobs = None) -> DCTree
-        Loads a DCTree from disk at `file_path`.
-
-
-    Examples
-    --------
-    >>> import dcdist
-    >>> points = np.array([[1,6],[2,6],[6,2],[14,17],[123,3246],[52,8323],[265,73]])
-    >>> dc_tree = dcdist.DCTree(points, 5)
-    >>> print(dc_tree.dc_dist(2,5))
-    >>> print(dc_tree.dc_distances(range(len(points))))
-    >>> print(dc_tree.dc_distances([0,1], [2,3]))
-
-    >>> s = dcdist.serialize(dc_tree)
-    >>> dc_tree_new = dcdist.deserialize(s)
-
-    >>> b = dcdist.serialize_compressed(dc_tree)
-    >>> dc_tree_new = dcdist.deserialize_compressed(b)
-
-    >>> dcdist.save(dc_tree, "./data.dctree")
-    >>> dc_tree_new = dcdist.load("./data.dctree")
-
-
-    References
-    ----------
-    Anna Beer, Andrew Draganov, Ellen Hohma, Philipp Jahn, Christian M.M. Frey, and Ira Assent.
-    "Connecting the Dots -- Density-Connectivity Distance unifies DBSCAN, k-Center and Spectral
-    Clustering." KDD '23. 2023.
-    """
-
-    n: int  # len(points)
-    min_points: int
-    root: _DCNode
-
-    euler: List[_DCNode]
-    level: List[int]
-    f_occur: List[int]
-    level_table: _SparseTable
-
-    access_method: str
-    no_fastindex: bool
-    n_jobs: int
-    no_gil: bool
-
-    def __init__(
-        self,
-        points: np.ndarray,
-        min_points: int = 5,
-        min_points_mr: Optional[int] = None,
-        access_method: str = "tree",
-        no_fastindex: bool = False,
-        use_less_memory: bool = False,
-        n_jobs: Optional[int] = None,
-        precomputed=False,
-    ):
-        self.n = points.shape[0]
-        self.min_points = min_points
-        self.access_method = access_method
-        self.no_fastindex = no_fastindex
-
-        if min_points_mr is None:
-            min_points_mr = min_points
-
-        if n_jobs is None or n_jobs == 0:
-            self.n_jobs = cpu_count()
-        elif n_jobs < 0:
-            self.n_jobs = max(cpu_count() + 1 + n_jobs, 1)
-        else:
-            self.n_jobs = n_jobs
-        self.no_gil = sys.flags.nogil if hasattr(sys.flags, "nogil") else False
-
-        if use_less_memory and not precomputed:
-            mst_edges = self._get_mst_edges(points, use_less_memory=True)
-        else:
-            if not precomputed:
-                reach_dists = calculate_reachability_distance(points, min_points_mr)
-            else:
-                reach_dists = points
-            mst_edges = self._get_mst_edges(reach_dists)
-            if not precomputed:
-                del reach_dists
-        mst_edges.sort(order="dist")
-        self.root = self._build_tree(mst_edges)
-        del mst_edges
-
-        if not self.no_fastindex:
-            self._init_fast_index()
-
-    def _init_fast_index(self):
-        n_nodes = 2 * self.n - 1
-        self.euler = [self.root] * (2 * n_nodes - 1)
-        self.level = [0] * (2 * n_nodes - 1)
-        self.f_occur = [-1] * n_nodes
-        self._euler_tour(self.root)
-        self.level_table = _SparseTable(self.level)
-
-    def __getitem__(
-        self,
-        arg: Union[
-            Union[int, slice, Sequence[int], np.ndarray],
-            Union[
-                Tuple[int, int],
-                Tuple[int, np.ndarray, Sequence[int]],
-                Tuple[np.ndarray, Sequence[int], int],
-                Tuple[np.ndarray, Sequence[int], np.ndarray, Sequence[int]],
-            ],
-        ],
-    ) -> Union[Union[_DCNode, List[_DCNode]], Union[float, np.ndarray]]:
-        """
-        Returns the _DCNode of given index if `arg` is an integer or a Sequence.
-
-        If DCTree[i,j] is used, the dc_dist of `points[i]` and `points[j]` is returned
-        (i and j are integer).
-
-        If DCTree[X,Y] is used, the dc_dist matrix between each pair of `points[X]` and
-        `points[Y]` is returned (X and Y are np.ndarray or Sequences).
-        """
-
-        index_error_msg = f"`{arg}` needs to be an integer, Sequence, np.ndarray, tuple of integer, or tuple of np.ndarray / Sequence!"
-
-        if isinstance(arg, tuple):
-            if len(arg) != 2:
-                raise IndexError(index_error_msg)
-            (i, j) = arg
-            if isinstance(i, int) and isinstance(j, int):
-                return self.dc_dist(i, j)
-            if isinstance(i, int) and isinstance(j, (np.ndarray, Sequence)):
-                if isinstance(j, np.ndarray):
-                    j = j.flatten()
-                return self.dc_distances([i], j)
-            if isinstance(i, (np.ndarray, Sequence)) and isinstance(j, int):
-                if isinstance(i, np.ndarray):
-                    i = i.flatten()
-                return self.dc_distances(i, [j])
-            elif isinstance(i, (np.ndarray, Sequence)) and isinstance(j, (np.ndarray, Sequence)):
-                if isinstance(i, np.ndarray):
-                    i = i.flatten()
-                if isinstance(j, np.ndarray):
-                    j = j.flatten()
-                return self.dc_distances(i, j)
-            else:
-                raise IndexError(index_error_msg)
-
-        elif isinstance(arg, int):
-            return self.euler[self.f_occur[arg]]
-        elif isinstance(arg, (Sequence, np.ndarray)):
-            return [self.euler[self.f_occur[i]] for i in arg]
-        elif isinstance(arg, slice):
-            return [self.euler[i] for i in self.f_occur[arg]]
-
-        else:
-            raise IndexError(index_error_msg)
-
-    def __repr__(self):
-        if self.root is None:
-            return ""
-
-        # pointer_right = "└──"
-        # pointer_left = "├──" if self.root.right else "└──"
-        pointer_right = "   "
-        pointer_left = "   " if self.root.right else "   "
-        return (
-            f"{self.root}"
-            f"{self.__repr__help(self.root.left, pointer_left, '', self.root.right is not None)}"
-            f"{self.__repr__help(self.root.right, pointer_right, '', False)}"
-        )
-
-    def __repr__help(self, node: Optional[_DCNode], pointer: str, padding: str, has_right_sibling: bool):
-        if node is None:
-            return ""
-
-        # padding_for_both = padding + ("|  " if has_right_sibling else "   ")
-        # pointer_right = "└──"
-        # pointer_left = "├──" if node.right else "└──"
-        padding_for_both = padding + ("   " if has_right_sibling else "   ")
-        pointer_right = "   "
-        pointer_left = "   " if node.right else "   "
-        return (
-            f"\n   {padding.replace('|', ' ')}// #region"
-            f"\n{padding}{pointer}{node}"
-            f"{self.__repr__help(node.left, pointer_left, padding_for_both, node.right is not None)}"
-            f"{self.__repr__help(node.right, pointer_right, padding_for_both, False)}"
-            f"\n   {padding.replace('|', ' ')}// #endregion"
-        )
-
-    def dc_dist(self, i: int, j: int) -> float:
-        """Returns the dc_distance from points[i] to points[j] in O(1) time."""
-        if i == j:
-            return 0
-        return self._lca(i, j).dist
-
-    def _lca(self, i: int, j: int) -> _DCNode:
-        first_i = self.f_occur[i]
-        first_j = self.f_occur[j]
-        if first_i > first_j:
-            first_i, first_j = first_j, first_i
-        return self.euler[self.level_table.query(first_i, first_j)]
-
-    def dc_distances(
-        self,
-        X: Union[Sequence[int], np.ndarray, None] = None,
-        Y: Union[Sequence[int], np.ndarray, None] = None,
-        access_method: Optional[str] = None,
-    ) -> np.ndarray:
-        """
-        Computes the dc_distance matrix between each pair of points[X] and points[Y].
-        If X=None, X=range(n) is used.
-        If Y=None, Y=X is used.
-
-        `access_method` (by default `self.access_method`):
-            - "tree":    traverses the tree in O(n) time (n = len(points)),
-                         no matter the size of X / Y.
-            - "dc_dist": uses the dc_dist function and needs O(k*l) time
-                         (k = len(X), l = len(Y))).
-
-            "dc_dist" is often faster if X and Y are smaller than ~10% of n = len(points).
-
-        Returns dc_dists: ndarray of shape (n_samples_X, n_samples_Y)
-        """
-
-        if X is None:
-            X = range(self.n)
-
-        if Y is None:
-            Y = X
-
-        if access_method is None:
-            access_method = self.access_method
-
-        if access_method == "dc_dist" and self.no_fastindex:
-            print("No fastindex computed. Fallback to access_method: `tree`.")
-            access_method = "tree"
-
-        if access_method == "dc_dist" and not self.no_fastindex:
-            dc_dists = np.zeros((len(X), len(Y)))
-
-            for i in range(len(X)):
-                for j in range(i + 1 if X is Y else 0, len(Y)):
-                    if X[i] != Y[j]:
-                        dc_dists[i, j] = self.dc_dist(X[i], Y[j])
-            if X is Y:
-                dc_dists = dc_dists + dc_dists.T
-            return dc_dists
-
-        elif access_method == "tree":
-            dc_dists = np.zeros((len(X), len(Y)))
-
-            idx_rev_X = np.full(self.n, -1)
-            idx_rev_X[X] = range(len(X))
-
-            if X is Y:
-                idx_rev_Y = idx_rev_X
-            else:
-                idx_rev_Y = np.full(self.n, -1)
-                idx_rev_Y[Y] = range(len(Y))
-
-            inodes = self.get_internal_nodes()
-            inodes_start = 0
-            inodes_end = len(inodes)
-
-            n_jobs = self.n_jobs if self.no_gil else 4
-            pool = ThreadPool(n_jobs)
-            n_inodes = self.n - 1
-            chunk_size = int(np.ceil(n_inodes / n_jobs))
-            start_points = range(inodes_start, inodes_end, chunk_size)
-
-            def func(start):
-                for i in range(start, min(start + chunk_size, inodes_end)):
-                    inode = inodes[i]
-                    # inode always has left and right node
-                    i_leaves = inode.left.leaves
-                    j_leaves = inode.right.leaves
-
-                    (i_, j_) = (idx_rev_X[i_leaves], idx_rev_Y[j_leaves])
-                    (i_, j_) = (i_[i_ != -1], j_[j_ != -1])
-                    dc_dists[(i_[:, np.newaxis], j_[np.newaxis, :])] = inode.dist
-
-                    if X is Y:
-                        dc_dists[(j_[:, np.newaxis], i_[np.newaxis, :])] = inode.dist
-                    else:
-                        (i_, j_) = (idx_rev_Y[i_leaves], idx_rev_X[j_leaves])
-                        (i_, j_) = (i_[i_ != -1], j_[j_ != -1])
-                        dc_dists[(j_[:, np.newaxis], i_[np.newaxis, :])] = inode.dist
-
-            pool.map(func, start_points)
-            return dc_dists
-
-        raise ValueError(f"'{access_method}' is no valid `access_method`")
-
-    def _get_mst_edges(self, dist_matrix: np.ndarray, use_less_memory: bool = False) -> np.ndarray:
-        """Prim's algorithm to build up the minimum spanning tree in O(n^2) time."""
-        # dist_matrix are the points, if use_less_memory=True
-        n = self.n
-        nodes_min_dist = np.full(n, np.inf)
-        parent = np.full(n, 0)
-        not_in_mst = np.full(n, True)
-        u = 0  # Start node
-        nodes_min_dist[u] = 0
-        not_in_mst[u] = False
-        mst_edges = np.empty((n - 1), dtype=([("i", int), ("j", int), ("dist", float)]))
-
-        if use_less_memory:
-            reach_dists = np.empty((dist_matrix.shape[0]))
-            for i in range(dist_matrix.shape[0]):
-                eucl_dists = np.linalg.norm(dist_matrix - dist_matrix[i], axis=1)
-                reach_dists[i] = np.max(np.partition(eucl_dists, self.min_points)[: self.min_points])
-
-        for i in range(n - 1):
-            if use_less_memory:
-                dist_u = np.linalg.norm(dist_matrix - dist_matrix[u], axis=1)  # Euclidean distances
-                dist_u = np.maximum(dist_u, reach_dists[u])  # reachability distance of i
-                dist_u = np.maximum(dist_u, reach_dists)  # reachability distance of all points
-
-                v = np.where(not_in_mst & (dist_u < nodes_min_dist))[0]
-                nodes_min_dist[v] = dist_u[v]
-            else:
-                v = np.where(not_in_mst & (dist_matrix[u] < nodes_min_dist))[0]
-                nodes_min_dist[v] = dist_matrix[u, v]
-            parent[v] = u
-
-            arg = np.argmin(nodes_min_dist[not_in_mst])
-            u = np.arange(n)[not_in_mst][arg]
-            mst_edges[i] = (parent[u], u, nodes_min_dist[u])
-            not_in_mst[u] = False
-
-        return mst_edges
-
-    def _build_tree(self, mst_edges: np.ndarray) -> _DCNode:
-        """Kruskal's algorithm to build up the DCTree with the precomputed
-        and sorted mst_edges in O(n) time."""
-        union_find = _UnionFind(self.n)
-        node = _DCNode(id=0, dist=0.0, leaves=[0])
-        idx = self.n
-        k = len(mst_edges)
-        for i, j, dist in mst_edges:
-            i_root = union_find.find(i)
-            j_root = union_find.find(j)
-            node = _DCNode(
-                id=idx,
-                dist=dist,
-                k=-1 if (len(i_root.leaves) + len(j_root.leaves)) < self.min_points else k,
-                left=i_root,
-                right=j_root,
-                leaves=i_root.leaves + j_root.leaves,
-            )
-            i_root.parent = node
-            j_root.parent = node
-            union_find.union(i, j, node)
-            idx += 1
-            if not node.k == -1:
-                k -= 1
-        return node
-
-    def _euler_tour(self, root: _DCNode):
-        """Euler tour to get the euler, level, and f_occur lists in O(n) time."""
-        DOWN, UP = 0, 1
-        stack: deque[Tuple[_DCNode, int, Literal[0, 1]]] = deque([(root, 0, DOWN)])  # (node, level, DOWN / UP)
-
-        pos = 0
-        while len(stack):
-            (node, level, status) = stack.pop()
-
-            if status == DOWN:
-                if self.f_occur[node.id] == -1:
-                    self.f_occur[node.id] = pos
-
-                self.euler[pos] = node
-                self.level[pos] = level
-                pos += 1
-
-                if node.right:
-                    stack.append((node, level, UP))
-                    stack.append((node.right, level + 1, DOWN))
-                if node.left:
-                    stack.append((node, level, UP))
-                    stack.append((node.left, level + 1, DOWN))
-
-            elif status == UP:
-                self.euler[pos] = node
-                self.level[pos] = level
-                pos += 1
-
-    def get_internal_nodes(self):
-        nodes = []
-        stack: deque[_DCNode] = deque([self.root])
-
-        while len(stack):
-            node = stack.pop()
-
-            if node and node.left and node.right:
-                nodes.append(node)
-
-            if node.left:
-                stack.append(node.left)
-
-            if node.right:
-                stack.append(node.right)
-        return nodes
-
-    def traverse_until_k(self, k):
-        from queue import PriorityQueue
-
-        result_nodes = set([self.root])
-        if k == 1:
-            return result_nodes
-
-        stack = PriorityQueue()
-        stack.put((-self.root.dist, self.root, self.root))
-        while not stack.empty():
-            _, node, parent_node = stack.get()
-
-            if node is None:
-                return
-
-            if node.k >= 0:
-                if node.left.k != -1:
-                    if node.left.k != -1 and node.right.k != -1:
-                        result_nodes.discard(parent_node)
-                        result_nodes.discard(node)
-                        result_nodes.add(node.left)
-                    if node.parent.left.k == -1 or node.parent.right.k == -1:
-                        stack.put((-node.left.dist, node.left, parent_node))
-                    else:
-                        stack.put((-node.left.dist, node.left, node))
-                if node.right.k != -1:
-                    if node.left.k != -1 and node.right.k != -1:
-                        result_nodes.discard(parent_node)
-                        result_nodes.discard(node)
-                        result_nodes.add(node.right)
-                    if node.parent.left.k == -1 or node.parent.right.k == -1:
-                        stack.put((-node.right.dist, node.right, parent_node))
-                    else:
-                        stack.put((-node.right.dist, node.right, node))
-
-            if len(result_nodes) >= k:
-                break
-
-        return result_nodes
-
-    def get_k_center(self, k):
-        nodes = self.traverse_until_k(k)
-        labels = np.full(self.n, -1)
-        for i, node in enumerate(nodes):
-            labels[np.array(node.leaves)] = i
-        return labels
-
-    def get_eps_for_k(self, k, eps=-3e-12):
-        nodes = self.traverse_until_k(k)
-        min_eps = np.inf
-        for node in nodes:
-            min_eps = min(min_eps, node.parent.dist)
-        return min_eps + eps
-
-
-def _serialize(root: _DCNode):
-    tree_list = []
-    stack: deque[Optional[_DCNode]] = deque([root])
-    while len(stack):
-        node = stack.pop()
-        if node is None:
-            tree_list.append("/")
-        elif node.left is None and node.right is None:
-            tree_list.append(f"{node.id}|{node.dist}")
-        else:
-            tree_list.append(f"'{node.id}|{node.dist}")
-            stack.append(node.right)
-            stack.append(node.left)
-    return ",".join(tree_list)
-
-
-def serialize(dc_tree: DCTree) -> str:
-    """Serializes the DCTree `dc_tree` to a string."""
-
-    sep = "<\1dc_tree>"
-    # Tree structure, min_points, n_jobs, no_gil
-    return _serialize(dc_tree.root) + sep + str(dc_tree.min_points)
-
-
-def serialize_compressed(dc_tree: DCTree) -> bytes:
-    """Serializes the DCTree `dc_tree` to a gzip-compressed byte array."""
-    data = serialize(dc_tree)
-    byte_data = bytes(data, "utf-8")
-    return gzip.compress(byte_data)
-
-
-def save(dc_tree: DCTree, file_path: str) -> None:
-    """Saves the DCTree `dc_tree` to disk at `file_path`."""
-    byte_data = serialize_compressed(dc_tree)
-    with open(file_path, "wb") as file:
-        file.write(byte_data)
-
-
-def _deserialize(data: List[str]) -> _DCNode:
-    id, dist = data[0].split("|")
-    root = _DCNode(id=int(id[1:]), dist=float(dist), leaves=[])
-
-    DOWN, UP = 0, 1
-    stack: deque[Literal[0, 1]] = deque([UP, DOWN, DOWN])
-    nodes: deque[_DCNode] = deque([root])
-    res: deque[Optional[_DCNode]] = deque([])
-
-    pos = 0
-    while len(stack):
-        status = stack.pop()
-
-        if status == DOWN:
-            pos += 1
-            if data[pos] == "/":
-                res.append(None)
-                continue
-            id, dist = data[pos].split("|")
-            if id[0] != "'":
-                res.append(_DCNode(id=int(id), dist=0, leaves=[int(id)]))
-                continue
-            inode = _DCNode(id=int(id[1:]), dist=float(dist), leaves=[])
-            nodes.append(inode)
-            stack.extend([UP, DOWN, DOWN])
-
-        elif status == UP:
-            node = nodes.pop()
-            node.right = res.pop()
-            node.left = res.pop()
-            res.append(node)
-            node.leaves = node.left.leaves + node.right.leaves
-    return root
-
-
-def deserialize(
-    data: str,
-    access_method: Optional[str] = None,
-    no_fastindex: bool = False,
-    n_jobs: Optional[int] = None,
-) -> DCTree:
-    """Deserializes a string `str` to a DCTree."""
-    dc_tree = object.__new__(DCTree)
-
-    sep = "<\1dc_tree>"
-    # Tree structure, min_points, n_jobs, no_gil
-    (tree_data, min_points) = data.split(sep)
-    dc_tree.min_points = int(min_points)
-    dc_tree.access_method = access_method if access_method is not None else "tree"
-    dc_tree.n_jobs = n_jobs if n_jobs else cpu_count()
-    dc_tree.no_gil = sys.flags.nogil if hasattr(sys.flags, "nogil") else False
-
-    tree_data_list = tree_data.split(",")
-    dc_tree.root = _deserialize(tree_data_list)
-    dc_tree.n = (len(tree_data_list) + 1) // 2
-
-    dc_tree.no_fastindex = no_fastindex
-    if not dc_tree.no_fastindex:
-        dc_tree._init_fast_index()
-    return dc_tree
-
-
-def deserialize_compressed(
-    compressed_data: bytes,
-    access_method: Optional[str] = None,
-    no_fastindex: bool = False,
-    n_jobs: Optional[int] = None,
-) -> DCTree:
-    """Deserializes a compressed byte array `bytes` to a DCTree."""
-    byte_data = gzip.decompress(compressed_data)
-    data = str(byte_data, "utf-8")
-    return deserialize(data, access_method, no_fastindex, n_jobs=n_jobs)
-
-
-def load(
-    file_path: str,
-    access_method: Optional[str] = None,
-    no_fastindex: bool = False,
-    n_jobs: Optional[int] = None,
-) -> DCTree:
-    """Loads a DCTree from disk at `file_path`."""
-    file = open(file_path, "rb")
-    byte_data = file.read()
-    file.close()
-    return deserialize_compressed(byte_data, access_method, no_fastindex, n_jobs=n_jobs)
-
-
-def calculate_reachability_distance(
-    points: np.ndarray, min_points: int = 5, n_jobs: Optional[int] = None
-) -> np.ndarray:
-    """
-    Calculates the reachability distance of points using the min_points threshold in O(n^2) time.
-
-    Raises a ValueError if min_points is larger than the number of points.
-    """
-
-    if min_points > points.shape[0]:
-        raise ValueError(f"Min points ({min_points}) can't exceed the size of the dataset ({points.shape[0]})")
-
-    eucl_dists = pairwise_distances(points, metric="euclidean", n_jobs=n_jobs)
-
-    if min_points > 1:
-        # Get reachability for each point with respect to min_points parameter
-        reach_dists = np.empty((points.shape[0]))
-        for i in range(points.shape[0]):
-            reach_dists[i] = np.max(np.partition(eucl_dists[i], min_points)[:min_points])
-        np.maximum(eucl_dists, reach_dists[np.newaxis, :], eucl_dists)
-        np.maximum(eucl_dists, reach_dists[:, np.newaxis], eucl_dists)
-        np.fill_diagonal(eucl_dists, 0)
-    return eucl_dists
-
-
-class _DCNode:
-    id: int
-    dist: float
-    leaves: List[int]
-    left: Optional[_DCNode]
-    right: Optional[_DCNode]
-    parent: _DCNode
-    k: int
-
-    def __init__(
-        self,
-        id: int,
-        dist: float,
-        leaves: List[int],
-        left: Optional[_DCNode] = None,
-        right: Optional[_DCNode] = None,
-        parent=None,
-        k=-1,
-    ):
-        self.id = id
-        self.dist = dist
-        self.leaves = leaves
-        self.left = left
-        self.right = right
-        if not parent:
-            self.parent = self
-        else:
-            self.parent = parent
-        self.k = k
-
-    def __repr__(self):
-        return f"DCNode #{self.id} ({self.dist}) - {self.k}"
-
-    def __lt__(self, other):
-        return self.dist < other.dist
-
-
-class _UnionFind:
-    """
-    UnionFind structure which provides the functions `find` and `union` with amortized
-    time complexity of O(α(n)), where α(n) is the inverse Ackermann function.
-    """
-
-    root: List[_DCNode]
-    parent: List[int]
-    rank: List[int]
-
-    def __init__(self, n: int):
-        self.root = [_DCNode(id=i, dist=0, leaves=[i]) for i in range(n)]
-        self.parent = list(range(n))
-        self.rank = [1] * n
-
-    def find(self, x: int) -> _DCNode:
-        """Finds the representative node of x."""
-        return self.root[self._find(x)]
-
-    def _find(self, x: int) -> int:
-        """Finds the representative of x."""
-        parent = self.parent[x]
-        if parent != x:
-            parent = self._find(parent)
-            self.parent[x] = parent
-        return parent
-
-    def union(self, x: int, y: int, node: _DCNode):
-        """Union the set which contains x with the set which contains y."""
-        xset = self._find(x)
-        yset = self._find(y)
-        if xset == yset:
-            return
-
-        # Put smaller ranked item under bigger ranked item if ranks are different
-        if self.rank[xset] < self.rank[yset]:
-            self.parent[xset] = yset
-            self.root[yset] = node
-        elif self.rank[xset] > self.rank[yset]:
-            self.parent[yset] = xset
-            self.root[xset] = node
-
-        # If ranks are same, then move y under x (doesn't matter which one goes where)
-        # and increment rank of x's tree
-        else:
-            self.parent[yset] = xset
-            self.root[xset] = node
-            self.rank[xset] = self.rank[xset] + 1
-
-
-class _SparseTable:
-    """
-    SparseTable structure which provides the function `query` which finds the index of the
-    minimum value within the range [l,r] in the array `arr`.
-    Needs O(n * logn) time and storage to precompute and O(1) for the query.
-    """
-
-    # arr: List[int]
-    arr: np.ndarray
-    # sparse_table: List[List[int]]
-    sparse_table: np.ndarray
-    pow_2: List[int]
-    log_2: List[int]
-
-    def __init__(self, arr: List[int]):
-        self.arr = np.asarray(arr, dtype=np.int64)
-        n = len(arr)
-        log_n = n.bit_length() - 1
-        # self.sparse_table = [[-1] * log_n for _ in range(n - 1)]
-        self.sparse_table = np.full((n, log_n), -1, dtype=np.int64)
-
-        self.pow_2 = [1 << i for i in range(log_n + 1)]
-        self.log_2 = [i.bit_length() - 1 for i in range(n)]
-
-        # for i in range(0, n - 1):
-        #     self.sparse_table[i][0] = i if self.arr[i] < self.arr[i + 1] else i + 1
-        idx = np.arange(n - 1, dtype=np.int64)
-        self.sparse_table[: n - 1, 0] = np.where(self.arr[idx] < self.arr[idx + 1], idx, idx + 1)
-
-        for j in range(1, log_n):
-            step = self.pow_2[j]  # 2**j
-            limit = n - self.pow_2[j + 1] + 1  # number of start positions
-            left_idx = self.sparse_table[:limit, j - 1]  # L for i = 0..limit‑1
-            right_idx = self.sparse_table[step : step + limit, j - 1]  # R for i = 0..limit‑1
-            choose_left = self.arr[left_idx] <= self.arr[right_idx]
-
-            self.sparse_table[:limit, j] = np.where(choose_left, left_idx, right_idx)
-
-            # for i in range(n - self.pow_2[j + 1] + 1):
-            #     L = self.sparse_table[i][j - 1]
-            #     R = self.sparse_table[i + step][j - 1]
-            #     self.sparse_table[i][j] = L if arr[L] <= arr[R] else R
-
-    def query(self, l: int, r: int) -> int:
-        # assert L > R, f"L value ({L}) needs to be larger than R value ({R})"
-        if l == r:
-            return l
-        k = self.log_2[r - l + 1]
-        l_k = self.sparse_table[l][k - 1]
-        r_k = self.sparse_table[r - self.pow_2[k] + 1][k - 1]
-        return l_k if self.arr[l_k] <= self.arr[r_k] else r_k
diff --git a/clustpy/deep/dcdist/dctree_clusterer.py b/clustpy/deep/dcdist/dctree_clusterer.py
deleted file mode 100644
index e89b7c8..0000000
--- a/clustpy/deep/dcdist/dctree_clusterer.py
+++ /dev/null
@@ -1,228 +0,0 @@
-"""
-@authors:
-Pascal Weber
-"""
-
-# - Author: Pascal Weber
-# - Source: https://github.com/pasiweber/SHADE
-
-from __future__ import annotations
-
-import numpy as np
-
-from collections import deque
-from .dctree import DCTree, _DCNode as _DCDist_DCNode
-from typing import List, Literal, Optional, Sequence, Tuple, Union
-
-from sklearn.base import ClusterMixin, BaseEstimator
-
-
-class DCTree_Clusterer(ClusterMixin, BaseEstimator):
-    """
-    Parameters
-    ----------
-    points : np.ndarray
-        points, of which the dc_distances should be computed of.
-
-    min_points : int, optional
-        min_points parameter used for the computation of the dc_distances, by default 5.
-    """
-
-    min_points: int
-    increase_inter_cluster_distance: bool
-
-    dc_tree: DCTree
-    labels_: np.ndarray
-
-    def __init__(
-        self,
-        min_points: int = 5,
-        increase_inter_cluster_distance: bool = True,
-    ):
-        self.min_points = min_points
-        self.increase_inter_cluster_distance = increase_inter_cluster_distance
-
-    def fit(self, X, y=None):
-        self.dc_tree = DCTree(X, min_points=self.min_points)
-        self.labels_ = self.clustering(self.dc_tree)
-        self.n_clusters = len(np.where(np.unique(self.labels_) >= 0)[0])
-
-        if self.increase_inter_cluster_distance:
-            self._increase_inter_cluster_distance(self.dc_tree.root)
-
-        return self
-
-    def _condense_tree(self, node: Optional[_DCDist_DCNode]) -> Optional[_DCNode]:
-        if node is None:
-            return None
-
-        if node.left is None:
-            right = self._condense_tree(node.right)
-            if right is not None:
-                right.id = node.id
-                right.leaves = node.leaves
-            return right
-
-        if node.right is None:
-            left = self._condense_tree(node.left)
-            if left is not None:
-                left.id = node.id
-                left.leaves = node.leaves
-            return left
-
-        left_size = len(node.left.leaves)
-        right_size = len(node.right.leaves)
-
-        if right_size >= self.min_points and left_size >= self.min_points:
-            node_left = self._condense_tree(node.left)
-            node_right = self._condense_tree(node.right)
-            if node_left is None and node_right is None:
-                return _DCNode(node.id, node.dist, node.leaves, node_left, node_right)
-            if node_left is None:
-                return node_right
-            if node_right is None:
-                return node_left
-            return _DCNode(node.id, node.dist, node.leaves, node_left, node_right)
-
-        if right_size < self.min_points and left_size < self.min_points:
-            return None
-
-        if right_size < self.min_points:
-            left = self._condense_tree(node.left)
-            if left is not None:
-                left.id = node.id
-                left.leaves = node.leaves
-            return left
-
-        if left_size < self.min_points:
-            right = self._condense_tree(node.right)
-            if right is not None:
-                right.id = node.id
-                right.leaves = node.leaves
-            return right
-
-    def _calculate_stability(self, node: _DCNode):
-        if node.left is None and node.right is None:
-            return
-
-        if node.left is not None:
-            self._calculate_stability(node.left)
-            node.left.stability = (1.0 / node.left.dist - 1.0 / node.dist) * len(node.left.leaves)
-
-        if node.right is not None:
-            self._calculate_stability(node.right)
-            node.right.stability = (1.0 / node.right.dist - 1.0 / node.dist) * len(
-                node.right.leaves
-            )
-
-    def _stable_node_flagging(self, node: Optional[_DCNode]):
-        if node is None:
-            return
-
-        node.is_stable = False
-
-        if node.left is None and node.right is None:
-            node.is_stable = True
-            return
-
-        if node.left is None:
-            self._stable_node_flagging(node.right)
-            if node.stability < node.right.stability:
-                node.stability = node.right.stability
-                node.is_stable = False
-                return
-            else:
-                node.is_stable = True
-                return
-
-        if node.right is None:
-            self._stable_node_flagging(node.left)
-            if node.stability < node.left.stability:
-                node.stability = node.left.stability
-                node.is_stable = False
-                return
-            else:
-                node.is_stable = True
-                return
-
-        self._stable_node_flagging(node.right)
-        self._stable_node_flagging(node.left)
-
-        if node.stability < node.left.stability + node.right.stability:
-            node.stability = node.left.stability + node.right.stability
-            node.is_stable = False
-            return
-        else:
-            # Node is a true cluster
-            node.is_stable = True
-            return
-
-    def _get_stable_clusters(self, node: Optional[_DCNode]) -> list[_DCNode]:
-        if node is None:
-            return []
-
-        if node.is_stable:
-            self.dc_tree[node.id].is_stable = True
-            return [node]
-
-        else:
-            return self._get_stable_clusters(node.left) + self._get_stable_clusters(node.right)
-
-    def _increase_inter_cluster_distance(self, root: _DCDist_DCNode):
-        queue = deque()
-        queue.append(root)
-        while len(queue):
-            node = queue.popleft()
-
-            if node.left is None and node.right is None:
-                continue
-
-            if hasattr(node, "is_stable") and node.is_stable:
-                continue
-
-            node.dist += max(
-                node.left.dist * len(node.left.leaves), node.right.dist * len(node.right.leaves)
-            )
-            # node.dist += max(
-            #     (node.dist - node.left.dist) * len(node.left.leaves), (node.dist - node.right.dist) * len(node.right.leaves)
-            # )
-            queue.append(node.left)
-            queue.append(node.right)
-
-    def stable_nodes(self, dc_tree: DCTree) -> list[_DCNode]:
-        new_root = self._condense_tree(dc_tree.root)
-        if new_root is None:
-            return []
-
-        self._calculate_stability(new_root)
-        new_root.stability = 0
-
-        self._stable_node_flagging(new_root)
-        new_root.is_stable = False
-
-        stable_nodes = self._get_stable_clusters(new_root)
-
-        stable_nodes_ = []
-        for node in stable_nodes:
-            if len(node.leaves) >= self.min_points:
-                stable_nodes_.append(node)
-
-        return stable_nodes_
-
-    def clustering(self, dc_tree: DCTree):
-        stable_nodes = self.stable_nodes(dc_tree)
-
-        labels = np.full(dc_tree.n, -1)
-
-        idx = 0
-        for i in range(len(stable_nodes)):
-            labels[stable_nodes[i].leaves] = idx
-            idx += 1
-        return labels
-
-
-class _DCNode(_DCDist_DCNode):
-    is_stable: Optional[Union[Literal[True], Literal[False]]]
-    stability: Optional[float]
-    left: _DCNode
-    right: _DCNode
diff --git a/clustpy/deep/shade.py b/clustpy/deep/shade.py
index 28a389b..5bf50bb 100644
--- a/clustpy/deep/shade.py
+++ b/clustpy/deep/shade.py
@@ -4,70 +4,92 @@
 """
 
 from __future__ import annotations
-
 import numpy as np
 import torch
-import sys
-
-from .dcdist import DCTree, DCTree_Clusterer
+from clustpy.utils import DCTree
+from clustpy.hierarchical import DCTree_Clusterer
 from clustpy.deep._abstract_deep_clustering_algo import _AbstractDeepClusteringAlgo
-from clustpy.deep.neural_networks._abstract_autoencoder import _AbstractAutoencoder
-from clustpy.deep.neural_networks import FeedforwardAutoencoder
-from clustpy.deep._data_utils import get_dataloader
+from clustpy.deep._data_utils import get_train_and_test_dataloader
 from clustpy.deep._train_utils import get_trained_network
-from clustpy.deep._utils import (
-    detect_device,
-    set_torch_seed,
-    squared_euclidean_distance,
-    encode_batchwise,
-    run_initial_clustering,
-)
-from sklearn.utils import check_random_state
-from tqdm import tqdm
+from clustpy.deep._utils import detect_device, squared_euclidean_distance, encode_batchwise, run_initial_clustering, mean_squared_error
+from clustpy.utils.checks import check_parameters
+import tqdm
 from typing import Callable, Optional, Tuple
-
-import matplotlib.pyplot as plt
-
-
-sys.setrecursionlimit(1000000000)
+from sklearn.utils.validation import check_is_fitted
 
 
 class SHADE(_AbstractDeepClusteringAlgo):
     """
+    The Structure-preserving High-dimensional Analysis with Density-based Exploration (SHADE) algorithm.
     A neural network (autoencoder AE) will be trained with the reconstruction loss and the d_dc loss function.
     Afterward, KMeans or HDBSCAN identifies the initial clusters.
 
     Parameters
     ----------
+    clustering_class : ClusterMixin
+        clustering class to obtain the cluster labels after getting the embedding (default: DCTree_Clusterer)
+    clustering_params : dict
+        parameters for the clustering class. If None, it will be set to {"min_points": min_points} (default: None)
+    min_points : int
+        the minimum number of points (default: 5)
+    use_complete_dc_tree : bool
+        Defines whether the complete DC Tree should be used instead of a batch-wise version (default: True)
+    use_matrix_dc_distance: bool
+        Defines whether the matrix DC distance should be stored - can cause memory issues (default: True)
+    use_less_memory: bool
+      Use less memory when constructing the DCTree.
+      This will, however, increase the runtime (default: False)
     batch_size : int
         Size of the data batches. (default: 500)
-    embedding_size : int
-        Size of the embedding within the neural_network. (default: 10)
-    neural_network : torch.nn.Module
-        The input neural_network. If None a new Autoencoder model will be created. (default: None)
-    optimizer_params : dict
-        Parameters of the optimizer for the clustering procedure.
-        Can also include the learning rate. (default: {"lr": 1e-3})
+    pretrain_optimizer_params : dict
+        parameters of the optimizer for the pretraining of the neural network, includes the learning rate.
+        If None, it will be set to {"lr": 1e-3}. (default: None)
+    clustering_optimizer_params : dict
+        parameters of the optimizer for the actual clustering procedure, includes the learning rate. If None, it will be set to {"lr": 1e-4} (default: None)
+    pretrain_epochs : int
+        number of epochs for the pretraining of the neural network. (default: 0)
+    clustering_epochs : int
+        number of epochs for the actual clustering procedure (default: 100)
     optimizer_class : torch.optim.Optimizer
-        The optimizer class. (default: torch.optim.Adam)
-    loss_fn : torch.nn.modules.loss._Loss
-        Loss function for the reconstruction. (default: torch.nn.MSELoss())
+        the optimizer class (default: torch.optim.Adam)
+    ssl_loss_fn : Callable | torch.nn.modules.loss._Loss
+         self-supervised learning (ssl) loss function for training the network, e.g. reconstruction loss for autoencoders (default: mean_squared_error)
+    neural_network : torch.nn.Module | tuple
+        the input neural network. If None, a new FeedforwardAutoencoder will be created.
+        Can also be a tuple consisting of the neural network class (torch.nn.Module) and the initialization parameters (dict) (default: None)
+    neural_network_weights : str
+        Path to a file containing the state_dict of the neural_network (default: None)
+    embedding_size : int
+        size of the embedding within the neural network (default: 10)
+    density_loss_weight : float
+        weight of the density loss compared to the reconstruction loss (default: 1.0)
+    ssl_loss_weight : float
+        weight of the self-supervised learning (ssl) loss (default: 1.0)
     custom_dataloaders : tuple
-        Tuple consisting of a trainloader (random order) at the first and
-        a test loader (non-random order) at the second position.
-        If None, the default dataloaders will be used. (default: None)
-    random_state : np.random.RandomState
-        Use a fixed random state to get a repeatable solution.
-        Can also be of type int. (default: None)
+        tuple consisting of a trainloader (random order) at the first and a test loader (non-random order) at the second position.
+        Can also be a tuple of strings, where the first entry is the path to a saved trainloader and the second entry the path to a saved testloader.
+        In this case the dataloaders will be loaded by torch.load(PATH).
+        If None, the default dataloaders will be used (default: None)
     device : torch.device
-        If device is None then it will set to cuda if it is available. (default: None)
+        The device on which to perform the computations.
+        If device is None then it will be automatically chosen: if a gpu is available the gpu with the highest amount of free memory will be chosen (default: None)
+    random_state : np.random.RandomState | int
+        use a fixed random state to get a repeatable solution. Can also be of type int (default: None)
 
     Attributes
     ----------
+    n_clusters_ : int
+        The final number of clusters
     labels_ : np.ndarray
         The final labels
-    neural_network : torch.nn.Module
-        The final neural_network
+    cluster_centers_ : np.ndarray
+        The final cluster centers defined as the mean of assigned samples within the AE embedding
+    dc_tree_ : DCTree
+        The dc tree
+    neural_network_trained_ : torch.nn.Module
+        The final neural network
+    n_features_in_ : int
+        the number of features used for the fitting
 
     Examples
     --------
@@ -83,80 +105,48 @@ class SHADE(_AbstractDeepClusteringAlgo):
     IEEE International Conference on Data Mining (ICDM), Abu Dhabi, United Arab Emirates, 2024, pp. 675-680, doi: 10.1109/ICDM59182.2024.
     """
 
-    batch_size: int
-    neural_network: Optional[torch.nn.Module]
-    min_points: int
-    use_complete_dc_tree: bool
-    use_matrix_dc_distance: bool
-    increase_inter_cluster_distance: bool
-    pretrain_epochs: int
-    pretrain_optimizer_params: dict
-    clustering_epochs: int
-    clustering_optimizer_params: dict
-    embedding_size: int
-    optimizer_params: dict
-    optimizer_class: torch.optim.Optimizer
-    loss_fn: torch.nn.modules.loss._Loss
-    custom_dataloaders = Optional[Tuple[Callable, Callable]]
-    random_state: np.random.RandomState
-    device: torch.device
-    cluster_algorithm: ClusterMixin
-    cluster_algorithm_params: dict
-    degree_of_reconstruction: float
-    degree_of_density_preservation: float
-
-    n_clusters: Optional[int]
-    labels_: np.ndarray
-
     def __init__(
         self,
-        batch_size: int = 500,
-        neural_network: Optional[torch.nn.Module] = None,
-        min_points: int = 5,
+        clustering_class : Optional[ClusterMixin] = DCTree_Clusterer,
+        clustering_params : dict = None,
+        min_points : int = 5,
         use_complete_dc_tree: bool = True,
         use_matrix_dc_distance: bool = True,
-        increase_inter_cluster_distance: bool = False,
-        pretrain_epochs: int = 0,
-        pretrain_optimizer_params: dict = {"lr": 1e-3},
-        clustering_epochs: int = 100,
-        clustering_optimizer_params: dict = {"lr": 1e-3},
-        embedding_size: int = 10,
+        use_less_memory: bool = False,
+        batch_size: int = 500,
+        pretrain_optimizer_params: dict = None,
+        clustering_optimizer_params : dict = None,
+        pretrain_epochs : int = 0,
+        clustering_epochs : int = 100,
         optimizer_class: torch.optim.Optimizer = torch.optim.Adam,
-        loss_fn: torch.nn.modules.loss._Loss = torch.nn.MSELoss(),
-        custom_dataloaders: Optional[Tuple[Callable, Callable]] = None,
-        random_state: Optional[np.random.RandomState] = None,
-        device: Optional[torch.device] = None,
-        n_clusters: Optional[int] = None,
-        cluster_algorithm: Optional[ClusterMixin] = DCTree_Clusterer,
-        cluster_algorithm_params: dict = {},
-        degree_of_reconstruction: float = 1.0,
-        degree_of_density_preservation: float = 1.0,
+        ssl_loss_fn : Callable | torch.nn.modules.loss._Loss = mean_squared_error,
+        neural_network : torch.nn.Module | tuple = None,
+        neural_network_weights : str = None,
+        embedding_size : int = 10,
+        density_loss_weight : float = 1.0,
+        ssl_loss_weight : float = 1.0,
+        custom_dataloaders : tuple = None,
+        device : torch.device = None,
+        random_state : np.random.RandomState | int = None,
     ):
-        self.batch_size = batch_size
+        super().__init__(batch_size, neural_network, neural_network_weights, embedding_size, device, random_state)
+        self.clustering_class = clustering_class
+        self.clustering_params = clustering_params
         self.min_points = min_points
         self.use_complete_dc_tree = use_complete_dc_tree
-        self.dc_tree = None
         self.use_matrix_dc_distance = use_matrix_dc_distance
-        self.increase_inter_cluster_distance = increase_inter_cluster_distance
-        self.pretrain_epochs = pretrain_epochs
+        self.use_less_memory = use_less_memory
         self.pretrain_optimizer_params = pretrain_optimizer_params
-        self.clustering_epochs = clustering_epochs
         self.clustering_optimizer_params = clustering_optimizer_params
-        self.embedding_size = embedding_size
-        self.neural_network = neural_network
+        self.pretrain_epochs = pretrain_epochs
+        self.clustering_epochs = clustering_epochs
         self.optimizer_class = optimizer_class
-        self.loss_fn = loss_fn
+        self.ssl_loss_fn = ssl_loss_fn
+        self.density_loss_weight = density_loss_weight
+        self.ssl_loss_weight = ssl_loss_weight
         self.custom_dataloaders = custom_dataloaders
-        self.random_state = check_random_state(random_state)
-        set_torch_seed(self.random_state)
-        self.device = detect_device(device)
-        self.n_clusters = n_clusters
-        self.cluster_algorithm = cluster_algorithm
-        self.cluster_algorithm_params = cluster_algorithm_params
-        self.degree_of_reconstruction = degree_of_reconstruction
-        self.degree_of_density_preservation = degree_of_density_preservation
 
-    def fit(self, X, y=None) -> SHADE:
+    def fit(self, X: np.ndarray, y: np.ndarray=None) -> SHADE:
         """
         Cluster the input dataset with the SHADE algorithm.
         The resulting cluster labels will be stored in the `labels_` attribute.
@@ -167,342 +157,218 @@ def fit(self, X, y=None) -> SHADE:
             The given data set.
         y : np.ndarray
             The labels. (can be ignored)
-        dc_distances : Optional[Union[np.ndarray, DCTree, SampleDCTree]]
-            dc_distances of X.
 
         Returns
         -------
         self : SHADE
             This instance of the SHADE algorithm.
         """
-
-        # Create Dataloader
-        if self.custom_dataloaders is None:
-            trainloader = get_dataloader(
-                X,
-                self.batch_size,
-                drop_last=False,
-                shuffle=True,
-            )
-            testloader = get_dataloader(
-                X,
-                self.batch_size,
-                drop_last=False,
-                shuffle=False,
-            )
-        else:
-            trainloader, testloader = self.custom_dataloaders
-            if trainloader.batch_size != self.batch_size:
-                self.batch_size = trainloader.batch_size
-
+        X, _, random_state, pretrain_optimizer_params, _, _ = self._check_parameters(X, y=y)
+        clustering_optimizer_params = {"lr": 1e-3} if self.clustering_optimizer_params is None else self.clustering_optimizer_params
+        clustering_params = {"min_points": self.min_points, "use_less_memory": self.use_less_memory} if self.clustering_params is None else self.clustering_params
+        device = detect_device(self.device)
+        trainloader, testloader, batch_size = get_train_and_test_dataloader(X, self.batch_size, self.custom_dataloaders)
+        assert batch_size >= self.min_points, f"Batch_size ({batch_size}) cannot be smaller than min_points ({self.min_points})"
         # Create dc_tree
-        if self.dc_tree is None and self.use_complete_dc_tree:
-            print("Build DCTree of the complete dataset")
-            self.dc_tree = DCTree(X, min_points=self.min_points)
-
+        if self.use_complete_dc_tree:
+            self.dc_tree_ = DCTree(X, min_points=self.min_points, use_less_memory=self.use_less_memory)
+        else:
+            self.dc_tree_ = None
         # Create and pretrain Autoencoder
-        if self.neural_network is None:
-            architecture = [X.shape[1], 512, 256, 128, self.embedding_size]
-            self.neural_network = FeedforwardAutoencoder(architecture)
-            self.neural_network = self.neural_network.to(self.device)
-
-        if not self.neural_network.fitted:
-            self.neural_network = get_trained_network(
-                trainloader=trainloader,
-                data=X,
-                n_epochs=self.pretrain_epochs,
-                batch_size=self.batch_size,
-                optimizer_params=self.pretrain_optimizer_params,
-                optimizer_class=self.optimizer_class,
-                device=self.device,
-                ssl_loss_fn=self.loss_fn,
-                embedding_size=self.embedding_size,
-                neural_network=self.neural_network,
-                random_state=self.random_state,
-            )
-            self.neural_network.fitted = False
-
+        neural_network_params = {"layers": [X.shape[1], 512, 256, 128, self.embedding_size]}
+        neural_network = get_trained_network(trainloader, n_epochs=self.pretrain_epochs,
+                                            optimizer_params=pretrain_optimizer_params, optimizer_class=self.optimizer_class,
+                                            device=device, ssl_loss_fn=self.ssl_loss_fn, embedding_size=self.embedding_size,
+                                            neural_network=self.neural_network,
+                                            neural_network_weights=self.neural_network_weights, neural_network_params=neural_network_params,
+                                            random_state=random_state)
         # Setup SHADE Module
-        self.shade_module = _SHADE_Module(
-            autoencoder=self.neural_network,
+        shade_module = _SHADE_Module(
             n_epochs=self.clustering_epochs,
+            neural_network=neural_network,
             min_points=self.min_points,
-            dc_tree=self.dc_tree,
+            dc_tree=self.dc_tree_,
             use_matrix_dc_distance=self.use_matrix_dc_distance,
-            increase_inter_cluster_distance=self.increase_inter_cluster_distance,
-            degree_of_reconstruction=self.degree_of_reconstruction,
-            degree_of_density_preservation=self.degree_of_density_preservation,
-            optimizer_class=self.optimizer_class,
-            optimizer_params=self.clustering_optimizer_params,
-            device=self.device,
+            device=device,
+            ssl_loss_fn=self.ssl_loss_fn,
+            density_loss_weight=self.density_loss_weight,
+            ssl_loss_weight=self.ssl_loss_weight
         )
-        if not self.neural_network.fitted:
-            print("Start training with clustering loss.")
-            self.shade_module.fit(
-                X,
-                trainloader=trainloader,
-                loss_fn=self.loss_fn,
-                testloader=testloader,
-            )
-            self.neural_network.fitted = True
-
-        embedding = encode_batchwise(testloader, self.neural_network)
-
-        (
-            self.n_clusters,
-            self.labels_,
-            self.cluster_centers_,
-            _,
-        ) = run_initial_clustering(
-            X=embedding,
-            n_clusters=self.n_clusters,
-            clustering_class=self.cluster_algorithm,
-            clustering_params=self.cluster_algorithm_params,
-            random_state=self.random_state,
+        optimizer = self.optimizer_class(list(neural_network.parameters()), **clustering_optimizer_params)
+        shade_module.fit(X, trainloader, optimizer)
+        # Get labels
+        embedded_data = encode_batchwise(testloader, neural_network)
+        n_clusters, labels, cluster_centers, _ = run_initial_clustering(
+            X=embedded_data,
+            n_clusters=None,
+            clustering_class=self.clustering_class,
+            clustering_params=clustering_params,
+            random_state=random_state,
         )
+        self.n_clusters_ = n_clusters
+        self.labels_ = labels
+        self.cluster_centers_ = cluster_centers
+        self.neural_network_trained_ = neural_network
+        self.set_n_featrues_in(X)
         return self
 
     def predict(self, X: np.ndarray) -> np.ndarray:
         """
         Predicts the labels of the input data.
+        Note that this is just a very imprecise estimation as we are not using the DC Tree to predict the labels.
+        The prediction is calculated by checking the distance to the clostest mean of samples in a cluster within the embedding of the AE.
 
         Parameters
         ----------
         X : np.ndarray
-            The input data.
-        n_clusters : int
-            Number of clusters. Can be None if a corresponding initial_clustering_class is given,
-            e.g. DBSCAN / HDBSCAN.
-        cluster_algorithm : Clusterer, optional
-            Clustering algorithm which should be used on the learned embedding. (default: KMeans)
-        cluster_params : dict
-            Clustering parameters for `cluster_algorithm`.
+            input data
 
         Returns
         -------
         predicted_labels : np.ndarray
-            The predicted labels.
-        """
-
-        dataloader = get_dataloader(
-            X,
-            self.batch_size,
-            drop_last=False,
-            shuffle=False,
-        )
-
-        embedding = encode_batchwise(dataloader, self.neural_network)
-
-        (
-            self.n_clusters,
-            self.labels_,
-            self.cluster_centers_,
-            _,
-        ) = run_initial_clustering(
-            X=embedding,
-            n_clusters=self.n_clusters,
-            clustering_class=self.cluster_algorithm,
-            clustering_params=self.cluster_algorithm_params,
-            random_state=self.random_state,
-        )
-        return self.labels_
-
-    def encode(self, X: np.ndarray) -> np.ndarray:
+            The predicted labels
         """
-        Embedds the input data with the learned SHADE Autoencoder.
+        check_is_fitted(self, ["labels_", "neural_network_trained_", "n_features_in_"])
+        X, _, _ = check_parameters(X, allow_size_1=True, allow_nd=self.neural_network_trained_.allow_nd_input, estimator_obj=self)
+        print("WARNING: predict does not use the embedding of the manifold and is, therefore, just a very rough estimate")
+        predicted_labels = super().predict(X)
+        return predicted_labels
 
-        Parameters
-        ----------
-        X : np.ndarray
-            The input data.
-
-        Returns
-        -------
-        np.ndarray
-            Embedded input data.
-        """
-
-        dataloader = get_dataloader(
-            X,
-            self.batch_size,
-            drop_last=False,
-            shuffle=False,
-        )
 
-        embedding = encode_batchwise(dataloader, self.neural_network)
-        return embedding
-
-
-class _SHADE_Module(_AbstractAutoencoder):
-    """
-    The SHADE Autoencoder.
+class _SHADE_Module(torch.nn.Module):
     """
+    The _SHADE_Module. Contains most of the algorithm specific procedures like the loss function.
 
-    autoencoder: torch.nn.Module
-    dc_tree: Optional[DCTree]
+    Parameters
+    ----------
+    n_epochs : int
+        number of epochs for the clustering procedure
+    neural_network : torch.nn.Module
+        the neural network
+    min_points : int
+        the minimum number of points
+    dc_tree : Optional[DCTree]
+        the DCTree
     use_matrix_dc_distance: bool
-    n_epochs: int
-    min_points: int
-    increase_inter_cluster_distance: bool
-    degree_of_reconstruction: float
-    degree_of_density_preservation: float
-    optimizer: torch.optim.Optimizer
-    device: torch.device
+        Defines whether the matrix DC distance should be stored - can cause memory issues
+    device : torch.device
+        device to be trained on
+    ssl_loss_fn : Callable | torch.nn.modules.loss._Loss
+        self-supervised learning (ssl) loss function for training the network, e.g. reconstruction loss for autoencoders
+    density_loss_weight : float
+        weight of the clustering loss
+    ssl_loss_weight : float
+        weight of the self-supervised learning (ssl) loss
+    """
 
     def __init__(
         self,
-        autoencoder: torch.nn.Module,
-        n_epochs=100,
-        min_points: int = 5,
-        dc_tree: Optional[DCTree] = None,
-        use_matrix_dc_distance: bool = True,
-        increase_inter_cluster_distance: bool = False,
-        degree_of_reconstruction: float = 1.0,
-        degree_of_density_preservation: float = 1.0,
-        optimizer_class: torch.optim.Optimizer = torch.optim.Adam,
-        optimizer_params: dict = {},
-        use_tqdm = False,
-        device: Optional[torch.device] = None,
+        n_epochs : int,
+        neural_network: torch.nn.Module,
+        min_points: int,
+        dc_tree: Optional[DCTree],
+        use_matrix_dc_distance: bool,
+        device: torch.device,
+        ssl_loss_fn: Callable | torch.nn.modules.loss._Loss,
+        density_loss_weight: float,
+        ssl_loss_weight: float
     ):
         super().__init__()
-
-        self.autoencoder = autoencoder
         self.n_epochs = n_epochs
+        self.neural_network = neural_network
         self.min_points = min_points
         self.dc_tree = dc_tree
         self.use_matrix_dc_distance = use_matrix_dc_distance
-        self.increase_inter_cluster_distance = increase_inter_cluster_distance
-        self.degree_of_reconstruction = degree_of_reconstruction
-        self.degree_of_density_preservation = degree_of_density_preservation
-        self.optimizer = optimizer_class(list(autoencoder.parameters()), **optimizer_params)
-        self.use_tqdm = use_tqdm
-        self.device = detect_device(device)
-
-    def encode(self, x: torch.Tensor) -> torch.Tensor:
-        return self.autoencoder.encode(x)
-
-    def decode(self, embedded: torch.Tensor) -> torch.Tensor:
-        return self.autoencoder.decode(embedded)
+        self.device = device
+        self.ssl_loss_fn = ssl_loss_fn
+        self.density_loss_weight = density_loss_weight
+        self.ssl_loss_weight = ssl_loss_weight
 
     def fit(
         self,
-        X,
+        X: np.ndarray,
         trainloader: torch.utils.data.DataLoader,
-        loss_fn: torch.nn.modules.loss._Loss,
-        testloader,
+        optimizer: torch.optim.Optimizer
     ) -> _SHADE_Module:
         """
-        Trains the autoencoder in place.
+        Trains the _SHADE_Module in place.
 
         Parameters
         ----------
+        X : np.ndarray
+            The data
         trainloader : torch.utils.data.DataLoader
-            Dataloader to be used for training.
-        dc_distances : Union[np.ndarray, DCTree, SampleDCTree]
-            dc_distances of the data of the dataloader.
-        n_epochs : int
-            Number of epochs for the clustering procedure.
+            dataloader to be used for training
         optimizer : torch.optim.Optimizer
-            The optimizer for training.
-        loss_fn : torch.nn.modules.loss._Loss
-            Loss function for the reconstruction.
-        device : torch.device
-            Device to be trained on.
+            the optimizer for training
 
         Returns
         -------
         self : _SHADE_Module
             This instance of the _SHADE_Module.
         """
-
         if self.dc_tree is not None and self.use_matrix_dc_distance:
-            print("Compute dc_distance matrix.")
-            self.matrix_dc_distance = self.dc_tree.dc_distances()
-
+            matrix_dc_distance = self.dc_tree.dc_distances()
+            matrix_dc_distance_torch = torch.tensor(matrix_dc_distance, device=self.device)
+        else:
+            matrix_dc_distance_torch = None
         self.train()
-        for epoch_i in tqdm(range(self.n_epochs), file=sys.stdout, desc="Epoch", disable=not self.use_tqdm):
-            loss_rec_sum = []
-            loss_dens_sum = []
+        tbar = tqdm.trange(self.n_epochs, desc="SHADE training")
+        for _ in tbar:
             # Update Network
             for batch in trainloader:
                 if len(batch[0]) <= self.min_points:
                     continue
-
-                loss_rec, loss_dens = self._loss(X, batch, loss_fn)
-                loss = (
-                    self.degree_of_reconstruction * loss_rec
-                    + self.degree_of_density_preservation * loss_dens
-                )
-                loss_rec_sum.append(loss_rec.cpu().detach().numpy())
-                loss_dens_sum.append(loss_dens.cpu().detach().numpy())
+                loss = self._loss(X, batch, matrix_dc_distance_torch)
                 # Backward pass - update weights
-                self.optimizer.zero_grad()
+                optimizer.zero_grad()
                 loss.backward()
-                self.optimizer.step()
-
-        self.autoencoder.eval()
+                optimizer.step()
+            postfix_str = {"Loss": loss}
+            tbar.set_postfix(postfix_str)
+        self.neural_network.eval()
         self.eval()
         return self
 
     def _loss(
         self,
-        X,
+        X: np.ndarray,
         batch: list,
-        loss_fn: torch.nn.modules.loss._Loss,
+        matrix_dc_distance_torch: torch.Tensor
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         Calculate the autoencoder reconstruction + d_dc loss.
 
         Parameters
         ----------
+        X : np.ndarray
+            The data
         batch : list
             The minibatch.
-        loss_fn : torch.nn.modules.loss._Loss
-            Loss function for the reconstruction.
-        device : torch.device
-            Device to be trained on.
+        matrix_dc_distance_torch : torch.Tensor
+            A matrix containing pairwise dc distances
 
         Returns
-        -------loss
+        -------
         loss : torch.Tensor
             The final SHADE loss.
         """
-
         # Reconstrucion
-        batch_data = batch[1].to(self.device)
-        emb_data = self.encode(batch_data)
-        reconstructed = self.decode(emb_data)
-        loss_rec = loss_fn(reconstructed, batch_data)
-
+        ssl_loss, embedded, _ = self.neural_network.loss(batch, self.ssl_loss_fn, self.device)
         # Density loss
         if self.dc_tree is None:
             # Batch-wise DCTree
-            if self.increase_inter_cluster_distance:
-                dc_clusterer = DCTree_Clusterer(
-                    min_points=self.min_points,
-                    increase_inter_cluster_distance=self.increase_inter_cluster_distance,
-                )
-                dc_clusterer.fit_predict(X[batch[0]])
-                batch_dc_dists = torch.tensor(
-                    dc_clusterer.dc_tree.dc_distances(), device=self.device
-                )
-            else:
-                batch_dc_dists = torch.tensor(
-                    DCTree(X[batch[0]], min_points=self.min_points).dc_distances(),
-                    device=self.device,
-                )
+            dc_distances = DCTree(X[batch[0]], min_points=self.min_points).dc_distances()
+            batch_dc_dists = torch.tensor(dc_distances, device=self.device)
         else:
             # DCTree of all data points X
             if self.use_matrix_dc_distance:
-                batch_dc_dists = torch.tensor(
-                    self.matrix_dc_distance[np.ix_(batch[0], batch[0])], device=self.device
-                )
+                idxs = batch[0].to(self.device)
+                batch_dc_dists = matrix_dc_distance_torch[idxs[:, None], idxs[None, :]]
             else:
-                batch_dc_dists = torch.tensor(
-                    self.dc_tree.dc_distances(batch[0], batch[0]), device=self.device
-                )
-
-        batch_eucl_dists = squared_euclidean_distance(emb_data, emb_data)
+                dc_distances = self.dc_tree.dc_distances(batch[0], batch[0])
+                batch_dc_dists = torch.tensor(dc_distances, device=self.device)
+        batch_eucl_dists = squared_euclidean_distance(embedded, embedded)
         loss_dens = (batch_eucl_dists - batch_dc_dists).pow(2).mean()
-
-        return loss_rec, loss_dens
+        loss = self.ssl_loss_weight * ssl_loss + self.density_loss_weight * loss_dens
+        return loss
diff --git a/clustpy/deep/tests/test_shade.py b/clustpy/deep/tests/test_shade.py
new file mode 100644
index 0000000..487ff34
--- /dev/null
+++ b/clustpy/deep/tests/test_shade.py
@@ -0,0 +1,25 @@
+from clustpy.deep import SHADE
+import numpy as np
+from clustpy.utils.checks import check_clustpy_estimator
+from clustpy.deep.tests._helpers_for_tests import _test_dc_algorithm_simple, _test_dc_algorithm_with_augmentation
+
+
+def test_shade_estimator():
+    # Ignore check_methods_subset_invariance due to numerical issues
+    check_clustpy_estimator(SHADE(min_points=2, pretrain_epochs=0, clustering_epochs=3),
+                            ("check_complex_data", "check_methods_subset_invariance"))
+
+
+def test_shade():
+    shade = SHADE()
+    _test_dc_algorithm_simple(shade, check_predict=False)
+
+
+def test_shade_wo_matrix_distance():
+    shade = SHADE(use_matrix_dc_distance=False)
+    _test_dc_algorithm_simple(shade, check_predict=False)
+
+
+def test_shade_wo_complete_dc_tree():
+    shade = SHADE(use_complete_dc_tree=False)
+    _test_dc_algorithm_simple(shade, check_predict=False)
diff --git a/clustpy/hierarchical/__init__.py b/clustpy/hierarchical/__init__.py
index e13ac0a..51ee526 100644
--- a/clustpy/hierarchical/__init__.py
+++ b/clustpy/hierarchical/__init__.py
@@ -1,3 +1,5 @@
 from .diana import Diana
+from .dctree_clusterer import DCTree_Clusterer
 
-__all__ = ["Diana"]
+__all__ = ["Diana",
+           "DCTree_Clusterer"]
diff --git a/clustpy/hierarchical/dctree_clusterer.py b/clustpy/hierarchical/dctree_clusterer.py
new file mode 100644
index 0000000..46f5ad2
--- /dev/null
+++ b/clustpy/hierarchical/dctree_clusterer.py
@@ -0,0 +1,153 @@
+"""
+@authors:
+Pascal Weber
+"""
+
+# - Author: Pascal Weber
+# - Source: https://github.com/pasiweber/SHADE
+
+from __future__ import annotations
+import numpy as np
+import sys
+from clustpy.utils.dctree import DCTree, _DCNode
+from typing import Optional
+from sklearn.base import ClusterMixin, BaseEstimator
+from clustpy.utils.checks import check_parameters
+
+
+sys.setrecursionlimit(1000000000)
+
+
+class DCTree_Clusterer(ClusterMixin, BaseEstimator):
+    """
+    The DCTree clustering algorithm.
+    Identifies stable nodes within the DCTree and labels the data accordingly.
+
+    Parameters
+    ----------
+    min_points : int
+        the minimum number of points (default: 5)
+    use_less_memory: bool
+      Use less memory when constructing the DCTree.
+      This will, however, increase the runtime (default: False)
+
+    Attributes
+    ----------
+    n_clusters_ : int
+        The final number of clusters
+    labels_ : np.ndarray
+        The final labels
+    dc_tree_ : BinaryClusterTree
+        The resulting cluster tree
+    n_features_in_ : int
+        the number of features used for the fitting
+
+    References
+    ----------
+    SHADE: Deep Density-based Clustering
+    Anna Beer; Pascal Weber; Lukas Miklautz; Collin Leiber; Walid Durani; Christian Böhm
+    IEEE International Conference on Data Mining (ICDM), Abu Dhabi, United Arab Emirates, 2024, pp. 675-680, doi: 10.1109/ICDM59182.2024.
+    """
+
+    def __init__(self, min_points: int = 5, use_less_memory: bool = False):
+        self.min_points = min_points
+        self.use_less_memory = use_less_memory
+
+    def fit(self, X: np.ndarray, y: np.ndarray=None) -> 'DCTree_Clusterer':
+        """
+        Initiate the actual clustering process on the input data set.
+        The resulting cluster labels will be stored in the labels_ attribute.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            the given data set
+        y : np.ndarray
+            the labels (can be ignored)
+
+        Returns
+        -------
+        self : DCTree_Clusterer
+            this instance of the DCTree_Clusterer algorithm
+        """
+        X, _, _ = check_parameters(X=X, y=y)
+        self.dc_tree_ = DCTree(X, min_points=self.min_points, use_less_memory=self.use_less_memory)
+        condensed_root = self._condense(self.dc_tree_.root)
+        stable_nodes = self._get_stable_nodes(condensed_root)
+        labels = np.full(self.dc_tree_.n, -1 if stable_nodes else 0, dtype=np.int32)
+        self.n_clusters_ = len(stable_nodes) if stable_nodes else 1
+        for idx, node in enumerate(stable_nodes):
+            labels[node.leaves] = idx
+        self.labels_ = labels
+        self.n_features_in_ = X.shape[1]
+        return self
+
+    def _condense(self, node: Optional[_DCNode]) -> Optional[_DCNode]:
+        """
+        Condense the tree to nodes, where both children contain at least min_points leaves.
+        Uses a recursive strategy that checks each node separately.
+
+        Parameters
+        ----------
+        node : _DCNode
+            the node that is checked for its children
+
+        Returns
+        -------
+        condensed_node : _DCNode
+            either a condensed node or None
+        """
+        if node is None or len(node.leaves) < self.min_points:
+            return None
+        # Process children
+        L = self._condense(node.left)
+        R = self._condense(node.right)
+        # If both children have min_points children, keep this branch.
+        size_L = len(node.left.leaves) if node.left is not None else 0
+        size_R = len(node.right.leaves) if node.right is not None else 0
+        if size_L >= self.min_points and size_R >= self.min_points:
+            if (L is not None and R is not None) or (L is None and R is None):
+                return _DCNode(node.id, node.dist, node.leaves, L, R)
+            elif L is not None:
+                return L
+            else:
+                return R
+        elif L is not None:
+            return _DCNode(node.id, L.dist, node.leaves, L.left, L.right)
+        elif R is not None:
+            return _DCNode(node.id, R.dist, node.leaves, R.left, R.right)
+        return None
+
+    def _get_stable_nodes(self, node: Optional[_DCNode], parent_dist: float = None) -> list:
+        """
+        Identify stable nodes in the tree.
+
+        Parameters
+        ----------
+        node : _DCNode
+            the node that is checked.
+        parent_dist : float
+            Distance in the parent node. Can be None in the case of the root (default: None)
+
+        Returns
+        -------
+        stable_nodes : list
+            list of stable nodes
+        """
+        if node is None: # In case root is None
+            return []
+        node.stability_ = (1.0/node.dist - 1.0/parent_dist) * len(node.leaves) if parent_dist is not None else 0
+        # Calculate stability for children
+        sum_child_stabilities = 0
+        child_results = []
+        for child in [node.left, node.right]:
+            if child is not None:
+                child_results += self._get_stable_nodes(child, node.dist)
+                sum_child_stabilities += child.stability_
+        # Flag stable nodes
+        if node.stability_ >= sum_child_stabilities:
+            stable_nodes = [node]
+        else:
+            node.stability_ = sum_child_stabilities
+            stable_nodes = child_results
+        return stable_nodes
diff --git a/clustpy/hierarchical/tests/test_dctree_clusterer.py b/clustpy/hierarchical/tests/test_dctree_clusterer.py
new file mode 100644
index 0000000..e3f798a
--- /dev/null
+++ b/clustpy/hierarchical/tests/test_dctree_clusterer.py
@@ -0,0 +1,17 @@
+from clustpy.hierarchical import DCTree_Clusterer
+from clustpy.utils.checks import check_clustpy_estimator
+from sklearn.datasets import make_blobs
+import numpy as np
+
+
+def test_dctree_clusterer_estimator():
+    check_clustpy_estimator(DCTree_Clusterer(min_points=2), ("check_complex_data"))
+
+
+def test_simple_dctree_clusterer():
+    X, labels = make_blobs(200, 4, centers=3, random_state=1)
+    dctree_clusterer = DCTree_Clusterer()
+    assert not hasattr(dctree_clusterer, "labels_")
+    dctree_clusterer.fit(X)
+    assert dctree_clusterer.labels_.dtype == np.int32
+    assert dctree_clusterer.labels_.shape == labels.shape
diff --git a/clustpy/utils/__init__.py b/clustpy/utils/__init__.py
index a485729..8970132 100644
--- a/clustpy/utils/__init__.py
+++ b/clustpy/utils/__init__.py
@@ -3,6 +3,7 @@
 from .diptest import dip_test, dip_pval, dip_boot_samples, dip_gradient, dip_pval_gradient, plot_dip
 from .plots import plot_with_transformation, plot_image, plot_scatter_matrix, plot_histogram, plot_1d_data, \
     plot_2d_data, plot_3d_data
+from .dctree import DCTree, reachability_distances, minimum_spanning_tree_prims
 
 __all__ = ['evaluate_dataset',
            'evaluate_multiple_datasets',
@@ -22,4 +23,7 @@
            'dip_gradient',
            'dip_pval_gradient',
            'plot_dip',
-           'evaluation_df_to_latex_table']
+           'evaluation_df_to_latex_table',
+           'DCTree',
+           'reachability_distances',
+           'minimum_spanning_tree_prims']
diff --git a/clustpy/utils/dctree.py b/clustpy/utils/dctree.py
new file mode 100644
index 0000000..60f1751
--- /dev/null
+++ b/clustpy/utils/dctree.py
@@ -0,0 +1,607 @@
+"""
+@authors:
+Pascal Weber
+"""
+
+# Implementation of the dc-distance with a DCTree by
+# - Author: Pascal Weber
+# - Source: https://github.com/pasiweber/SHADE
+
+# Paper: Connecting the Dots -- Density-Connectivity Distance unifies DBSCAN, k-Center and Spectral Clustering
+# Authors: Anna Beer, Andrew Draganov, Ellen Hohma, Philipp Jahn, Christian M.M. Frey, and Ira Assent
+# Link: https://doi.org/10.1145/3580305.3599283
+
+
+from __future__ import annotations
+import numpy as np
+from typing import List, Optional, Sequence, Tuple, Union
+from scipy.spatial.distance import pdist, cdist, squareform
+
+
+class DCTree:
+    """
+    The DCTree computes the dc_distances and stores them in a tree structure.
+    By using the euler tour of the tree and a sparse table for range minimum queries,
+    the lca-elements in the tree (i.e. the dc_distances) can be computed in O(1) time.
+    The function `dc_dist(i, j)` returns the dc_distance between `points[i]` and `points[j]`
+    in O(1) time.
+    The function `dc_distance(idx_X, idx_Y=None, access_method="tree")` returns a dc_distance matrix
+    with the dc_distance from each pair of `points[idx_X]` and `points[idx_Y]`.
+    See the section Functions for more details.
+
+    Parameters
+    ----------
+    X : np.ndarray
+        points, of which the dc_distances should be computed of.
+    min_points : int, optional
+        min_points parameter used for the computation of the dc_distances (default: 5).
+    use_less_memory: bool
+      Use less memory when constructing the DCTree.
+      This will, however, increase the runtime (default: False).
+
+    Functions
+    ---------
+    DCTree[x]:
+        Returns the _DCNode of given index.
+    dc_dist : (i, j) -> distance
+        returns the dc_distance between points[i] and points[j] in O(1) time.
+    dc_distances : (idx_X, idx_Y) -> np.ndarray
+        Computes the dc_distance matrix between each pair of points[idx_X] and points[idx_Y].
+        Returns dc_dists as ndarray of shape (n_samples_X, n_samples_Y)
+
+    Examples
+    --------
+    >>> points = np.array([[1,6],[2,6],[6,2],[14,17],[123,3246],[52,8323],[265,73]])
+    >>> dc_tree = dcdist.DCTree(points, 5)
+    >>> print(dc_tree.dc_dist(2,5))
+    >>> print(dc_tree.dc_distances(range(len(points))))
+    >>> print(dc_tree.dc_distances([0,1], [2,3]))
+
+    References
+    ----------
+    Anna Beer, Andrew Draganov, Ellen Hohma, Philipp Jahn, Christian M.M. Frey, and Ira Assent.
+    "Connecting the Dots -- Density-Connectivity Distance unifies DBSCAN, k-Center and Spectral
+    Clustering." KDD '23. 2023.
+    """
+
+    def __init__(
+        self,
+        X: np.ndarray,
+        min_points: int = 5,
+        use_less_memory: bool = False
+    ):
+        self.n = X.shape[0]
+        self.min_points = min_points
+        if not use_less_memory:
+            # Calculate pair-wise reachability distance
+            X = reachability_distances(X, min_points)
+        mst_edges = minimum_spanning_tree_prims(X, use_less_memory=use_less_memory, min_points=min_points)
+        self.root = self._build_tree(mst_edges)
+        self._init_fast_index()
+
+    def _build_tree(self, mst_edges: np.ndarray) -> _DCNode:
+        """
+        Build the final DCTree using the MST edges.
+
+        Parameters
+        ----------
+        mst_edges : np.ndarray
+            The edges of the MST, including the dc-distances
+
+        Returns
+        -------
+        new_root_node : _DCNode
+            The root node of the tree
+        """
+        mst_edges.sort(order="dist")
+        # Create leaf nodes
+        leaf_nodes = [_DCNode(idx, 0.0, [idx]) for idx in range(self.n)]
+        idx = self.n
+        for i, j, dist in mst_edges:
+            root_i = self._get_root(leaf_nodes[i])
+            root_j = self._get_root(leaf_nodes[j])
+            new_leaves = root_i.leaves + root_j.leaves
+            new_root_node = _DCNode(
+                id=idx,
+                dist=dist,
+                left=root_i,
+                right=root_j,
+                leaves=new_leaves,
+            )
+            root_i.parent = new_root_node
+            root_i.root = new_root_node
+            root_j.parent = new_root_node
+            root_j.root = new_root_node
+            idx += 1
+        return new_root_node
+
+    def _get_root(self, node: _DCNode) -> '_DCNode':
+        """
+        Get the root of a node and update all root_ entries that have been visited.
+
+        Parameters
+        ----------
+        node : _DCNode
+            The input node
+
+        Returns
+        -------
+        root_node : _DCNode
+            The root node
+        """
+        root_node = node
+        while root_node.root is not None:
+            root_node = root_node.root
+        # Override old roots
+        if root_node is not node:
+            current_node = node
+            while current_node.root != root_node:
+                next_node = current_node.root
+                current_node.root = root_node
+                current_node = next_node
+        return root_node
+
+    def _init_fast_index(self) -> None:
+        """
+        Build a fast index structure for the DCTree.
+        """
+        n_nodes = 2 * self.n - 1
+        self.euler = []
+        self.level = []
+        self.f_occur = [None] * n_nodes
+        # Euler tour to get the euler, level, and f_occur lists in O(n) time.
+        DOWN, UP = 0, 1
+        stack = [(self.root, 0, DOWN)]  # (node, level, DOWN / UP)
+        while len(stack) > 0:
+            (node, level, status) = stack.pop()
+            if status == DOWN:
+                if self.f_occur[node.id] is None:
+                    self.f_occur[node.id] = len(self.euler)
+                self.euler.append(node)
+                self.level.append(level) 
+                if node.right is not None:
+                    stack.append((node, level, UP))
+                    stack.append((node.right, level + 1, DOWN))
+                if node.left is not None:
+                    stack.append((node, level, UP))
+                    stack.append((node.left, level + 1, DOWN))
+            elif status == UP:
+                self.euler.append(node)
+                self.level.append(level)
+        # SparseTable structure which finds the index of the minimum value within the range [l,r] in self.level
+        levels = np.array(self.level, dtype=int)
+        n_elements = len(levels)
+        log_n = n_elements.bit_length()
+        self.sparse_table = np.zeros((log_n, n_elements), dtype=int)
+        self.sparse_table[0] = np.arange(n_elements)
+        for j in range(1, log_n):
+            stride = 1 << (j - 1)
+            limit = n_elements - (1 << j) + 1
+            if limit <= 0:
+                break
+            left_indices = self.sparse_table[j - 1, :limit]
+            right_indices = self.sparse_table[j - 1, stride : stride + limit]
+            choose_left = levels[left_indices] <= levels[right_indices]
+            self.sparse_table[j, :limit] = np.where(choose_left, left_indices, right_indices)
+
+    def __getitem__(
+        self,
+        point_idx: Union[int, Sequence[int], np.ndarray]
+    ) -> Union[_DCNode, List[_DCNode]]:
+        """
+        Returns the _DCNode of given index if `point_idx` is an integer or a list of _DCNodes if `point_idx' is a Sequence.
+
+        Parameters
+        ----------
+        point_idx : Union[int, Sequence[int], np.ndarray]
+            The input parameter as described above
+
+        Returns
+        -------
+        result : Union[_DCNode, List[_DCNode]]
+            The querried nodes in the tree
+        """
+        if isinstance(point_idx, int):
+            return self.euler[self.f_occur[point_idx]]
+        elif isinstance(point_idx, (Sequence, np.ndarray)):
+            return [self.euler[self.f_occur[i]] for i in point_idx]
+        else:
+            raise IndexError(f"`{point_idx}` needs to be an integer, Sequence or np.ndarray!")
+
+    def dc_dist(self, i: int, j: int) -> float:
+        """
+        Returns the dc_distance between points[i] and points[j] in O(1) time.
+        
+        Parameters
+        ----------
+        i : int
+            index of the first point
+        j : int
+            index of the second point
+        
+        Returns
+        -------
+        dc_distance : float
+            The dc_distance of the two points
+        """
+        if i == j:
+            return 0.0
+        l = self.f_occur[i]
+        r = self.f_occur[j]
+        if l > r:
+            l, r = r, l
+        k = (r - l + 1).bit_length() - 1
+        idx_l = self.sparse_table[k, l]
+        idx_r = self.sparse_table[k, r - (1 << k) + 1]
+        ancestor = self.euler[idx_l if self.level[idx_l] <= self.level[idx_r] else idx_r]
+        dc_distance = ancestor.dist
+        return dc_distance
+
+    def dc_distances(
+        self,
+        idx_X: Union[Sequence[int], np.ndarray, None] = None,
+        idx_Y: Union[Sequence[int], np.ndarray, None] = None,
+        access_method: str = "tree",
+    ) -> np.ndarray:
+        """
+        Computes the dc_distance matrix between each pair of points[idx_X] and points[idx_Y].
+        If idx_X=None, idx_X=range(n) is used.
+        If idx_Y=None, idx_Y=idx_X is used.
+
+        Parameters
+        ----------
+        idx_X : Union[Sequence[int], np.ndarray, None]
+            the first set of indices
+        idx_Y : Union[Sequence[int], np.ndarray, None]
+            the second set of indices
+        access_method : str
+            "tree":     traverses the tree in O(n) time (n = len(points)), no matter the size of X / Y.
+            "dc_dist":  uses the dc_dist function and needs O(k*l) time (k = len(X), l = len(Y))).
+            (default: "tree")
+        
+        Returns
+        -------
+        dc_dists : np.array
+            ndarray of shape (n_samples_X, n_samples_Y) containing the distances
+        """
+        if idx_X is None:
+            idx_X = range(self.n)
+        if idx_Y is None:
+            idx_Y = idx_X
+        dc_dists = np.zeros((len(idx_X), len(idx_Y)))
+        if access_method == "dc_dist":
+            # Get distances from the index structure
+            for i in range(len(idx_X)):
+                start_index = i + 1 if idx_X is idx_Y else 0
+                for j in range(start_index, len(idx_Y)):
+                    dc_dists[i, j] = self.dc_dist(idx_X[i], idx_Y[j])
+        elif access_method == "tree":
+            # Get distances from the tree
+            idx_rev_X = np.full(self.n, -1, dtype=int)
+            idx_rev_X[idx_X] = range(len(idx_X))
+            if idx_X is idx_Y:
+                idx_rev_Y = idx_rev_X
+            else:
+                idx_rev_Y = np.full(self.n, -1, dtype=int)
+                idx_rev_Y[idx_Y] = range(len(idx_Y))
+            node_list = [self.root]
+            while len(node_list) > 0:
+                node = node_list.pop()
+                if node.left is not None:
+                    node_list.append(node.left)
+                if node.right is not None:
+                    node_list.append(node.right)
+                if node.left is not None and node.right is not None:
+                    i_leaves = node.left.leaves
+                    j_leaves = node.right.leaves
+                    (i_, j_) = (idx_rev_X[i_leaves], idx_rev_Y[j_leaves])
+                    (i_, j_) = (i_[i_ != -1], j_[j_ != -1])
+                    if len(i_) > 0 and len(j_) > 0:
+                        dc_dists[(i_[:, np.newaxis], j_[np.newaxis, :])] = node.dist
+                    # In case of asymmetric call
+                    if idx_X is not idx_Y:
+                        (i_, j_) = (idx_rev_Y[i_leaves], idx_rev_X[j_leaves])
+                        (i_, j_) = (i_[i_ != -1], j_[j_ != -1])
+                        if len(i_) > 0 and len(j_) > 0:
+                            dc_dists[(j_[:, np.newaxis], i_[np.newaxis, :])] = node.dist
+        else:
+            raise ValueError(f"'{access_method}' is no valid `access_method`")
+        if idx_X is idx_Y:
+            # Mirror values
+            dc_dists = dc_dists + dc_dists.T
+        return dc_dists
+
+
+    def _traverse_until_k_clusters(self, n_clusters: int) -> List[_DCNode]:
+        """
+        Traverse the tree to identify n_clusters nodes that minimize the maximum within-cluster distance.
+
+        Parameters
+        ----------
+        n_clusters : int
+            the number of desired clusters
+
+        Returns
+        -------
+        result_nodes : List[_DCNode]
+            List of nodes, where each node represents the root of a cluster
+        """
+        assert n_clusters <= self.n, "n_clusters can not be larger than the number of input points"
+        node_list = [self.root]
+        result_nodes = []
+        id_threshold = 2 * self.n - n_clusters - 1
+        while len(node_list) > 0:
+            node = node_list.pop()
+            if node.id <= id_threshold:
+                result_nodes.append(node)
+            else:
+                if node.left is not None:
+                    node_list.append(node.left)
+                if node.right is not None:
+                    node_list.append(node.right)
+        assert len(result_nodes) == n_clusters, "Length of the result_nodes list is not equal to k"
+        return result_nodes
+
+    def get_k_center(self, n_clusters: int) -> np.ndarray:
+        """
+        Extract the k center approach.
+        It identifies clusters such that the maximum distance within a cluster is minimized.
+
+        Parameters
+        ----------
+        n_clusters : int
+            the number of desired clusters
+
+        Returns
+        -------
+        labels : np.ndarray
+            The cluster labels
+        """
+        nodes = self._traverse_until_k_clusters(n_clusters)
+        labels = np.zeros(self.n, dtype=np.int32)
+        for i, node in enumerate(nodes):
+            labels[np.array(node.leaves)] = i
+        return labels
+
+    def get_eps_for_k(self, n_clusters: int, eps: float = 1e-10):
+        """
+        Receive the largest possible eps value such that one receives exectly n_clusters.
+
+        Parameters
+        ----------
+        n_clusters : int
+            the number of desired clusters
+        eps : float
+            a small constaint used to differentiate from the next potential cluster (default: 1e-10)
+            
+        Returns
+        -------
+        min_eps : float
+            The resulting epsilon value
+        """
+        assert eps > 0, "Eps has to be a positive number"
+        nodes = self._traverse_until_k_clusters(n_clusters)
+        min_eps = np.inf
+        for node in nodes:
+            if node.parent is not None:
+                min_eps = min(min_eps, node.parent.dist)
+        return min_eps - eps
+
+    def __repr__(self) -> str:
+        """
+        Return a string describing the DCTree.
+
+        Returns
+        -------
+        repr_string : str
+            The string representation of the tree
+        """
+        if self.root is None:
+            return ""
+        pointer_right = "   "
+        pointer_left = "   " if self.root.right else "   "
+        repr_string = (
+            f"{self.root}"
+            f"{self.__repr__help(self.root.left, pointer_left, '', self.root.right is not None)}"
+            f"{self.__repr__help(self.root.right, pointer_right, '', False)}"
+        )
+        return repr_string
+
+    def __repr__help(self, node: Optional[_DCNode], pointer: str, padding: str, has_right_sibling: bool) -> str:
+        """
+        Helper for the __repr__ function.
+
+        Parameters
+        ----------
+        node : Optional[_DCNode]
+            The current node
+        pointer : str
+            The pointer string
+        padding : str
+            The padding string
+        has_right_sibling : bool
+            True if node has a right sibbling
+
+        Returns
+        -------
+        repr_string : str
+            The string for this node
+        """
+        if node is None:
+            return ""
+        padding_for_both = padding + ("   " if has_right_sibling else "   ")
+        pointer_right = "   "
+        pointer_left = "   " if node.right else "   "
+        repr_string = (
+            f"\n   {padding.replace('|', ' ')}// #region"
+            f"\n{padding}{pointer}{node}"
+            f"{self.__repr__help(node.left, pointer_left, padding_for_both, node.right is not None)}"
+            f"{self.__repr__help(node.right, pointer_right, padding_for_both, False)}"
+            f"\n   {padding.replace('|', ' ')}// #endregion"
+        )
+        return repr_string
+
+
+class _DCNode:
+    """
+    A node in a DCTree.
+    Each node contains a set of leaves and can have a left and a right child node.
+
+    Parameters
+    ----------
+    id : int
+        the id of the node
+    dist : float
+        the dc distance
+    leaves : List[int]
+        list of the ids of its child nodes
+    left : Optional[_DCNode]
+        the left child node (default: None)
+    right : Optional[_DCNode]
+        the right child node (default: None)
+    parent : Optional[_DCNode]
+        the parent node. If None, it will be set to itself (default: None)
+    root : Optional[_DCNode]
+        the root node (default: None)
+    """
+
+    def __init__(
+        self,
+        id: int,
+        dist: float,
+        leaves: List[int],
+        left: Optional[_DCNode] = None,
+        right: Optional[_DCNode] = None,
+        parent: Optional[_DCNode] = None,
+        root: Optional[_DCNode] = None
+    ):
+        self.id = id
+        self.dist = dist
+        self.leaves = leaves
+        self.left = left
+        self.right = right
+        if not parent:
+            self.parent = self
+        else:
+            self.parent = parent
+        self.root = root
+
+    def __repr__(self) -> str:
+        """
+        Return a string containing the id and dist of this node.
+
+        Returns
+        -------
+        to_str : str
+            The string
+        """
+        to_str = f"DCNode #{self.id} ({self.dist})"
+        return to_str
+
+    def __lt__(self, other_node: _DCNode) -> bool:
+        """
+        Less than method of the node. Compares the dist with respect to the other input node.
+
+        Parameters
+        ----------
+        other_node : _DCNode
+            the other node
+
+        Returns
+        -------
+        is_less : bool
+            True if dist is smaller than dist of the other node
+        """
+        less_than = self.dist < other_node.dist
+        return less_than
+
+
+def reachability_distances(X: np.ndarray, min_points: int = 5) -> np.ndarray:
+    """
+    Calculates the reachability distances between points using the min_points threshold in O(n^2) time.
+
+    Parameters
+    ----------
+    X : np.ndarray
+        the points
+    min_points : int, optional
+        min_points parameter (default: 5)
+
+    Returns
+    -------
+    reach_distances : np.ndarray
+        Array containing the pair-wise reachability distances
+
+    Raises
+    ------
+    Raises a ValueError if min_points is larger than the number of points.
+    """
+    if min_points > X.shape[0]:
+        raise ValueError(f"Min points ({min_points}) can't exceed the size of the dataset ({X.shape[0]})")
+    eucl_distances = squareform(pdist(X, metric="euclidean"))
+    if min_points > 1:
+        core_distances = np.partition(eucl_distances, min_points - 1, axis=1)[:, min_points - 1]
+        reach_distances = np.maximum(eucl_distances, np.maximum.outer(core_distances, core_distances))
+        np.fill_diagonal(reach_distances, 0)
+    else:
+        reach_distances = eucl_distances
+    return reach_distances
+
+
+def minimum_spanning_tree_prims(matrix: np.ndarray, use_less_memory: bool = False, min_points: int = None) -> np.ndarray:
+    """
+    Create a Minimum-spanning-tree of a given matrix using Prim's algorithm.
+    The tree will be build in O(n^2) time.
+
+    Parameters
+    ----------
+    matrix : np.ndarray
+        The input matrix
+    use_less_memory : bool
+        If true, the MST will not directly be build for the input matrix but the matrix will be used to construct a distance matrix first.
+        Saves quadratic RAM usage, but also needs double the time for computing (default: False)
+    min_points : int
+        Min_points for calculating the reachability distance. Only relevant if use_less_memory is True.
+        If min_points is None, the euclidean distance will be used (default: None)
+
+    Returns
+    -------
+    mst_edges : np.ndarray
+        The edges of the Minimum-spanning-tree, represented as a (n-1, 3) matrix with entries corresponding to (node_i, node_j, dist_ij)
+    """
+    assert (matrix.shape[0] == matrix.shape[1]) or use_less_memory, "Input matrix must be quadratic or use_less_memory must be True."
+    n = matrix.shape[0]
+    nodes_min_dist = np.full(n, np.inf)
+    parent = np.zeros(n, dtype=int)
+    not_in_mst = np.ones(n, dtype=bool)
+    mst_edges = np.empty((n - 1), dtype=([("i", int), ("j", int), ("dist", float)]))
+    # If min_points is not None, use reachability distance => calculate core distances of all points
+    if use_less_memory and min_points is not None:
+        core_distances = np.zeros(n)
+        for i in range(n):
+            eucl_distances = cdist([matrix[i]], matrix, metric="euclidean").ravel()
+            core_distances[i] = np.partition(eucl_distances, min_points - 1)[min_points - 1]
+    # Start building the MST
+    u = 0
+    nodes_min_dist[u] = 0
+    not_in_mst[u] = False
+    for i in range(n - 1):
+        if use_less_memory:
+            eucl_distances = cdist([matrix[u]], matrix, metric="euclidean").ravel()
+            if min_points is None:
+                dist_u = eucl_distances
+            else:
+                dist_u = np.maximum(eucl_distances, np.maximum(core_distances[u], core_distances))
+        else:
+            # If use_less_memory=False, 'matrix' is expected to be the precomputed distance matrix
+            dist_u = matrix[u]
+        update_mask = not_in_mst & (dist_u < nodes_min_dist)
+        # Update distances and parents
+        nodes_min_dist[update_mask] = dist_u[update_mask]
+        parent[update_mask] = u
+        # Select next closest unvisited node
+        masked_dists = np.where(not_in_mst, nodes_min_dist, np.inf)
+        u = np.argmin(masked_dists)
+        mst_edges[i] = (parent[u], u, nodes_min_dist[u])
+        not_in_mst[u] = False
+    return mst_edges
diff --git a/clustpy/utils/tests/test_dctree.py b/clustpy/utils/tests/test_dctree.py
new file mode 100644
index 0000000..1dea910
--- /dev/null
+++ b/clustpy/utils/tests/test_dctree.py
@@ -0,0 +1,162 @@
+from clustpy.utils import minimum_spanning_tree_prims, reachability_distances, DCTree
+import numpy as np
+
+def test_dc_tree_min():
+    X = np.array([
+        [0.0, 0.0],
+        [-3.0, 0.0],
+        [2.0, 0.0],
+        [3.0, 0.0],
+        [-7.0, 0.0],
+        [-8.0, 0.0]
+    ])
+    # Min_points = 3
+    dctree = DCTree(X, min_points = 3, use_less_memory = False)
+    root = dctree.root
+    assert root.dist == 5.0
+    assert root.left.leaves == [0, 2, 3, 1, 4]
+    assert root.right.leaves == [5]
+    assert root.right.dist == 0.0
+    assert root.right.right is None
+    assert root.right.left is None
+    assert root.left.dist == 4.0
+    assert root.left.left.leaves == [0, 2, 3, 1]
+    assert root.left.right.leaves == [4]
+    assert root.left.right.dist == 0.0
+    assert root.left.right.left is None
+    assert root.left.right.right is None
+    assert root.left.left.dist == 4.0
+    assert root.left.left.left.leaves == [0, 2, 3]
+    assert root.left.left.right.leaves == [1]
+    assert root.left.left.left.dist == 3.0
+    assert root.left.left.left.left.leaves == [0, 2]
+    assert root.left.left.left.right.leaves == [3]
+    assert root.left.left.left.left.dist == 3.0
+    assert root.left.left.left.left.left.leaves == [0]
+    assert root.left.left.left.left.right.leaves == [2]
+    all_distances = dctree.dc_distances()
+    expected_distances = np.array([[0., 4., 3., 3., 4., 5.],
+                 [4., 0., 4., 4., 4., 5.],
+                 [3., 4., 0., 3., 4., 5.],
+                 [3., 4., 3., 0., 4., 5.],
+                 [4., 4., 4., 4., 0., 5.],
+                 [5., 5., 5., 5., 5., 0.]
+    ])
+    assert np.array_equal(all_distances, expected_distances)
+    all_distances_reverse = dctree.dc_distances(np.arange(6), np.arange(5)[::-1], access_method="dc_dist")
+    expected_distances = expected_distances[:, np.arange(5)[::-1]]
+    assert np.array_equal(all_distances_reverse, expected_distances)
+    labels = dctree.get_k_center(3)
+    assert np.array_equal(labels, np.array([2, 2, 2, 2, 1, 0]))
+    eps = dctree.get_eps_for_k(3)
+    assert np.abs(eps - 4.0) < 1e-5
+    # Min_points = 2
+    dctree = DCTree(X, min_points = 2, use_less_memory = True)
+    root = dctree.root
+    assert root.dist == 4.0
+    assert root.left.leaves == [0, 2, 3, 1]
+    assert root.right.leaves == [4, 5]
+    assert root.right.dist == 1.0
+    assert root.right.left.leaves == [4]
+    assert root.right.right.leaves == [5]
+    assert root.right.left.dist == 0.0
+    assert root.right.left.left is None
+    assert root.right.left.right is None
+    assert root.right.right.dist == 0.0
+    assert root.right.right.left is None
+    assert root.right.right.right is None
+    assert root.left.dist == 3.0
+    assert root.left.left.leaves == [0, 2, 3]
+    assert root.left.right.leaves == [1]
+    assert root.left.right.dist == 0.0
+    assert root.left.right.left is None
+    assert root.left.right.right is None
+    assert root.left.left.dist == 2.0
+    assert root.left.left.left.leaves == [0]
+    assert root.left.left.right.leaves == [2, 3]
+    assert root.left.left.right.dist == 1.0
+    assert root.left.left.right.left.leaves == [2]
+    assert root.left.left.right.right.leaves == [3]
+    all_distances = dctree.dc_distances()
+    expected_distances = np.array([[0., 3., 2., 2., 4., 4.],
+                 [3., 0., 3., 3., 4., 4.],
+                 [2., 3., 0., 1., 4., 4.],
+                 [2., 3., 1., 0., 4., 4.],
+                 [4., 4., 4., 4., 0., 1.],
+                 [4., 4., 4., 4., 1., 0.]
+    ])
+    assert np.array_equal(all_distances, expected_distances)
+    all_distances_reverse = dctree.dc_distances(np.arange(6), np.arange(5)[::-1], access_method="dc_dist")
+    expected_distances = expected_distances[:, np.arange(5)[::-1]]
+    assert np.array_equal(all_distances_reverse, expected_distances)
+    labels = dctree.get_k_center(3)
+    assert np.array_equal(labels, np.array([2, 1, 2, 2, 0, 0]))
+    eps = dctree.get_eps_for_k(3)
+    assert np.abs(eps - 3.0) < 1e-5
+    # Test different accesses to DCTree
+    node = dctree[4]
+    assert node.id == 4
+    all_nodes = dctree[np.arange(11)]
+    for i, node in enumerate(all_nodes):
+        assert node.id == i
+    # Test repr
+    assert type(dctree.__repr__) is str
+    assert type(node.__repr__) is str
+
+
+def test_reachability_distances():
+    X = np.array([
+        [0.0, 0.0],
+        [-3.0, 0.0],
+        [2.0, 0.0],
+        [3.0, 0.0],
+        [-7.0, 0.0],
+        [-8.0, 0.]
+    ])
+    distances = reachability_distances(X, min_points = 2)
+    expected_distances = [[0., 3., 2., 3., 7., 8.],
+                 [3., 0., 5., 6., 4., 5.],
+                 [2., 5., 0., 1., 9., 10.],
+                 [3., 6., 1., 0., 10., 11. ],
+                 [7., 4., 9., 10, 0., 1.],
+                 [8., 5., 10., 11., 1., 0.]
+    ]
+    assert np.array_equal(distances, expected_distances)
+    distances = reachability_distances(X, min_points = 3)
+    expected_distances = [[0., 4., 3., 3., 7., 8.],
+                 [4., 0., 5., 6., 4., 5.],
+                 [3., 5., 0., 3., 9., 10.],
+                 [3., 6., 3., 0., 10., 11.],
+                 [7., 4., 9., 10., 0., 5.],
+                 [8., 5., 10., 11., 5., 0.]
+    ]
+    assert np.array_equal(distances, expected_distances)
+
+
+def test_minimum_spanning_tree_prims():
+    distance_matrix = np.array([
+            [0.0, 3.0, 2.0, 3.0, 7.0, 8.0],
+            [3.0, 0.0, 5.0, 6.0, 4.0, 5.0],
+            [2.0, 5.0, 0.0, 1.0, 9.0, 10.0],
+            [3.0, 6.0, 1.0, 0.0, 10.0, 11.0],
+            [7.0, 4.0, 9.0, 10.0, 0.0, 1.0],
+            [8.0, 5.0, 10.0, 11.0, 1.0, 0.0]
+        ])
+    mst = minimum_spanning_tree_prims(distance_matrix, use_less_memory=False, min_points=None)
+    expected_mst = np.array([(0, 2, 2.0), (2, 3, 1.0), (0, 1, 3.0), (1, 4, 4.0), (4, 5, 1.0)], dtype=([("i", int), ("j", int), ("dist", float)]))
+    # test with use_less_memory=True
+    X = np.array([
+            [0.0, 0.0],
+            [-3.0, 0.0],
+            [2.0, 0.0],
+            [3.0, 0.0],
+            [-7.0, 0.0],
+            [-8.0, 0.]
+        ])
+    mst = minimum_spanning_tree_prims(X, use_less_memory=True, min_points=None)
+    assert np.array_equal(mst, expected_mst)
+    # Test reachability distance
+    core_distances = [3, 4, 2, 3, 7]
+    mst = minimum_spanning_tree_prims(X, use_less_memory=True, min_points=3)
+    expected_mst = np.array([(0, 2, 3.0), (0, 3, 3.0), (0, 1, 4.0), (1, 4, 4.0), (1, 5, 5.0)], dtype=([("i", int), ("j", int), ("dist", float)]))
+    assert np.array_equal(mst, expected_mst)

From 0edf985e4caa7c0a40e30172d81e74fb12940fdd Mon Sep 17 00:00:00 2001
From: Collin Leiber <33159895+collinleiber@users.noreply.github.com>
Date: Thu, 11 Jun 2026 10:01:35 +0300
Subject: [PATCH 3/5] Add missing import to shade.py

---
 clustpy/deep/shade.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clustpy/deep/shade.py b/clustpy/deep/shade.py
index 5bf50bb..1b595ea 100644
--- a/clustpy/deep/shade.py
+++ b/clustpy/deep/shade.py
@@ -16,6 +16,7 @@
 import tqdm
 from typing import Callable, Optional, Tuple
 from sklearn.utils.validation import check_is_fitted
+from sklearn.base import ClusterMixin
 
 
 class SHADE(_AbstractDeepClusteringAlgo):

From 943af52cbc45a1a2d56080d1be22fe465bad7deb Mon Sep 17 00:00:00 2001
From: Collin Leiber <33159895+collinleiber@users.noreply.github.com>
Date: Thu, 11 Jun 2026 16:47:41 +0300
Subject: [PATCH 4/5] Update dctree.py

---
 clustpy/utils/dctree.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/clustpy/utils/dctree.py b/clustpy/utils/dctree.py
index 60f1751..2e87ca1 100644
--- a/clustpy/utils/dctree.py
+++ b/clustpy/utils/dctree.py
@@ -400,11 +400,9 @@ def __repr__(self) -> str:
             return ""
         pointer_right = "   "
         pointer_left = "   " if self.root.right else "   "
-        repr_string = (
-            f"{self.root}"
-            f"{self.__repr__help(self.root.left, pointer_left, '', self.root.right is not None)}"
-            f"{self.__repr__help(self.root.right, pointer_right, '', False)}"
-        )
+        repr_string = f"{self.root}" 
+        repr_string += f"{self.__repr__help(self.root.left, pointer_left, '', self.root.right is not None)}" 
+        repr_string += f"{self.__repr__help(self.root.right, pointer_right, '', False)}"
         return repr_string
 
     def __repr__help(self, node: Optional[_DCNode], pointer: str, padding: str, has_right_sibling: bool) -> str:
@@ -432,13 +430,11 @@ def __repr__help(self, node: Optional[_DCNode], pointer: str, padding: str, has_
         padding_for_both = padding + ("   " if has_right_sibling else "   ")
         pointer_right = "   "
         pointer_left = "   " if node.right else "   "
-        repr_string = (
-            f"\n   {padding.replace('|', ' ')}// #region"
-            f"\n{padding}{pointer}{node}"
-            f"{self.__repr__help(node.left, pointer_left, padding_for_both, node.right is not None)}"
-            f"{self.__repr__help(node.right, pointer_right, padding_for_both, False)}"
-            f"\n   {padding.replace('|', ' ')}// #endregion"
-        )
+        repr_string = f"\n   {padding.replace('|', ' ')}// #region" 
+        repr_string += f"\n{padding}{pointer}{node}" 
+        repr_string += f"{self.__repr__help(node.left, pointer_left, padding_for_both, node.right is not None)}" 
+        repr_string += f"{self.__repr__help(node.right, pointer_right, padding_for_both, False)}" 
+        repr_string += f"\n   {padding.replace('|', ' ')}// #endregion"
         return repr_string
 
 

From c32c1f7a4dc1a85df0ccd395f3e7e925fc3c8c97 Mon Sep 17 00:00:00 2001
From: Collin Leiber <33159895+collinleiber@users.noreply.github.com>
Date: Thu, 11 Jun 2026 16:43:10 +0200
Subject: [PATCH 5/5] Update test_dctree.py

---
 clustpy/utils/tests/test_dctree.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clustpy/utils/tests/test_dctree.py b/clustpy/utils/tests/test_dctree.py
index 1dea910..9d79944 100644
--- a/clustpy/utils/tests/test_dctree.py
+++ b/clustpy/utils/tests/test_dctree.py
@@ -100,8 +100,8 @@ def test_dc_tree_min():
     for i, node in enumerate(all_nodes):
         assert node.id == i
     # Test repr
-    assert type(dctree.__repr__) is str
-    assert type(node.__repr__) is str
+    assert type(dctree.__repr__()) is str
+    assert type(node.__repr__()) is str
 
 
 def test_reachability_distances():