From d1bb9e9a601fc9844112948649d10c265d0a7bd5 Mon Sep 17 00:00:00 2001
From: babayagaofficial <baba.yaga.df@gmail.com>
Date: Tue, 17 Feb 2026 16:26:39 +0100
Subject: [PATCH 1/7] feat: Option to run split and type commands with previous
 results from split and type on a subset of plasmids, using three different
 approaches. Outputs a comparison and distances to previous results. feat:
 Make some of the visualisation outputs optional via flags that turn them off.

---
 plasnet/alt_label_propagation.py | 104 ++++++++++++++++++++++++++++
 plasnet/base_graph.py            |   5 ++
 plasnet/clustering_dists.py      | 114 +++++++++++++++++++++++++++++++
 plasnet/community_graph.py       |  89 ++++++++++++++++++++++++
 plasnet/list_of_graphs.py        |  10 ++-
 plasnet/plasmid_graph.py         |  10 ++-
 plasnet/plasnet_main.py          |  93 ++++++++++++++++++++-----
 plasnet/subcommunity_graph.py    |   2 +-
 8 files changed, 405 insertions(+), 22 deletions(-)
 create mode 100644 plasnet/alt_label_propagation.py
 create mode 100644 plasnet/clustering_dists.py
diff --git a/plasnet/alt_label_propagation.py b/plasnet/alt_label_propagation.py
new file mode 100644
index 0000000..601ca48
--- /dev/null
+++ b/plasnet/alt_label_propagation.py
@@ -0,0 +1,104 @@
+
+from collections import Counter, defaultdict, deque
+
+import networkx as nx
+from networkx.utils import groups, not_implemented_for, py_random_state
+
+
+
+@py_random_state(2)
+def appendable_lpa_communities(G, initial_labels=None, seed=None):
+    """Returns communities in `G` as detected by asynchronous label
+    propagation.
+
+    The asynchronous label propagation algorithm is described in
+    [1]_. The algorithm is probabilistic and the found communities may
+    vary on different executions.
+
+    The algorithm proceeds as follows. After initializing each node with
+    a unique label, the algorithm repeatedly sets the label of a node to
+    be the label that appears most frequently among that nodes
+    neighbors. The algorithm halts when each node has the label that
+    appears most frequently among its neighbors. The algorithm is
+    asynchronous because each node is updated without waiting for
+    updates on the remaining nodes.
+
+    This generalized version of the algorithm in [1]_ accepts edge
+    weights.
+
+    Parameters
+    ----------
+    G : Graph
+
+    weight : string
+        The edge attribute representing the weight of an edge.
+        If None, each edge is assumed to have weight one. In this
+        algorithm, the weight of an edge is used in determining the
+        frequency with which a label appears among the neighbors of a
+        node: a higher weight means the label appears more often.
+
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    Returns
+    -------
+    communities : iterable
+        Iterable of communities given as sets of nodes.
+
+    Notes
+    -----
+    Edge weight attributes must be numerical.
+
+    References
+    ----------
+    .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
+           linear time algorithm to detect community structures in large-scale
+           networks." Physical Review E 76.3 (2007): 036106.
+    """
+
+
+    if not initial_labels:
+        labels = {n: i for i, n in enumerate(G)}
+    else:
+        start = max(initial_labels.values())
+        H = G.copy()
+        H.remove_nodes_from(initial_labels.keys())
+        labels = {n: i+start for i,n in enumerate(H)}
+        labels.update(initial_labels)
+            
+    cont = True
+
+    while cont:
+        cont = False
+        nodes = list(G)
+        seed.shuffle(nodes)
+
+        for node in nodes:
+            if not G[node]:
+                continue
+
+            # Get label frequencies among adjacent nodes.
+            # Depending on the order they are processed in,
+            # some nodes will be in iteration t and others in t-1,
+            # making the algorithm asynchronous.
+            # initialising a Counter from an iterator of labels is
+            # faster for getting unweighted label frequencies
+            label_freq = Counter(map(labels.get, G[node]))
+
+            # Get the labels that appear with maximum frequency.
+            max_freq = max(label_freq.values())
+            best_labels = [
+                label for label, freq in label_freq.items() if freq == max_freq
+            ]
+
+            # If the node does not have one of the maximum frequency labels,
+            # randomly choose one of them and update the node's label.
+            # Continue the iteration as long as at least one node
+            # doesn't have a maximum frequency label.
+            if labels[node] not in best_labels:
+                labels[node] = seed.choice(best_labels)
+                cont = True
+
+
+    yield from groups(labels).values()
\ No newline at end of file
diff --git a/plasnet/base_graph.py b/plasnet/base_graph.py
index e160d0a..3d134e9 100644
--- a/plasnet/base_graph.py
+++ b/plasnet/base_graph.py
@@ -143,3 +143,8 @@ def load(cls: Type[BaseGraphType], filepath: Path) -> BaseGraphType:
     def write_classification(self, typing_fh: TextIO) -> None:
         for node in self.nodes:
             typing_fh.write(f"{node}\t{self.label}\n")
+
+    def compare_classification(self, prev_typing: dict, typing_fh: TextIO) -> None:
+        for node in self.nodes:
+            if node in prev_typing.keys():
+                typing_fh.write(f"{node}\t{self.label}\t{prev_typing[node]}\n")
diff --git a/plasnet/clustering_dists.py b/plasnet/clustering_dists.py
new file mode 100644
index 0000000..05a123d
--- /dev/null
+++ b/plasnet/clustering_dists.py
@@ -0,0 +1,114 @@
+import pandas as pd
+import numpy as np
+
+def read_in_clusters(compare_tsv):
+    pling_df = pd.read_csv(compare_tsv, sep="\t")
+    plasmids = list(pling_df["plasmid"].values)
+    clusters_pling = {i:set(pling_df[pling_df["type"]==el]["plasmid"].values) for i,el in enumerate(list(set(pling_df["type"])))}
+    clusters_pling_old = {i:set(pling_df[pling_df["previous_type"]==el]["plasmid"].values) for i,el in enumerate(list(set(pling_df["previous_type"])))}
+    return clusters_pling, clusters_pling_old, plasmids
+
+def make_contingency_matrix(clusters_1, clusters_2): #clusters_1 and clusters_2 are dictionaries of clusters, k_1 and k_2 the lengths of the respective dictionaries
+    k_1 = len(clusters_1)
+    k_2 = len(clusters_2)
+    contingency = np.zeros((k_1,k_2))
+    for i in range(k_1):
+        for j in range(k_2):
+            contingency[i][j] = len(clusters_1[i].intersection(clusters_2[j]))
+    return contingency, k_1, k_2
+
+def split_join(contingency, k_1, k_2, n): #clusters_1 and clusters_2 are dictionaries of clusters, n is the total number of data points (plasmids)
+    dist = 2*n - sum([max(contingency[i]) for i in range(k_1)]) - sum([max(contingency[:,j]) for j in range(k_2)])
+    return int(dist)
+
+def rand_index(contingency):
+    contingency = np.asarray(contingency)
+
+    def comb2(x):
+        return x * (x - 1) / 2.0
+
+    n = contingency.sum()
+    if n <= 1:
+        return 1.0  # degenerate case
+
+    # True positives
+    tp = np.sum(comb2(contingency))
+
+    # Row and column sums
+    row_sums = contingency.sum(axis=1)
+    col_sums = contingency.sum(axis=0)
+
+    sum_rows = np.sum(comb2(row_sums))
+    sum_cols = np.sum(comb2(col_sums))
+
+    fp = sum_cols - tp
+    fn = sum_rows - tp
+
+    total_pairs = comb2(n)
+    tn = total_pairs - tp - fp - fn
+
+    ri = (tp + tn) / total_pairs
+    return ri
+
+def adjusted_rand_index(contingency):
+    # Helper function: n choose 2
+    def comb2(x):
+        return x * (x - 1) / 2.0
+
+    n = contingency.sum()
+    if n <= 1:
+        return 0.0
+
+    # Sum over all pairs in cells
+    sum_comb_cells = np.sum(comb2(contingency))
+
+    # Row and column sums
+    row_sums = contingency.sum(axis=1)
+    col_sums = contingency.sum(axis=0)
+
+    sum_comb_rows = np.sum(comb2(row_sums))
+    sum_comb_cols = np.sum(comb2(col_sums))
+
+    total_pairs = comb2(n)
+
+    expected_index = (sum_comb_rows * sum_comb_cols) / total_pairs
+    max_index = 0.5 * (sum_comb_rows + sum_comb_cols)
+
+    denominator = max_index - expected_index
+    if denominator == 0:
+        return 0.0
+
+    ari = (sum_comb_cells - expected_index) / denominator
+    return ari
+
+def mutual_information(contingency):
+    n = contingency.sum()
+    if n == 0:
+        return 0.0
+
+    row_sums = contingency.sum(axis=1)
+    col_sums = contingency.sum(axis=0)
+
+    # Only consider nonzero entries
+    nz = contingency > 0
+    nij = contingency[nz]
+
+    # Corresponding row and column sums
+    i_idx, j_idx = np.nonzero(nz)
+    ai = row_sums[i_idx]
+    bj = col_sums[j_idx]
+
+    # Compute MI
+    mi = np.sum(
+        (nij / n) * np.log((nij * n) / (ai * bj))
+    )
+
+    return mi
+
+def all_clustering_dists(contingency,k_1,k_2,n):
+    dists = {}
+    dists["rand index"]=rand_index(contingency)
+    dists["adjusted rand index"]=adjusted_rand_index(contingency)
+    dists["mutual information"]=mutual_information(contingency)
+    dists["split join"]=split_join(contingency,k_1,k_2,n)
+    return dists
diff --git a/plasnet/community_graph.py b/plasnet/community_graph.py
index dc7729d..6fb42fe 100644
--- a/plasnet/community_graph.py
+++ b/plasnet/community_graph.py
@@ -2,6 +2,8 @@
 
 from typing import Optional
 
+from plasnet.alt_label_propagation import appendable_lpa_communities
+
 import networkx as nx
 
 from plasnet.ColorPicker import ColorPicker
@@ -96,6 +98,93 @@ def split_graph_into_subcommunities(
                 self._node_to_colour[node] = colour
 
         return Subcommunities(subcommunities)
+    
+    def split_graph_given_labels(
+            self, small_subcommunity_size_threshold: int, typings: list[dict]
+    ) -> Subcommunities:
+        
+        old_plasmids = [plasmid for typing in typings for plasmid in typing.keys() if plasmid in self.nodes]
+        new_plasmids = [plasmid for plasmid in self.nodes if plasmid not in old_plasmids]
+        new_subcommunities_nodes: list[set[str]] = list(
+            nx.community.asyn_lpa_communities(G=self.subgraph(new_plasmids), seed=42)
+        )
+
+        label = 0
+        map = {}
+        for typing in typings:
+            for i, subcomm in enumerate(typing.values()):
+                map[subcomm] = label + i
+            label = label + len(typing.keys())
+        initial_labels = {n: map[typing[n]] for n in old_plasmids}
+        for subcomm in new_subcommunities_nodes:
+            for plasmid in list(subcomm):
+                initial_labels[plasmid] = label
+            label = label + 1
+        subcommunities_nodes: list[set[str]] = list(
+            appendable_lpa_communities(G=self, initial_labels=initial_labels, seed=42)
+        )
+        subcommunities_nodes = self._fix_small_subcommunities(
+            subcommunities_nodes, small_subcommunity_size_threshold
+        )
+
+        subcommunities = []
+        for subcommunity_index, subcommunity_nodes in enumerate(subcommunities_nodes):
+            colour = ColorPicker.get_color_given_index(subcommunity_index)
+
+            subcommunity = SubcommunityGraph(
+                self.subgraph(subcommunity_nodes),
+                self._hub_connectivity_threshold,
+                self._edge_density,
+                label=f"{self.label}_subcommunity_{subcommunity_index}",
+                colour=colour,
+            )
+            subcommunities.append(subcommunity)
+
+            for node in subcommunity_nodes:
+                self._node_to_colour[node] = colour
+
+        return Subcommunities(subcommunities)
+    
+    def nearest_neighbour(
+        self, typing, new_plasmids
+    ) -> Subcommunities:
+        subcommunity_names = set(typing["type"].to_list()) 
+        subcommunity_labels = {subcomm:[plasmid for plasmid in typing[typing["type"]==subcomm]["plasmid"].values] for subcomm in list(subcommunity_names) if subcomm.split("_")[1]==self.label.split("_")[1]} #select only those that are in this community
+        max_label = len(subcommunity_labels.keys())
+        
+        for plasmid in new_plasmids:
+            if plasmid in self.nodes:
+                neighbours = [n for n in self[plasmid] if n not in new_plasmids]
+                if len(neighbours)==0:
+                    subcommunity_labels[f"community_{self.label}_subcommunity_{max_label}"] = [plasmid]
+                    max_label = max_label + 1
+                else:
+                    neighbours = sorted(neighbours, key=lambda n: self.edges[n,plasmid][DistanceTags.SplitDistanceTag.value])
+                    min_dist = self.edges[neighbours[0],plasmid][DistanceTags.SplitDistanceTag.value]
+                    nearest = [neighbour for neighbour in neighbours if self.edges[neighbour,plasmid][DistanceTags.SplitDistanceTag.value]==min_dist]
+                    nearest = sorted(nearest, key=lambda n: len(typing[typing["type"]==typing[typing["plasmid"]==n]["type"].values[0]]))
+                    nn = nearest[-1] #select nearest neighbour with largest subcommunity size
+                    subcommunity_labels[typing[typing["plasmid"]==nn]["type"].values[0]].append(plasmid)
+                
+
+        subcommunities = []
+        for subcommunity_label in subcommunity_labels.keys():
+            subcommunity_index = int(subcommunity_label.split("_")[-1])
+            colour = ColorPicker.get_color_given_index(subcommunity_index)
+
+            subcommunity = SubcommunityGraph(
+                self.subgraph(subcommunity_labels[subcommunity_label]),
+                self._hub_connectivity_threshold,
+                self._edge_density,
+                label=subcommunity_label, #reuse old labels here!
+                colour=colour,
+            )
+            subcommunities.append(subcommunity)
+
+            for node in subcommunity_labels[subcommunity_label]:
+                self._node_to_colour[node] = colour
+
+        return Subcommunities(subcommunities)
 
     def _get_libs_relative_path(self) -> str:
         return ".."
diff --git a/plasnet/list_of_graphs.py b/plasnet/list_of_graphs.py
index c2c4f6c..99751c0 100644
--- a/plasnet/list_of_graphs.py
+++ b/plasnet/list_of_graphs.py
@@ -1,6 +1,6 @@
 import pickle
 from pathlib import Path
-from typing import Generator, cast
+from typing import Generator, cast, Optional
 
 from plasnet.base_graph import BaseGraphType
 
@@ -27,11 +27,15 @@ def save_graph_as_text(self, filepath: Path) -> None:
             for graph_as_text in self._get_each_graph_as_list_of_nodes_in_text_format():
                 print(graph_as_text, file=fh)
 
-    def save_classification(self, filepath: Path, header: str) -> None:
+    def save_classification(self, filepath: Path, header: str, prev_typing: Optional[dict]=None) -> None:
         with open(filepath, "w") as fh:
             print(header, file=fh)
             for subgraph in self:
-                subgraph.write_classification(fh)
+                if prev_typing:
+                    subgraph.compare_classification(prev_typing, fh)
+                else:
+                    subgraph.write_classification(fh)
+                
 
     def get_graphs_sorted_by_size(self) -> "ListOfGraphs[BaseGraphType]":
         return ListOfGraphs(sorted(self, key=lambda graph: graph.number_of_nodes(), reverse=True))
diff --git a/plasnet/plasmid_graph.py b/plasnet/plasmid_graph.py
index fc38445..210125b 100644
--- a/plasnet/plasmid_graph.py
+++ b/plasnet/plasmid_graph.py
@@ -26,6 +26,7 @@ def build(
         distance_filepath: Path,
         distance_threshold: float,
         plasmids_metadata: list[str],
+        existing_graphs: Optional[tuple]=None
     ) -> "PlasmidGraph":
         """
         Creates a plasmid graph from plasmid and distance files.
@@ -62,7 +63,7 @@ def build(
         """  # noqa: E501
         plasmids = pd.read_csv(plasmids_filepath)
 
-        distance_df = pd.read_csv(distance_filepath, sep="\t")
+        distance_df = pd.read_csv(distance_filepath, dtype={"plasmid_1":str, "plasmid_2":str}, sep="\t")
         distance_df[DistanceTags.SplitDistanceTag.value] = distance_df["distance"]
 
         # apply distance threshold
@@ -75,6 +76,7 @@ def build(
             DistanceTags.SplitDistanceTag.value
         ].round(2)
 
+
         # create graph
         graph = nx.from_pandas_edgelist(
             distance_df,
@@ -84,6 +86,11 @@ def build(
             create_using=PlasmidGraph,
         )
 
+        if existing_graphs:
+            for existing_graph in existing_graphs:
+                graph = nx.compose(graph,existing_graph)
+
+
         # add all nodes to the graph, including those that have no edges
         # possibly add metadata if they were provided
         plasmid_metadata_is_too_short = len(plasmids_metadata) < len(plasmids["plasmid"])
@@ -98,6 +105,7 @@ def build(
         graph.add_nodes_from(nodes_and_metadata)
 
         return PlasmidGraph(graph)
+    
 
     def split_graph_into_communities(
         self, bh_connectivity: int, bh_neighbours_edge_density: float
diff --git a/plasnet/plasnet_main.py b/plasnet/plasnet_main.py
index e464a43..d580278 100644
--- a/plasnet/plasnet_main.py
+++ b/plasnet/plasnet_main.py
@@ -14,6 +14,7 @@
 from plasnet.sample_graphs import SampleGraphs
 from plasnet.subcommunities import Subcommunities
 from plasnet.utils import PathlibPath, distance_df_to_dict
+from plasnet.clustering_dists import read_in_clusters, make_contingency_matrix, all_clustering_dists
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 
@@ -97,6 +98,13 @@ def cli() -> None:
 @click.option(
     "--plasmids-metadata", type=PathlibPath(exists=True), help="Plasmids metadata text file."
 )
+@click.option(
+    "--graph-pickle", multiple=True, help="Existing plasmid graph to append new plasmids to."
+)
+@click.option(
+    "--prev_typing", multiple=True, help="Previous community typing, if appending to an existing plasmid graph."
+)
+@click.option("--no-community-vis", is_flag=True)
 def split(
     plasmids: Path,
     distances: Path,
@@ -107,13 +115,22 @@ def split(
     output_plasmid_graph: bool,
     output_type: Optional[str],
     plasmids_metadata: Optional[Path],
+    graph_pickle: Optional[tuple],
+    prev_typing: Optional[tuple],
+    no_community_vis: bool
 ) -> None:
     visualisations_dir = output_dir / "visualisations"
     logging.info(f"Creating plasmid graph from {plasmids} and {distances}")
     metadata = []
+
     if plasmids_metadata:
         metadata = plasmids_metadata.read_text().splitlines()
-    plasmid_graph = PlasmidGraph.build(plasmids, distances, distance_threshold, metadata)
+    if graph_pickle:
+        existing_graphs = [cast(PlasmidGraph, PlasmidGraph.load(graph)) for graph in graph_pickle]
+        plasmid_graph = PlasmidGraph.build(plasmids, distances, distance_threshold, metadata, existing_graphs)
+        typings = [pd.read_csv(prev, sep="\t", index_col=0).to_dict()["community"] for prev in prev_typing]
+    else:
+        plasmid_graph = PlasmidGraph.build(plasmids, distances, distance_threshold, metadata)
 
     if output_plasmid_graph:
         logging.info("Producing full plasmid graph visualisation")
@@ -126,10 +143,11 @@ def split(
         bh_connectivity, bh_neighbours_edge_density
     )
 
-    logging.info("Producing communities visualisation")
-    OutputProducer.produce_communities_visualisation(
-        communities, visualisations_dir / "communities", output_type
-    )
+    if not no_community_vis:
+        logging.info("Producing communities visualisation")
+        OutputProducer.produce_communities_visualisation(
+            communities, visualisations_dir / "communities", output_type
+        )
 
     logging.info("Serialising objects")
     objects_dir = output_dir / "objects"
@@ -138,6 +156,9 @@ def split(
     communities.save(objects_dir / "communities.pkl")
     communities.save_graph_as_text(objects_dir / "communities.txt")
     communities.save_classification(objects_dir / "communities.tsv", "plasmid\tcommunity")
+    if prev_typing:
+        for i, typing in enumerate(typings):
+            communities.save_classification(objects_dir / f"compare_communities_{i}.tsv", "plasmid\tcommunity\tprevious_community", prev_typing=typing)
 
     logging.info("All done!")
 
@@ -191,6 +212,15 @@ def split(
     default="html",
     help="Whether to output networks as html visualisations, cytoscape formatted json, or both.",
 )
+@click.option(
+    "--prev_typing", multiple=True, help="Previous subcommunity typing, if it exists."
+)
+@click.option("--reclustering_method", type=click.Choice(["unbiased", "biased", "nearest_neighbour"]), default="unbiased", help=
+    "unbiased: If including a previous subcommunity typing, all previous and new genomes will be reclustered from scratch, ignoring previous typing.\n"
+    "biased: The asynchronous label propagation will start with the previous typing as initial labels.\n"
+    "nearest_neighbour: Does not cluster the new genomes, rather, assigns type based on the closest neighbour of the previous typing."
+)
+@click.option("--no-vis", is_flag=True)
 def type(
     communities_pickle: Path,
     distances: Path,
@@ -198,6 +228,9 @@ def type(
     distance_threshold: float,
     small_subcommunity_size_threshold: int,
     output_type: Optional[str],
+    prev_typing: Optional[tuple],
+    reclustering_method: Optional[str],
+    no_vis: bool
 ) -> None:
     logging.info(f"Loading communities from {communities_pickle}")
     communities = cast(Communities, Communities.load(communities_pickle))
@@ -218,26 +251,39 @@ def type(
     communities.filter_by_distance(distance_threshold)
 
     logging.info("Typing communities (i.e. splitting them into subcommunities)")
+
+    if prev_typing and reclustering_method=="nearest_neighbour":
+        typing = pd.read_csv(prev_typing[0], sep="\t") #nearest neighbour does not support merging graphs
+    elif prev_typing:
+        typings = [pd.read_csv(prev, sep="\t", index_col=0).to_dict()["type"] for prev in prev_typing]
+
     all_subcommunities = Subcommunities()
     all_hub_plasmids = set()
     for community in communities:
         hub_plasmids = community.remove_hub_plasmids()
         all_hub_plasmids.update(hub_plasmids)
-        subcommunities = community.split_graph_into_subcommunities(
-            small_subcommunity_size_threshold
-        )
+        if prev_typing and reclustering_method=="biased":
+            subcommunities = community.split_graph_given_labels(small_subcommunity_size_threshold, typings)
+        elif prev_typing and reclustering_method=="nearest_neighbour":
+            new_plasmids = [plasmid for plasmid in community.nodes if plasmid not in typing["plasmid"].to_list()]
+            subcommunities = community.nearest_neighbour(typing, new_plasmids)
+        else:
+            subcommunities = community.split_graph_into_subcommunities(
+                small_subcommunity_size_threshold
+            )
         all_subcommunities.extend(subcommunities)
 
-    logging.info("Producing communities visualisations")
-    original_communities.recolour_nodes(communities)
-    OutputProducer.produce_communities_visualisation(
-        original_communities, output_dir / "visualisations/communities", output_type
-    )
+    if not no_vis:
+        logging.info("Producing communities visualisations")
+        original_communities.recolour_nodes(communities)
+        OutputProducer.produce_communities_visualisation(
+            original_communities, output_dir / "visualisations/communities", output_type
+        )
 
-    logging.info("Producing subcommunities visualisations")
-    OutputProducer.produce_subcommunities_visualisation(
-        all_subcommunities, output_dir / "visualisations/subcommunities", output_type
-    )
+        logging.info("Producing subcommunities visualisations")
+        OutputProducer.produce_subcommunities_visualisation(
+            all_subcommunities, output_dir / "visualisations/subcommunities", output_type
+        )
 
     logging.info("Serialising objects")
     objects_dir = output_dir / "objects"
@@ -250,6 +296,19 @@ def type(
         for plasmid in all_hub_plasmids:
             print(plasmid, file=hub_plasmids_fh)
 
+    if prev_typing and reclustering_method!="nearest_neighbour":
+        for i, typing in enumerate(typings):
+            all_subcommunities.save_classification(objects_dir / f"compare_typing_{i}.tsv", "plasmid\ttype\tprevious_type",prev_typing=typing)
+            
+            clusters_pling, clusters_pling_old, plasmids = read_in_clusters(objects_dir / f"compare_typing_{i}.tsv")
+            n = len(plasmids)
+            contingency, k_1, k_2 = make_contingency_matrix(clusters_pling, clusters_pling_old)
+            clust_dists = all_clustering_dists(contingency, k_1, k_2, n)
+            with open(objects_dir / f"clustering_dists_{i}.tsv", "w") as f:
+                f.write("distance_type\tdistance\n")
+                for key in clust_dists.keys():
+                    f.write(f"{key}\t{clust_dists[key]}\n")
+
     logging.info("All done!")
 
 
diff --git a/plasnet/subcommunity_graph.py b/plasnet/subcommunity_graph.py
index 687d6ee..c775a3e 100644
--- a/plasnet/subcommunity_graph.py
+++ b/plasnet/subcommunity_graph.py
@@ -25,4 +25,4 @@ def _get_samples_selectors_HTML(self) -> str:
         return ""
 
     def _get_node_color(self, node: str) -> str:
-        return self._colour
+        return self._colour
\ No newline at end of file

From 687c25d497bf7ac44b0c6906101dc6bb46a1b7c6 Mon Sep 17 00:00:00 2001
From: babayagaofficial <baba.yaga.df@gmail.com>
Date: Tue, 17 Feb 2026 16:46:15 +0100
Subject: [PATCH 2/7] trying to fix pre-commit

---
 .github/workflows/ci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 02049be..aac651f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -20,7 +20,7 @@ jobs:
       uses: pre-commit/action@v3.0.0
 
     - name: Run pre-commit CI
-      uses: pre-commit-ci/lite-action@v1.0.1
+      uses: pre-commit-ci/lite-action@v1
       if: always()
 
     - name: Install and configure Poetry

From e6e142ab4891546f6c73c208a9c42bd3d32a1157 Mon Sep 17 00:00:00 2001
From: babayagaofficial <baba.yaga.df@gmail.com>
Date: Tue, 17 Feb 2026 18:35:49 +0100
Subject: [PATCH 3/7] change permissions and pray this fixes pre-commit
 workflow

---
 .github/workflows/ci.yaml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index aac651f..89f2156 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -2,6 +2,11 @@ on:
   pull_request:
     branches: [main]
 
+permissions:
+  contents: write
+  pull-requests: write
+  actions: write
+
 jobs:
   main:
     runs-on: ${{ matrix.os }}
@@ -20,7 +25,7 @@ jobs:
       uses: pre-commit/action@v3.0.0
 
     - name: Run pre-commit CI
-      uses: pre-commit-ci/lite-action@v1
+      uses: pre-commit-ci/lite-action@v1.0.1
       if: always()
 
     - name: Install and configure Poetry

From beb2f436ece27c0f011080f2a1c36224071982f5 Mon Sep 17 00:00:00 2001
From: babayagaofficial <baba.yaga.df@gmail.com>
Date: Tue, 17 Feb 2026 18:49:34 +0100
Subject: [PATCH 4/7] changed by pre-commit

---
 .isort.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.isort.cfg b/.isort.cfg
index 9645ea6..130275a 100644
--- a/.isort.cfg
+++ b/.isort.cfg
@@ -1,2 +1,2 @@
 [settings]
-known_third_party = click,networkx,pandas
+known_third_party = click,networkx,numpy,pandas

From 3ed25ffe559a8d151ea08f29dbbcbe1137df7ce1 Mon Sep 17 00:00:00 2001
From: babayagaofficial <baba.yaga.df@gmail.com>
Date: Tue, 17 Feb 2026 19:13:31 +0100
Subject: [PATCH 5/7] and more pre-commit

---
 plasnet/alt_label_propagation.py | 20 +++------
 plasnet/clustering_dists.py      | 49 +++++++++++++-------
 plasnet/community_graph.py       | 68 ++++++++++++++++++----------
 plasnet/list_of_graphs.py        |  7 +--
 plasnet/plasnet_main.py          | 76 ++++++++++++++++++++++----------
 5 files changed, 141 insertions(+), 79 deletions(-)

diff --git a/plasnet/alt_label_propagation.py b/plasnet/alt_label_propagation.py
index 601ca48..7bc3867 100644
--- a/plasnet/alt_label_propagation.py
+++ b/plasnet/alt_label_propagation.py
@@ -1,12 +1,8 @@
+from collections import Counter
 
-from collections import Counter, defaultdict, deque
+from networkx.utils import groups
 
-import networkx as nx
-from networkx.utils import groups, not_implemented_for, py_random_state
 
-
-
-@py_random_state(2)
 def appendable_lpa_communities(G, initial_labels=None, seed=None):
     """Returns communities in `G` as detected by asynchronous label
     propagation.
@@ -57,16 +53,15 @@ def appendable_lpa_communities(G, initial_labels=None, seed=None):
            networks." Physical Review E 76.3 (2007): 036106.
     """
 
-
     if not initial_labels:
         labels = {n: i for i, n in enumerate(G)}
     else:
         start = max(initial_labels.values())
         H = G.copy()
         H.remove_nodes_from(initial_labels.keys())
-        labels = {n: i+start for i,n in enumerate(H)}
+        labels = {n: i + start for i, n in enumerate(H)}
         labels.update(initial_labels)
-            
+
     cont = True
 
     while cont:
@@ -88,9 +83,7 @@ def appendable_lpa_communities(G, initial_labels=None, seed=None):
 
             # Get the labels that appear with maximum frequency.
             max_freq = max(label_freq.values())
-            best_labels = [
-                label for label, freq in label_freq.items() if freq == max_freq
-            ]
+            best_labels = [label for label, freq in label_freq.items() if freq == max_freq]
 
             # If the node does not have one of the maximum frequency labels,
             # randomly choose one of them and update the node's label.
@@ -100,5 +93,4 @@ def appendable_lpa_communities(G, initial_labels=None, seed=None):
                 labels[node] = seed.choice(best_labels)
                 cont = True
 
-
-    yield from groups(labels).values()
\ No newline at end of file
+    yield from groups(labels).values()
diff --git a/plasnet/clustering_dists.py b/plasnet/clustering_dists.py
index 05a123d..5de5d23 100644
--- a/plasnet/clustering_dists.py
+++ b/plasnet/clustering_dists.py
@@ -1,26 +1,44 @@
-import pandas as pd
 import numpy as np
+import pandas as pd
+
 
 def read_in_clusters(compare_tsv):
     pling_df = pd.read_csv(compare_tsv, sep="\t")
     plasmids = list(pling_df["plasmid"].values)
-    clusters_pling = {i:set(pling_df[pling_df["type"]==el]["plasmid"].values) for i,el in enumerate(list(set(pling_df["type"])))}
-    clusters_pling_old = {i:set(pling_df[pling_df["previous_type"]==el]["plasmid"].values) for i,el in enumerate(list(set(pling_df["previous_type"])))}
+    clusters_pling = {
+        i: set(pling_df[pling_df["type"] == el]["plasmid"].values)
+        for i, el in enumerate(list(set(pling_df["type"])))
+    }
+    clusters_pling_old = {
+        i: set(pling_df[pling_df["previous_type"] == el]["plasmid"].values)
+        for i, el in enumerate(list(set(pling_df["previous_type"])))
+    }
     return clusters_pling, clusters_pling_old, plasmids
 
-def make_contingency_matrix(clusters_1, clusters_2): #clusters_1 and clusters_2 are dictionaries of clusters, k_1 and k_2 the lengths of the respective dictionaries
+
+def make_contingency_matrix(
+    clusters_1, clusters_2
+):  # clusters_1 and clusters_2 are dictionaries of clusters,
+    # k_1 and k_2 the lengths of the respective dictionaries
     k_1 = len(clusters_1)
     k_2 = len(clusters_2)
-    contingency = np.zeros((k_1,k_2))
+    contingency = np.zeros((k_1, k_2))
     for i in range(k_1):
         for j in range(k_2):
             contingency[i][j] = len(clusters_1[i].intersection(clusters_2[j]))
     return contingency, k_1, k_2
 
-def split_join(contingency, k_1, k_2, n): #clusters_1 and clusters_2 are dictionaries of clusters, n is the total number of data points (plasmids)
-    dist = 2*n - sum([max(contingency[i]) for i in range(k_1)]) - sum([max(contingency[:,j]) for j in range(k_2)])
+
+def split_join(contingency, k_1, k_2, n):  # clusters_1 and clusters_2 are dictionaries of clusters,
+    # n is the total number of data points (plasmids)
+    dist = (
+        2 * n
+        - sum([max(contingency[i]) for i in range(k_1)])
+        - sum([max(contingency[:, j]) for j in range(k_2)])
+    )
     return int(dist)
 
+
 def rand_index(contingency):
     contingency = np.asarray(contingency)
 
@@ -50,6 +68,7 @@ def comb2(x):
     ri = (tp + tn) / total_pairs
     return ri
 
+
 def adjusted_rand_index(contingency):
     # Helper function: n choose 2
     def comb2(x):
@@ -81,6 +100,7 @@ def comb2(x):
     ari = (sum_comb_cells - expected_index) / denominator
     return ari
 
+
 def mutual_information(contingency):
     n = contingency.sum()
     if n == 0:
@@ -99,16 +119,15 @@ def mutual_information(contingency):
     bj = col_sums[j_idx]
 
     # Compute MI
-    mi = np.sum(
-        (nij / n) * np.log((nij * n) / (ai * bj))
-    )
+    mi = np.sum((nij / n) * np.log((nij * n) / (ai * bj)))
 
     return mi
 
-def all_clustering_dists(contingency,k_1,k_2,n):
+
+def all_clustering_dists(contingency, k_1, k_2, n):
     dists = {}
-    dists["rand index"]=rand_index(contingency)
-    dists["adjusted rand index"]=adjusted_rand_index(contingency)
-    dists["mutual information"]=mutual_information(contingency)
-    dists["split join"]=split_join(contingency,k_1,k_2,n)
+    dists["rand index"] = rand_index(contingency)
+    dists["adjusted rand index"] = adjusted_rand_index(contingency)
+    dists["mutual information"] = mutual_information(contingency)
+    dists["split join"] = split_join(contingency, k_1, k_2, n)
     return dists
diff --git a/plasnet/community_graph.py b/plasnet/community_graph.py
index 6fb42fe..c4aeba0 100644
--- a/plasnet/community_graph.py
+++ b/plasnet/community_graph.py
@@ -2,10 +2,9 @@
 
 from typing import Optional
 
-from plasnet.alt_label_propagation import appendable_lpa_communities
-
 import networkx as nx
 
+from plasnet.alt_label_propagation import appendable_lpa_communities
 from plasnet.ColorPicker import ColorPicker
 from plasnet.hub_graph import HubGraph
 from plasnet.subcommunities import Subcommunities
@@ -98,12 +97,13 @@ def split_graph_into_subcommunities(
                 self._node_to_colour[node] = colour
 
         return Subcommunities(subcommunities)
-    
+
     def split_graph_given_labels(
-            self, small_subcommunity_size_threshold: int, typings: list[dict]
+        self, small_subcommunity_size_threshold: int, typings: list[dict]
     ) -> Subcommunities:
-        
-        old_plasmids = [plasmid for typing in typings for plasmid in typing.keys() if plasmid in self.nodes]
+        old_plasmids = [
+            plasmid for typing in typings for plasmid in typing.keys() if plasmid in self.nodes
+        ]
         new_plasmids = [plasmid for plasmid in self.nodes if plasmid not in old_plasmids]
         new_subcommunities_nodes: list[set[str]] = list(
             nx.community.asyn_lpa_communities(G=self.subgraph(new_plasmids), seed=42)
@@ -144,28 +144,50 @@ def split_graph_given_labels(
                 self._node_to_colour[node] = colour
 
         return Subcommunities(subcommunities)
-    
-    def nearest_neighbour(
-        self, typing, new_plasmids
-    ) -> Subcommunities:
-        subcommunity_names = set(typing["type"].to_list()) 
-        subcommunity_labels = {subcomm:[plasmid for plasmid in typing[typing["type"]==subcomm]["plasmid"].values] for subcomm in list(subcommunity_names) if subcomm.split("_")[1]==self.label.split("_")[1]} #select only those that are in this community
+
+    def nearest_neighbour(self, typing, new_plasmids) -> Subcommunities:
+        subcommunity_names = set(typing["type"].to_list())
+        subcommunity_labels = {
+            subcomm: [plasmid for plasmid in typing[typing["type"] == subcomm]["plasmid"].values]
+            for subcomm in list(subcommunity_names)
+            if subcomm.split("_")[1] == self.label.split("_")[1]
+        }  # select only those that are in this community
         max_label = len(subcommunity_labels.keys())
-        
+
         for plasmid in new_plasmids:
             if plasmid in self.nodes:
                 neighbours = [n for n in self[plasmid] if n not in new_plasmids]
-                if len(neighbours)==0:
-                    subcommunity_labels[f"community_{self.label}_subcommunity_{max_label}"] = [plasmid]
+                if len(neighbours) == 0:
+                    subcommunity_labels[f"community_{self.label}_subcommunity_{max_label}"] = [
+                        plasmid
+                    ]
                     max_label = max_label + 1
                 else:
-                    neighbours = sorted(neighbours, key=lambda n: self.edges[n,plasmid][DistanceTags.SplitDistanceTag.value])
-                    min_dist = self.edges[neighbours[0],plasmid][DistanceTags.SplitDistanceTag.value]
-                    nearest = [neighbour for neighbour in neighbours if self.edges[neighbour,plasmid][DistanceTags.SplitDistanceTag.value]==min_dist]
-                    nearest = sorted(nearest, key=lambda n: len(typing[typing["type"]==typing[typing["plasmid"]==n]["type"].values[0]]))
-                    nn = nearest[-1] #select nearest neighbour with largest subcommunity size
-                    subcommunity_labels[typing[typing["plasmid"]==nn]["type"].values[0]].append(plasmid)
-                
+                    neighbours = sorted(
+                        neighbours,
+                        key=lambda n: self.edges[n, plasmid][DistanceTags.SplitDistanceTag.value],
+                    )
+                    min_dist = self.edges[neighbours[0], plasmid][
+                        DistanceTags.SplitDistanceTag.value
+                    ]
+                    nearest = [
+                        neighbour
+                        for neighbour in neighbours
+                        if self.edges[neighbour, plasmid][DistanceTags.SplitDistanceTag.value]
+                        == min_dist
+                    ]
+                    nearest = sorted(
+                        nearest,
+                        key=lambda n: len(
+                            typing[
+                                typing["type"] == typing[typing["plasmid"] == n]["type"].values[0]
+                            ]
+                        ),
+                    )
+                    nn = nearest[-1]  # select nearest neighbour with largest subcommunity size
+                    subcommunity_labels[typing[typing["plasmid"] == nn]["type"].values[0]].append(
+                        plasmid
+                    )
 
         subcommunities = []
         for subcommunity_label in subcommunity_labels.keys():
@@ -176,7 +198,7 @@ def nearest_neighbour(
                 self.subgraph(subcommunity_labels[subcommunity_label]),
                 self._hub_connectivity_threshold,
                 self._edge_density,
-                label=subcommunity_label, #reuse old labels here!
+                label=subcommunity_label,  # reuse old labels here!
                 colour=colour,
             )
             subcommunities.append(subcommunity)
diff --git a/plasnet/list_of_graphs.py b/plasnet/list_of_graphs.py
index 99751c0..d7c2660 100644
--- a/plasnet/list_of_graphs.py
+++ b/plasnet/list_of_graphs.py
@@ -1,6 +1,6 @@
 import pickle
 from pathlib import Path
-from typing import Generator, cast, Optional
+from typing import Generator, Optional, cast
 
 from plasnet.base_graph import BaseGraphType
 
@@ -27,7 +27,9 @@ def save_graph_as_text(self, filepath: Path) -> None:
             for graph_as_text in self._get_each_graph_as_list_of_nodes_in_text_format():
                 print(graph_as_text, file=fh)
 
-    def save_classification(self, filepath: Path, header: str, prev_typing: Optional[dict]=None) -> None:
+    def save_classification(
+        self, filepath: Path, header: str, prev_typing: Optional[dict] = None
+    ) -> None:
         with open(filepath, "w") as fh:
             print(header, file=fh)
             for subgraph in self:
@@ -35,7 +37,6 @@ def save_classification(self, filepath: Path, header: str, prev_typing: Optional
                     subgraph.compare_classification(prev_typing, fh)
                 else:
                     subgraph.write_classification(fh)
-                
 
     def get_graphs_sorted_by_size(self) -> "ListOfGraphs[BaseGraphType]":
         return ListOfGraphs(sorted(self, key=lambda graph: graph.number_of_nodes(), reverse=True))
diff --git a/plasnet/plasnet_main.py b/plasnet/plasnet_main.py
index d580278..3fade9a 100644
--- a/plasnet/plasnet_main.py
+++ b/plasnet/plasnet_main.py
@@ -7,6 +7,7 @@
 import pandas as pd
 
 from plasnet import __version__
+from plasnet.clustering_dists import all_clustering_dists, make_contingency_matrix, read_in_clusters
 from plasnet.communities import Communities
 from plasnet.output_producer import OutputProducer
 from plasnet.plasmid_graph import PlasmidGraph
@@ -14,7 +15,6 @@
 from plasnet.sample_graphs import SampleGraphs
 from plasnet.subcommunities import Subcommunities
 from plasnet.utils import PathlibPath, distance_df_to_dict
-from plasnet.clustering_dists import read_in_clusters, make_contingency_matrix, all_clustering_dists
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 
@@ -102,7 +102,9 @@ def cli() -> None:
     "--graph-pickle", multiple=True, help="Existing plasmid graph to append new plasmids to."
 )
 @click.option(
-    "--prev_typing", multiple=True, help="Previous community typing, if appending to an existing plasmid graph."
+    "--prev_typing",
+    multiple=True,
+    help="Previous community typing, if appending to an existing plasmid graph.",
 )
 @click.option("--no-community-vis", is_flag=True)
 def split(
@@ -117,7 +119,7 @@ def split(
     plasmids_metadata: Optional[Path],
     graph_pickle: Optional[tuple],
     prev_typing: Optional[tuple],
-    no_community_vis: bool
+    no_community_vis: bool,
 ) -> None:
     visualisations_dir = output_dir / "visualisations"
     logging.info(f"Creating plasmid graph from {plasmids} and {distances}")
@@ -127,8 +129,12 @@ def split(
         metadata = plasmids_metadata.read_text().splitlines()
     if graph_pickle:
         existing_graphs = [cast(PlasmidGraph, PlasmidGraph.load(graph)) for graph in graph_pickle]
-        plasmid_graph = PlasmidGraph.build(plasmids, distances, distance_threshold, metadata, existing_graphs)
-        typings = [pd.read_csv(prev, sep="\t", index_col=0).to_dict()["community"] for prev in prev_typing]
+        plasmid_graph = PlasmidGraph.build(
+            plasmids, distances, distance_threshold, metadata, existing_graphs
+        )
+        typings = [
+            pd.read_csv(prev, sep="\t", index_col=0).to_dict()["community"] for prev in prev_typing
+        ]
     else:
         plasmid_graph = PlasmidGraph.build(plasmids, distances, distance_threshold, metadata)
 
@@ -158,7 +164,11 @@ def split(
     communities.save_classification(objects_dir / "communities.tsv", "plasmid\tcommunity")
     if prev_typing:
         for i, typing in enumerate(typings):
-            communities.save_classification(objects_dir / f"compare_communities_{i}.tsv", "plasmid\tcommunity\tprevious_community", prev_typing=typing)
+            communities.save_classification(
+                objects_dir / f"compare_communities_{i}.tsv",
+                "plasmid\tcommunity\tprevious_community",
+                prev_typing=typing,
+            )
 
     logging.info("All done!")
 
@@ -212,13 +222,17 @@ def split(
     default="html",
     help="Whether to output networks as html visualisations, cytoscape formatted json, or both.",
 )
+@click.option("--prev_typing", multiple=True, help="Previous subcommunity typing, if it exists.")
 @click.option(
-    "--prev_typing", multiple=True, help="Previous subcommunity typing, if it exists."
-)
-@click.option("--reclustering_method", type=click.Choice(["unbiased", "biased", "nearest_neighbour"]), default="unbiased", help=
-    "unbiased: If including a previous subcommunity typing, all previous and new genomes will be reclustered from scratch, ignoring previous typing.\n"
-    "biased: The asynchronous label propagation will start with the previous typing as initial labels.\n"
-    "nearest_neighbour: Does not cluster the new genomes, rather, assigns type based on the closest neighbour of the previous typing."
+    "--reclustering_method",
+    type=click.Choice(["unbiased", "biased", "nearest_neighbour"]),
+    default="unbiased",
+    help="unbiased: If including a previous subcommunity typing, all previous and new genomes "
+    "will be reclustered from scratch, ignoring previous typing.\n"
+    "biased: The asynchronous label propagation will start with the previous typing as initial "
+    "labels.\n"
+    "nearest_neighbour: Does not cluster the new genomes, rather, assigns type based on the "
+    "closest neighbour of the previous typing.",
 )
 @click.option("--no-vis", is_flag=True)
 def type(
@@ -230,7 +244,7 @@ def type(
     output_type: Optional[str],
     prev_typing: Optional[tuple],
     reclustering_method: Optional[str],
-    no_vis: bool
+    no_vis: bool,
 ) -> None:
     logging.info(f"Loading communities from {communities_pickle}")
     communities = cast(Communities, Communities.load(communities_pickle))
@@ -252,20 +266,28 @@ def type(
 
     logging.info("Typing communities (i.e. splitting them into subcommunities)")
 
-    if prev_typing and reclustering_method=="nearest_neighbour":
-        typing = pd.read_csv(prev_typing[0], sep="\t") #nearest neighbour does not support merging graphs
+    if prev_typing and reclustering_method == "nearest_neighbour":
+        typing = pd.read_csv(
+            prev_typing[0], sep="\t"
+        )  # nearest neighbour does not support merging graphs
     elif prev_typing:
-        typings = [pd.read_csv(prev, sep="\t", index_col=0).to_dict()["type"] for prev in prev_typing]
+        typings = [
+            pd.read_csv(prev, sep="\t", index_col=0).to_dict()["type"] for prev in prev_typing
+        ]
 
     all_subcommunities = Subcommunities()
     all_hub_plasmids = set()
     for community in communities:
         hub_plasmids = community.remove_hub_plasmids()
         all_hub_plasmids.update(hub_plasmids)
-        if prev_typing and reclustering_method=="biased":
-            subcommunities = community.split_graph_given_labels(small_subcommunity_size_threshold, typings)
-        elif prev_typing and reclustering_method=="nearest_neighbour":
-            new_plasmids = [plasmid for plasmid in community.nodes if plasmid not in typing["plasmid"].to_list()]
+        if prev_typing and reclustering_method == "biased":
+            subcommunities = community.split_graph_given_labels(
+                small_subcommunity_size_threshold, typings
+            )
+        elif prev_typing and reclustering_method == "nearest_neighbour":
+            new_plasmids = [
+                plasmid for plasmid in community.nodes if plasmid not in typing["plasmid"].to_list()
+            ]
             subcommunities = community.nearest_neighbour(typing, new_plasmids)
         else:
             subcommunities = community.split_graph_into_subcommunities(
@@ -296,11 +318,17 @@ def type(
         for plasmid in all_hub_plasmids:
             print(plasmid, file=hub_plasmids_fh)
 
-    if prev_typing and reclustering_method!="nearest_neighbour":
+    if prev_typing and reclustering_method != "nearest_neighbour":
         for i, typing in enumerate(typings):
-            all_subcommunities.save_classification(objects_dir / f"compare_typing_{i}.tsv", "plasmid\ttype\tprevious_type",prev_typing=typing)
-            
-            clusters_pling, clusters_pling_old, plasmids = read_in_clusters(objects_dir / f"compare_typing_{i}.tsv")
+            all_subcommunities.save_classification(
+                objects_dir / f"compare_typing_{i}.tsv",
+                "plasmid\ttype\tprevious_type",
+                prev_typing=typing,
+            )
+
+            clusters_pling, clusters_pling_old, plasmids = read_in_clusters(
+                objects_dir / f"compare_typing_{i}.tsv"
+            )
             n = len(plasmids)
             contingency, k_1, k_2 = make_contingency_matrix(clusters_pling, clusters_pling_old)
             clust_dists = all_clustering_dists(contingency, k_1, k_2, n)

From eee33d3349907945a7748943a4c07f12d0b3e70b Mon Sep 17 00:00:00 2001
From: babayagaofficial <baba.yaga.df@gmail.com>
Date: Tue, 17 Feb 2026 19:16:46 +0100
Subject: [PATCH 6/7] still more pre-commit changes

---
 plasnet/plasmid_graph.py      | 11 +++++------
 plasnet/subcommunity_graph.py |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/plasnet/plasmid_graph.py b/plasnet/plasmid_graph.py
index 210125b..5cdad3a 100644
--- a/plasnet/plasmid_graph.py
+++ b/plasnet/plasmid_graph.py
@@ -26,7 +26,7 @@ def build(
         distance_filepath: Path,
         distance_threshold: float,
         plasmids_metadata: list[str],
-        existing_graphs: Optional[tuple]=None
+        existing_graphs: Optional[tuple] = None,
     ) -> "PlasmidGraph":
         """
         Creates a plasmid graph from plasmid and distance files.
@@ -63,7 +63,9 @@ def build(
         """  # noqa: E501
         plasmids = pd.read_csv(plasmids_filepath)
 
-        distance_df = pd.read_csv(distance_filepath, dtype={"plasmid_1":str, "plasmid_2":str}, sep="\t")
+        distance_df = pd.read_csv(
+            distance_filepath, dtype={"plasmid_1": str, "plasmid_2": str}, sep="\t"
+        )
         distance_df[DistanceTags.SplitDistanceTag.value] = distance_df["distance"]
 
         # apply distance threshold
@@ -76,7 +78,6 @@ def build(
             DistanceTags.SplitDistanceTag.value
         ].round(2)
 
-
         # create graph
         graph = nx.from_pandas_edgelist(
             distance_df,
@@ -88,8 +89,7 @@ def build(
 
         if existing_graphs:
             for existing_graph in existing_graphs:
-                graph = nx.compose(graph,existing_graph)
-
+                graph = nx.compose(graph, existing_graph)
 
         # add all nodes to the graph, including those that have no edges
         # possibly add metadata if they were provided
@@ -105,7 +105,6 @@ def build(
         graph.add_nodes_from(nodes_and_metadata)
 
         return PlasmidGraph(graph)
-    
 
     def split_graph_into_communities(
         self, bh_connectivity: int, bh_neighbours_edge_density: float
diff --git a/plasnet/subcommunity_graph.py b/plasnet/subcommunity_graph.py
index c775a3e..687d6ee 100644
--- a/plasnet/subcommunity_graph.py
+++ b/plasnet/subcommunity_graph.py
@@ -25,4 +25,4 @@ def _get_samples_selectors_HTML(self) -> str:
         return ""
 
     def _get_node_color(self, node: str) -> str:
-        return self._colour
\ No newline at end of file
+        return self._colour

From 414b4e16a1bc31f1afbb4b6a781c801af4084a67 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Tue, 17 Feb 2026 18:18:07 +0000
Subject: [PATCH 7/7] Updated coverage.svg

---
 coverage.svg | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/coverage.svg b/coverage.svg
index 3438732..12876e6 100644
--- a/coverage.svg
+++ b/coverage.svg
@@ -9,13 +9,13 @@
     </mask>
     <g mask="url(#a)">
         <path fill="#555" d="M0 0h63v20H0z"/>
-        <path fill="#4c1" d="M63 0h36v20H63z"/>
+        <path fill="#a4a61d" d="M63 0h36v20H63z"/>
         <path fill="url(#b)" d="M0 0h99v20H0z"/>
     </g>
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">97%</text>
-        <text x="80" y="14">97%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">78%</text>
+        <text x="80" y="14">78%</text>
     </g>
 </svg>