Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
We follow SemVer, like most of the Julia ecosystem. Below you might see the "breaking" label even for minor version bumps -- we use it a bit more loosely to denote changes that are not breaking by SemVer's definition but might still cause breakage for people using internal or experimental APIs or undocumented implementation details.

## unreleased

- `is_articulation(g, v)` for checking whether a single vertex is an articulation point
- The iFUB algorithm is used for faster diameter calculation and now supports weighted graph diameter calculation
- ECG community detection algorithm

## v1.14.0 - 2026-02-26
- **(breaking)** `neighbors`, `inneighbors`, and `outneighbors` now return an immutable `FrozenVector` instead of `Vector`
Expand Down
1 change: 1 addition & 0 deletions docs/src/algorithms/community.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Pages = [
"community/core-periphery.jl",
"community/label_propagation.jl",
"community/louvain.jl",
"community/ecg.jl",
"community/modularity.jl",
"community/rich_club.jl",
]
Expand Down
3 changes: 3 additions & 0 deletions src/Graphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,8 @@ export
triangles,
label_propagation,
louvain,
ecg,
ecg_weights,
maximal_cliques,
maximum_clique,
clique_number,
Expand Down Expand Up @@ -553,6 +555,7 @@ include("centrality/radiality.jl")
include("community/modularity.jl")
include("community/label_propagation.jl")
include("community/louvain.jl")
include("community/ecg.jl")
include("community/core-periphery.jl")
include("community/clustering.jl")
include("community/cliques.jl")
Expand Down
151 changes: 151 additions & 0 deletions src/community/ecg.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""
    ecg(g; γ=1.0, ensemble_size=16, min_edge_weight=0.05, min_weight_outside_2core=true,
        distmx=weights(g), max_moves=1000, max_merges=1000, move_tol=1e-9,
        merge_tol=1e-9, rng=nothing, seed=nothing)

Community detection using ensemble clustering for graphs (ECG). Each edge is weighted by the
proportion of runs, in an ensemble of Louvains without merges, in which its endpoints end up
in the same cluster. A final Louvain is then run on the re-weighted graph to detect the
communities.

### Optional Arguments
- `distmx=weights(g)`: distance matrix for weighted graphs.
- `ensemble_size=16`: the number of no-merge Louvains in the ensemble.
- `min_edge_weight=0.05`: the minimum edge weight passed to the final Louvain (to retain the
  original topology).
- `min_weight_outside_2core=true`: a flag to set the weight of edges outside the 2-core to
  the minimum value. If the graph is directed, the coreness is computed only using out
  degrees. Must be false if the graph has loops or parallel edges.
- `γ=1.0`: where `γ > 0` is a resolution parameter. Higher resolutions lead to more
  communities, while lower resolutions lead to fewer communities. Where `γ=1.0` it
  leads to the traditional definition of the modularity.
- `max_moves=1000`: maximum number of rounds moving vertices before merging for each Louvain.
- `max_merges=1000`: maximum number of merges in the final Louvain.
- `move_tol=1e-9`: necessary increase of modularity to move a vertex in each Louvain.
- `merge_tol=1e-9`: necessary increase of modularity in the move stage to merge in the
  final Louvain.
- `rng=nothing`: rng to use for reproducibility. May only pass one of rng or seed.
- `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed.

### Throws
- `ArgumentError` if `min_weight_outside_2core` is `true` and `g` has self-loops.

### References
- [Valérie Poulin and François Théberge. Ensemble Clustering for Graphs: Comparisons and Applications. Applied Network Science, 4:4 (2019)](https://doi.org/10.1007/s41109-019-0162-z)

# Examples
```jldoctest
julia> using Graphs

julia> barbell = blockdiag(complete_graph(3), complete_graph(3));

julia> add_edge!(barbell, 1, 4);

julia> ecg(barbell)
6-element Vector{Int64}:
 1
 1
 1
 2
 2
 2

julia> ecg(barbell, γ=0.01)
6-element Vector{Int64}:
 1
 1
 1
 1
 1
 1
```
"""
function ecg(
    g::AbstractGraph{T};
    γ=1.0,
    ensemble_size::Integer=16,
    min_edge_weight::Real=0.05,
    min_weight_outside_2core::Bool=true,
    distmx::AbstractArray{<:Number}=weights(g),
    max_moves::Integer=1000,
    max_merges::Integer=1000,
    move_tol::Real=1e-9,
    merge_tol::Real=1e-9,
    rng::Union{Nothing,AbstractRNG}=nothing,
    seed::Union{Nothing,Integer}=nothing,
) where {T}
    if min_weight_outside_2core && has_self_loops(g)
        throw(
            ArgumentError("min_weight_outside_2core must be false if the graph has loops.")
        )
    end
    rng = rng_from_rng_or_seed(rng, seed)
    if nv(g) == 0
        return T[]
    end

    # Proportion of ensemble runs in which the endpoints of each edge were co-clustered.
    ensemble_weights = ecg_weights(
        g;
        γ=γ,
        ensemble_size=ensemble_size,
        distmx=distmx,
        max_moves=max_moves,
        move_tol=move_tol,
        rng=rng,
    )

    if min_weight_outside_2core
        corenum = core_number(g)
        # Only entries that correspond to edges are stored in the sparse matrix, so it is
        # enough to visit the edges instead of scanning all nv(g)^2 index pairs.
        undirected = !is_directed(g)
        for e in edges(g)
            u, v = src(e), dst(e)
            if corenum[u] < 2 || corenum[v] < 2
                ensemble_weights[u, v] = 0.0
                if undirected
                    ensemble_weights[v, u] = 0.0
                end
            end
        end
    end

    # Blend with the adjacency matrix so every edge keeps at least `min_edge_weight`.
    # Named `final_weights` so the local does not shadow the `weights` function that
    # provides the default for `distmx`.
    final_weights =
        (1 - min_edge_weight) * ensemble_weights +
        min_edge_weight * adjacency_matrix(g, Float64)

    return louvain(
        g;
        γ=γ,
        distmx=final_weights,
        max_moves=max_moves,
        max_merges=max_merges,
        move_tol=move_tol,
        merge_tol=merge_tol,
        rng=rng,
    )
end

"""
    ecg_weights(g; γ=1.0, ensemble_size=16, distmx=weights(g), max_moves=1000,
                move_tol=1e-9, rng=nothing, seed=nothing)

Compute edge weights via an ensemble of no-merge Louvains. The weight of each edge is the
proportion of the `ensemble_size` runs in which its endpoints are in the same community.

The result is a sparse `Float64` matrix with the sparsity pattern of
`adjacency_matrix(g, Float64)`; edges whose endpoints were never co-clustered are kept as
explicitly stored zeros. For an undirected graph both `[u, v]` and `[v, u]` are updated, so
a self-loop is counted twice per run.

### Optional Arguments
- `γ=1.0`: resolution parameter passed to each Louvain.
- `ensemble_size=16`: the number of no-merge Louvains in the ensemble. If it is not
  positive, no Louvain is run and all weights are zero.
- `distmx=weights(g)`: distance matrix for weighted graphs.
- `max_moves=1000`: maximum number of rounds moving vertices for each Louvain.
- `move_tol=1e-9`: necessary increase of modularity to move a vertex in each Louvain.
- `rng=nothing`: rng to use for reproducibility. May only pass one of rng or seed.
- `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed.
"""
function ecg_weights(
    g::AbstractGraph{T};
    γ=1.0,
    ensemble_size::Integer=16,
    distmx::AbstractArray{<:Number}=weights(g),
    max_moves::Integer=1000,
    move_tol::Real=1e-9,
    rng::Union{Nothing,AbstractRNG}=nothing,
    seed::Union{Nothing,Integer}=nothing,
) where {T}
    rng = rng_from_rng_or_seed(rng, seed)
    # Create sparse adjacency matrix full of explicit zeros
    ensemble_weights = adjacency_matrix(g, Float64)
    ensemble_weights.nzval .= 0

    undirected = !is_directed(g)
    for _ in 1:ensemble_size
        # `max_merges=0` restricts Louvain to its vertex-moving stage.
        communities = louvain(
            g;
            γ=γ,
            distmx=distmx,
            max_moves=max_moves,
            max_merges=0,
            move_tol=move_tol,
            rng=rng,
        )
        for e in edges(g)
            u, v = src(e), dst(e)
            if communities[u] == communities[v]
                # Accumulate whole counts; they are turned into proportions below.
                ensemble_weights[u, v] += 1
                if undirected
                    ensemble_weights[v, u] += 1
                end
            end
        end
    end

    # Dividing the counts once avoids the rounding error of repeatedly adding
    # `1 / ensemble_size` (e.g. ten additions of 0.1 do not sum to exactly 1.0).
    # The guard keeps the all-zero result when no ensemble member was run.
    if ensemble_size > 0
        ensemble_weights.nzval ./= ensemble_size
    end

    return ensemble_weights
end
129 changes: 129 additions & 0 deletions test/community/ecg.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Tests for `ecg_weights` (ensemble edge weights) and `ecg` (final community assignment).
@testset "ECG" begin
    # Directed barbell shared by the `ecg_weights` and `ecg` tests: two directed
    # 3-cycles joined by the single edge 1 -> 4.
    triangle = SimpleDiGraph(3)
    add_edge!(triangle, 1, 2)
    add_edge!(triangle, 2, 3)
    add_edge!(triangle, 3, 1)
    directed_barbell = blockdiag(triangle, triangle)
    add_edge!(directed_barbell, 1, 4)

    # Test ecg_weights
    # Undirected barbell: only the edges inside each triangle keep a non-zero weight
    undirected_barbell = barbell_graph(3, 3)
    expected = sparse(
        [
            0.0 1.0 1.0 0.0 0.0 0.0;
            1.0 0.0 1.0 0.0 0.0 0.0;
            1.0 1.0 0.0 0.0 0.0 0.0;
            0.0 0.0 0.0 0.0 1.0 1.0;
            0.0 0.0 0.0 1.0 0.0 1.0;
            0.0 0.0 0.0 1.0 1.0 0.0
        ],
    )
    for g in test_generic_graphs(undirected_barbell)
        actual = ecg_weights(g)
        dropzeros!(actual)
        @test expected == actual
    end

    # Vertices but no edges
    edgeless = SimpleGraph(10)
    expected = spzeros(10, 10)
    for g in test_generic_graphs(edgeless)
        actual = @inferred ecg_weights(g)
        dropzeros!(actual)
        @test expected == actual
    end

    # No vertices at all
    nullgraph = SimpleGraph()
    expected = spzeros(0, 0)
    for g in test_generic_graphs(nullgraph)
        actual = @inferred ecg_weights(g)
        @test expected == actual
    end

    # Undirected graph with self-loops: each loop is expected to carry weight 2
    undirected_loops = complete_graph(2)
    add_edge!(undirected_loops, 1, 1)
    add_edge!(undirected_loops, 2, 2)
    expected = sparse([
        2.0 0.0;
        0.0 2.0
    ])
    for g in test_generic_graphs(undirected_loops)
        actual = ecg_weights(g)
        dropzeros!(actual)
        @test expected == actual
    end

    # Directed barbell (no loops)
    expected = sparse(
        [
            0.0 1.0 0.0 0.0 0.0 0.0;
            0.0 0.0 1.0 0.0 0.0 0.0;
            1.0 0.0 0.0 0.0 0.0 0.0;
            0.0 0.0 0.0 0.0 1.0 0.0;
            0.0 0.0 0.0 0.0 0.0 1.0;
            0.0 0.0 0.0 1.0 0.0 0.0
        ],
    )
    for g in test_generic_graphs(directed_barbell)
        actual = ecg_weights(g)
        dropzeros!(actual)
        @test actual == expected
    end

    # Directed graph with self-loops
    directed_loops = SimpleDiGraph(2)
    add_edge!(directed_loops, 1, 1)
    add_edge!(directed_loops, 2, 2)
    add_edge!(directed_loops, 1, 2)
    expected = sparse([
        1.0 0.0;
        0.0 1.0
    ])
    for g in test_generic_graphs(directed_loops)
        actual = ecg_weights(g)
        dropzeros!(actual)
        @test actual == expected
    end

    # Test ECG
    # Undirected
    expected = [1, 1, 1, 2, 2, 2]
    for g in test_generic_graphs(undirected_barbell)
        actual = ecg(g)
        @test expected == actual
    end

    # Directed
    expected = [1, 1, 1, 2, 2, 2]
    for g in test_generic_graphs(directed_barbell)
        actual = ecg(g)
        @test actual == expected
    end

    # Vertices but no edges: every vertex is its own community
    expected = collect(1:10)
    for g in test_generic_graphs(edgeless)
        actual = ecg(g)
        @test expected == actual
    end

    # No vertices at all
    for g in test_generic_graphs(nullgraph)
        actual = ecg(g)
        @test length(actual) == 0
    end

    # Self-loops are rejected while `min_weight_outside_2core` keeps its default (true)
    for g in test_generic_graphs(undirected_loops)
        @test_throws ArgumentError ecg(g)
    end
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ tests = [
"traversals/all_simple_paths",
"community/cliques",
"community/core-periphery",
"community/ecg",
"community/independent_sets",
"community/label_propagation",
"community/louvain",
Expand Down
Loading