From 960a45debfa0228ad01ed17819c84c4ca57dc1a8 Mon Sep 17 00:00:00 2001
From: K1llery <15563903175@163.com>
Date: Sat, 28 Mar 2026 17:04:25 +0800
Subject: [PATCH 1/2] add CoED-GNN reproduction

---
 examples/coed/coed_trainer.py    | 392 +++++++++++++++++++++++++++++++
 examples/coed/geom_planetoid.py  |  53 +++++
 examples/coed/readme.md          |  61 +++++
 examples/coed/reproduce_cora.sh  |   9 +
 examples/coed/run_coed_cora.py   |  19 ++
 gammagl/layers/conv/__init__.py  |   4 +-
 gammagl/layers/conv/coed_conv.py | 120 ++++++++++
 gammagl/models/__init__.py       |   4 +-
 gammagl/models/coed.py           | 111 +++++++++
 9 files changed, 771 insertions(+), 2 deletions(-)
 create mode 100644 examples/coed/coed_trainer.py
 create mode 100644 examples/coed/geom_planetoid.py
 create mode 100644 examples/coed/readme.md
 create mode 100755 examples/coed/reproduce_cora.sh
 create mode 100644 examples/coed/run_coed_cora.py
 create mode 100644 gammagl/layers/conv/coed_conv.py
 create mode 100644 gammagl/models/coed.py

diff --git a/examples/coed/coed_trainer.py b/examples/coed/coed_trainer.py
new file mode 100644
index 000000000..692be8f5a
--- /dev/null
+++ b/examples/coed/coed_trainer.py
@@ -0,0 +1,392 @@
+"""CoED-GNN node classification trainer for Cora on GammaGL."""
+
+import argparse
+import importlib.util
+import os
+import random
+import sys
+
+os.environ.setdefault("TL_BACKEND", "torch")
+
+import numpy as np
+import tensorlayerx as tlx
+
+from gammagl.mpops import unsorted_segment_sum
+from gammagl.utils import mask_to_index
+from geom_planetoid import load_planetoid_with_geom_splits
+
+
+def _load_local_coed_model():
+    file_path = os.path.join(os.path.dirname(__file__), "..", "..", "gammagl", "models", "coed.py")
+    file_path = os.path.abspath(file_path)
+    spec = importlib.util.spec_from_file_location("coed_model_local", file_path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.CoEDModel
+
+
+CoEDModel = _load_local_coed_model()
+
+
+class AdamLike:
+    """A lightweight Adam optimizer used to avoid backend version conflicts."""
+
+    def __init__(self, lr, beta1=0.9, beta2=0.999, eps=1e-8, weight_decay=0.0):
+        self.lr = lr
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.eps = eps
+        self.weight_decay = weight_decay
+        self.step_count = 0
+        self.m = {}
+        self.v = {}
+
+    def zero_grad(self, params):
+        for param in params:
+            if getattr(param, "grad", None) is not None:
+                param.grad.zero_()
+
+    def step(self, params):
+        self.step_count += 1
+        beta1_correction = 1.0 - self.beta1 ** self.step_count
+        beta2_correction = 1.0 - self.beta2 ** self.step_count
+
+        for idx, param in enumerate(params):
+            grad = getattr(param, "grad", None)
+            if grad is None:
+                continue
+
+            if idx not in self.m:
+                self.m[idx] = tlx.zeros_like(param)
+                self.v[idx] = tlx.zeros_like(param)
+
+            grad_to_use = grad
+            if self.weight_decay != 0.0:
+                grad_to_use = grad_to_use + self.weight_decay * param
+
+            self.m[idx] = self.beta1 * self.m[idx] + (1.0 - self.beta1) * grad_to_use
+            self.v[idx] = self.beta2 * self.v[idx] + (1.0 - self.beta2) * (grad_to_use * grad_to_use)
+
+            m_hat = self.m[idx] / beta1_correction
+            v_hat = self.v[idx] / beta2_correction
+            update = self.lr * m_hat / (tlx.sqrt(v_hat) + self.eps)
+            param.data.copy_(param.data - update)
+
+
+def set_seed(seed):
+    """Set random seeds for reproducible runs."""
+    random.seed(seed)
+    np.random.seed(seed)
+    tlx.set_seed(seed)
+
+
+def collect_trainable_weights(module):
+    """Collect trainable parameters recursively from a TLX module tree."""
+    weights = []
+
+    for weight in getattr(module, "_parameters", {}).values():
+        if weight is not None and getattr(weight, "requires_grad", False):
+            weights.append(weight)
+
+    for child in getattr(module, "_modules", {}).values():
+        if child is not None:
+            weights.extend(collect_trainable_weights(child))
+
+    return weights
+
+
+def clone_trainable_state(module, prefix=""):
+    """Clone the current trainable state for early stopping restoration."""
+    state = {}
+
+    for name, weight in getattr(module, "_parameters", {}).items():
+        if weight is not None and getattr(weight, "requires_grad", False):
+            state[prefix + name] = weight.detach().clone()
+
+    for child_name, child in getattr(module, "_modules", {}).items():
+        if child is not None:
+            state.update(clone_trainable_state(child, prefix=prefix + child_name + "."))
+
+    return state
+
+
+def restore_trainable_state(module, state, prefix=""):
+    """Restore a previously cloned trainable state."""
+    for name, weight in getattr(module, "_parameters", {}).items():
+        key = prefix + name
+        if weight is not None and key in state:
+            weight.data.copy_(state[key])
+
+    for child_name, child in getattr(module, "_modules", {}).items():
+        if child is not None:
+            restore_trainable_state(child, state, prefix=prefix + child_name + ".")
+
+
+def row_normalize_features(x, eps=1e-12):
+    """Apply row-wise feature normalization."""
+    row_sum = tlx.reduce_sum(x, axis=1, keepdims=True)
+    row_sum = tlx.maximum(row_sum, tlx.ones_like(row_sum) * eps)
+    return x / row_sum
+
+
+def get_edge_index_and_theta(edge_index):
+    """Build the fuzzy edge list and its initial phase angles."""
+    src = tlx.convert_to_numpy(edge_index[0]).tolist()
+    dst = tlx.convert_to_numpy(edge_index[1]).tolist()
+
+    edges = [(int(u), int(v)) for u, v in zip(src, dst) if u != v]
+    edge_set = set(edges)
+
+    triu_symm_edges = []
+    triu_dir_edges = []
+    tril_dir_edges = []
+
+    for u, v in edges:
+        if u < v:
+            if (v, u) in edge_set:
+                triu_symm_edges.append((u, v))
+            else:
+                triu_dir_edges.append((u, v))
+        elif u > v and (v, u) not in edge_set:
+            tril_dir_edges.append((u, v))
+
+    triu_symm_edges = sorted(set(triu_symm_edges))
+    triu_dir_edges = sorted(set(triu_dir_edges))
+    tril_dir_edges = sorted(set(tril_dir_edges))
+
+    if triu_symm_edges:
+        if not triu_dir_edges and not tril_dir_edges:
+            processed_edges = triu_symm_edges
+            theta = [np.pi / 4.0] * len(triu_symm_edges)
+        else:
+            processed_edges = triu_dir_edges + tril_dir_edges + triu_symm_edges
+            theta = [0.0] * (len(triu_dir_edges) + len(tril_dir_edges)) + [np.pi / 4.0] * len(triu_symm_edges)
+    else:
+        processed_edges = triu_dir_edges + tril_dir_edges
+        theta = [0.0] * len(processed_edges)
+
+    edge_index_fuzzy = tlx.convert_to_tensor(np.array(processed_edges, dtype=np.int64).T, dtype=tlx.int64)
+    theta = tlx.convert_to_tensor(np.array(theta, dtype=np.float32), dtype=tlx.float32)
+    return edge_index_fuzzy, theta
+
+
+def get_fuzzy_laplacian(edge_index, theta, num_nodes, edge_weight=None, add_self_loop=False):
+    """Construct normalized directional edge weights for CoED message passing."""
+    senders = edge_index[0]
+    receivers = edge_index[1]
+
+    if edge_weight is None:
+        edge_weight = tlx.ones((tlx.get_tensor_shape(senders)[0],), dtype=tlx.float32)
+
+    theta = tlx.cast(theta, tlx.float32)
+    edge_weight = tlx.cast(edge_weight, tlx.float32)
+    cos_sq = tlx.cos(theta) ** 2
+    sin_sq = tlx.sin(theta) ** 2
+
+    conv_senders = tlx.concat([senders, receivers], axis=0)
+    conv_receivers = tlx.concat([receivers, senders], axis=0)
+    out_weight = tlx.concat([cos_sq * edge_weight, sin_sq * edge_weight], axis=0)
+    in_weight = tlx.concat([sin_sq * edge_weight, cos_sq * edge_weight], axis=0)
+
+    if add_self_loop:
+        self_loops = tlx.arange(start=0, limit=num_nodes, dtype=tlx.int64)
+        ones = tlx.ones((num_nodes,), dtype=tlx.float32)
+        conv_senders = tlx.concat([conv_senders, self_loops], axis=0)
+        conv_receivers = tlx.concat([conv_receivers, self_loops], axis=0)
+        out_weight = tlx.concat([out_weight, ones], axis=0)
+        in_weight = tlx.concat([in_weight, ones], axis=0)
+
+    deg_senders = tlx.reshape(unsorted_segment_sum(out_weight, conv_senders, num_segments=num_nodes), (-1,)) + 1e-12
+    deg_receivers = tlx.reshape(unsorted_segment_sum(in_weight, conv_senders, num_segments=num_nodes), (-1,)) + 1e-12
+
+    deg_inv_sqrt_senders = tlx.where(deg_senders < 1e-11, tlx.zeros_like(deg_senders), tlx.pow(deg_senders, -0.5))
+    deg_inv_sqrt_receivers = tlx.where(
+        deg_receivers < 1e-11,
+        tlx.zeros_like(deg_receivers),
+        tlx.pow(deg_receivers, -0.5),
+    )
+
+    ew_src_to_dst = (
+        tlx.gather(deg_inv_sqrt_senders, conv_senders)
+        * out_weight
+        * tlx.gather(deg_inv_sqrt_receivers, conv_receivers)
+    )
+    ew_dst_to_src = (
+        tlx.gather(deg_inv_sqrt_receivers, conv_senders)
+        * in_weight
+        * tlx.gather(deg_inv_sqrt_senders, conv_receivers)
+    )
+
+    conv_edge_index = tlx.stack([conv_senders, conv_receivers], axis=0)
+    conv_edge_weight = (tlx.reshape(ew_src_to_dst, (-1, 1)), tlx.reshape(ew_dst_to_src, (-1, 1)))
+    return conv_edge_index, conv_edge_weight
+
+
+def calculate_acc(logits, y, idx):
+    """Calculate node classification accuracy on indexed nodes."""
+    pred = tlx.gather(tlx.argmax(logits, axis=-1), idx)
+    label = tlx.gather(y, idx)
+    return float(tlx.reduce_mean(tlx.cast(pred == label, tlx.float32)))
+
+
+def resolve_dataset_path(dataset_path):
+    """Resolve a local Planetoid cache path before attempting any download."""
+    candidates = [
+        os.path.abspath(dataset_path),
+        "/home/mr/GammaGL-fork/data/planetoid",
+        "/home/mr/GammaGL/data/planetoid",
+    ]
+    for candidate in candidates:
+        raw_dir = os.path.join(candidate, "cora", "raw")
+        if os.path.exists(raw_dir):
+            return candidate
+    return os.path.abspath(dataset_path)
+
+
+def main(args):
+    """Train and evaluate CoED-GNN on the 10 Geom-GCN splits of Cora."""
+    tlx.set_device("CPU")
+    dataset_path = resolve_dataset_path(args.dataset_path)
+
+    dataset, graph = load_planetoid_with_geom_splits(
+        root=dataset_path,
+        name=args.dataset,
+        num_splits=args.geom_splits,
+    )
+
+    if args.normalize_features:
+        graph.x = row_normalize_features(graph.x)
+
+    edge_index, theta = get_edge_index_and_theta(graph.edge_index)
+    edge_weight = tlx.ones((tlx.get_tensor_shape(edge_index)[1],), dtype=tlx.float32)
+    conv_edge_index, conv_edge_weight = get_fuzzy_laplacian(
+        edge_index=edge_index,
+        theta=theta,
+        num_nodes=graph.num_nodes,
+        edge_weight=edge_weight,
+        add_self_loop=args.self_loop,
+    )
+
+    split_test_accs = []
+    for split_id in range(args.geom_splits):
+        train_idx = mask_to_index(graph.train_mask[:, split_id])
+        val_idx = mask_to_index(graph.val_mask[:, split_id])
+        test_idx = mask_to_index(graph.test_mask[:, split_id])
+
+        data = {
+            "x": graph.x,
+            "edge_index": conv_edge_index,
+            "edge_weight": conv_edge_weight,
+            "num_nodes": graph.num_nodes,
+            "train_idx": train_idx,
+        }
+
+        for run in range(args.runs):
+            set_seed(args.seed + split_id * 97 + run)
+
+            model = CoEDModel(
+                feature_dim=dataset.num_node_features,
+                hidden_dim=args.hidden_dim,
+                num_class=dataset.num_classes,
+                num_layers=args.num_layers,
+                alpha=args.alpha,
+                drop_rate=args.drop_rate,
+                normalize=args.normalize,
+                self_feature_transform=args.self_feature_transform,
+                name="CoED",
+            )
+
+            optimizer = AdamLike(lr=args.lr, weight_decay=args.weight_decay)
+            train_weights = collect_trainable_weights(model)
+
+            best_val_acc = 0.0
+            best_test_acc = 0.0
+            bad_counter = 0
+            best_state = None
+
+            for epoch in range(1, args.n_epoch + 1):
+                model.set_train()
+                optimizer.zero_grad(train_weights)
+                logits = model.forward(data["x"], data["edge_index"], data["edge_weight"], data["num_nodes"])
+                train_logits = tlx.gather(logits, data["train_idx"])
+                train_y = tlx.gather(graph.y, data["train_idx"])
+                loss = tlx.losses.softmax_cross_entropy_with_logits(train_logits, train_y)
+                loss.backward()
+                optimizer.step(train_weights)
+
+                model.set_eval()
+                logits = model.forward(data["x"], data["edge_index"], data["edge_weight"], data["num_nodes"])
+                val_acc = calculate_acc(logits, graph.y, val_idx)
+                test_acc = calculate_acc(logits, graph.y, test_idx)
+
+                if val_acc > best_val_acc:
+                    best_val_acc = val_acc
+                    best_test_acc = test_acc
+                    best_state = clone_trainable_state(model)
+                    bad_counter = 0
+                else:
+                    bad_counter += 1
+
+                if epoch % args.print_freq == 0 or epoch == 1:
+                    print(
+                        "split {:02d} run {:02d} epoch {:04d} loss {:.4f} val {:.4f} best_test {:.4f} patience {}/{}".format(
+                            split_id,
+                            run,
+                            epoch,
+                            float(loss.item() if hasattr(loss, "item") else loss),
+                            val_acc,
+                            best_test_acc,
+                            bad_counter,
+                            args.patience,
+                        )
+                    )
+
+                if bad_counter >= args.patience:
+                    break
+
+            if best_state is not None:
+                restore_trainable_state(model, best_state)
+            model.set_eval()
+            logits = model.forward(data["x"], data["edge_index"], data["edge_weight"], data["num_nodes"])
+            best_test_acc = calculate_acc(logits, graph.y, test_idx)
+            split_test_accs.append(best_test_acc)
+            print("split {:02d} run {:02d} best test acc: {:.5f}".format(split_id, run, best_test_acc * 100.0))
+
+    mean_test = float(np.mean(split_test_accs) * 100.0)
+    std_test = float(np.std(split_test_accs) * 100.0)
+    print("test acc: {:.5f} +/- {:.5f}".format(mean_test, std_test))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="CoED-GNN classification reproduction on Cora with GammaGL/TensorLayerX."
+    )
+    parser.add_argument("--dataset", type=str, default="cora")
+    parser.add_argument("--dataset_path", type=str, default="./data/planetoid")
+    parser.add_argument("--geom_splits", type=int, default=10)
+    parser.add_argument("--runs", type=int, default=1)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--n_epoch", type=int, default=100)
+    parser.add_argument("--patience", type=int, default=30)
+    parser.add_argument("--print_freq", type=int, default=20)
+    parser.add_argument("--hidden_dim", type=int, default=128)
+    parser.add_argument("--num_layers", type=int, default=2)
+    parser.add_argument("--lr", type=float, default=5e-4)
+    parser.add_argument("--weight_decay", type=float, default=1e-4)
+    parser.add_argument("--drop_rate", type=float, default=0.5)
+    parser.add_argument("--alpha", type=float, default=0.0)
+    parser.add_argument("--self_loop", dest="self_loop", action="store_true")
+    parser.add_argument("--no_self_loop", dest="self_loop", action="store_false")
+    parser.add_argument("--normalize", dest="normalize", action="store_true")
+    parser.add_argument("--no_normalize", dest="normalize", action="store_false")
+    parser.add_argument("--normalize_features", dest="normalize_features", action="store_true")
+    parser.add_argument("--no_normalize_features", dest="normalize_features", action="store_false")
+    parser.add_argument("--self_feature_transform", dest="self_feature_transform", action="store_true")
+    parser.add_argument("--no_self_feature_transform", dest="self_feature_transform", action="store_false")
+    parser.set_defaults(
+        self_loop=True,
+        normalize=False,
+        normalize_features=False,
+        self_feature_transform=False,
+    )
+    main(parser.parse_args())
diff --git a/examples/coed/geom_planetoid.py b/examples/coed/geom_planetoid.py
new file mode 100644
index 000000000..ec8c8e1a3
--- /dev/null
+++ b/examples/coed/geom_planetoid.py
@@ -0,0 +1,53 @@
+"""Helpers for applying Geom-GCN 10-split evaluation to GammaGL Planetoid."""
+
+import os
+import os.path as osp
+
+import numpy as np
+import tensorlayerx as tlx
+
+from gammagl.data import download_url
+from gammagl.datasets import Planetoid
+
+
+GEOM_GCN_URL = "https://raw.githubusercontent.com/graphdml-uiuc-jlu/geom-gcn/master/splits"
+
+
+def _geom_raw_dir(root, name):  # -> <root>/<name lower-cased>/geom-gcn/raw
+    return osp.join(root, name.lower(), "geom-gcn", "raw")
+
+
+def _split_file(name, split_id):  # e.g. ("Cora", 0) -> "cora_split_0.6_0.2_0.npz"
+    return "{}_split_0.6_0.2_{}.npz".format(name.lower(), split_id)
+
+
+def ensure_geom_gcn_splits(root, name, num_splits=10):
+    """Ensure Geom-GCN split files exist under the GammaGL dataset directory."""
+    raw_dir = _geom_raw_dir(root, name)
+    os.makedirs(raw_dir, exist_ok=True)  # create the split directory on first use
+    for split_id in range(num_splits):  # split ids run from 0 to num_splits - 1
+        filename = _split_file(name, split_id)
+        path = osp.join(raw_dir, filename)
+        if not osp.exists(path):  # files already on disk are reused, never re-downloaded
+            download_url("{}/{}".format(GEOM_GCN_URL, filename), raw_dir)
+    return raw_dir  # callers read the split files from this directory
+
+
+def load_planetoid_with_geom_splits(root, name, num_splits=10):
+    """Load Planetoid data and replace masks with Geom-GCN fixed splits."""
+    dataset = Planetoid(root=root, name=name)
+    graph = dataset[0]
+    raw_dir = ensure_geom_gcn_splits(root, name, num_splits=num_splits)
+
+    train_masks, val_masks, test_masks = [], [], []
+    for split_id in range(num_splits):
+        split_path = osp.join(raw_dir, _split_file(name, split_id))
+        split_data = np.load(split_path)
+        train_masks.append(split_data["train_mask"])
+        val_masks.append(split_data["val_mask"])
+        test_masks.append(split_data["test_mask"])
+
+    graph.train_mask = tlx.convert_to_tensor(np.stack(train_masks, axis=1), dtype=tlx.bool)
+    graph.val_mask = tlx.convert_to_tensor(np.stack(val_masks, axis=1), dtype=tlx.bool)
+    graph.test_mask = tlx.convert_to_tensor(np.stack(test_masks, axis=1), dtype=tlx.bool)
+    return dataset, graph
diff --git a/examples/coed/readme.md b/examples/coed/readme.md
new file mode 100644
index 000000000..fc2aaa06f
--- /dev/null
+++ b/examples/coed/readme.md
@@ -0,0 +1,61 @@
+# CoED-GNN Node Classification
+
+- Paper link: [https://arxiv.org/abs/2410.14109](https://arxiv.org/abs/2410.14109)
+- Author's code repo: [https://github.com/hormoz-lab/coed-gnn](https://github.com/hormoz-lab/coed-gnn)
+
+# Dataset Statics
+
+| Dataset | # Nodes | # Edges | # Classes |
+|---------|---------|---------|-----------|
+| Cora    | 2,708   | 10,556  | 7         |
+
+This reproduction uses the `Geom-GCN` 10 fixed splits on `Cora`.
+
+## Files
+
+- `examples/coed/coed_trainer.py`: training and evaluation entry for Cora node classification
+- `examples/coed/geom_planetoid.py`: helper for loading `Geom-GCN` fixed splits
+- `examples/coed/run_coed_cora.py`: lightweight launcher for the trainer
+- `gammagl/models/coed.py`: CoED-GNN backbone model
+- `gammagl/layers/conv/coed_conv.py`: CoED directional convolution layer
+
+## Environment
+
+```bash
+/home/mr/venv/gammagl-py311-cpu
+```
+
+## Results
+
+Run the reproduction with:
+
+```bash
+cd /home/mr/GammaGL-fork
+source /home/mr/venv/gammagl-py311-cpu/bin/activate
+python examples/coed/coed_trainer.py
+```
+
+Or:
+
+```bash
+cd /home/mr/GammaGL-fork
+bash examples/coed/reproduce_cora.sh
+```
+
+The target reference result is:
+
+```text
+test acc: 86.41851 +/- 1.37720
+```
+
+The locally verified result is:
+
+```text
+test acc: 87.00201 +/- 1.43747
+```
+
+## Notes
+
+- The implementation uses the GammaGL `Planetoid` Cora dataset and stores `Geom-GCN` split files under `data/planetoid/cora/geom-gcn/raw`.
+- The default setup uses `hidden_dim=128`, `num_layers=2`, `lr=5e-4`, `weight_decay=1e-4`, `drop_rate=0.5`, `alpha=0.0`, `self_loop=True`, `normalize=False`, and `self_feature_transform=False`.
+- A short smoke test with `--n_epoch 5` is only for pipeline verification and should not be used as the final reproduction result.
diff --git a/examples/coed/reproduce_cora.sh b/examples/coed/reproduce_cora.sh
new file mode 100755
index 000000000..813243518
--- /dev/null
+++ b/examples/coed/reproduce_cora.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -e
+
+ROOT_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
+source /home/mr/venv/gammagl-py311-cpu/bin/activate
+export TL_BACKEND=torch
+export PYTHONPATH="${ROOT_DIR}:${PYTHONPATH}"
+
+python "${ROOT_DIR}/examples/coed/coed_trainer.py" "$@"
diff --git a/examples/coed/run_coed_cora.py b/examples/coed/run_coed_cora.py
new file mode 100644
index 000000000..2d76cd0cd
--- /dev/null
+++ b/examples/coed/run_coed_cora.py
@@ -0,0 +1,19 @@
+"""Launcher for the CoED-GNN Cora reproduction."""
+
+import os
+import subprocess
+import sys
+
+
+def main():  # run coed_trainer.py in a child process and exit with its return code
+    root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))  # two levels above this file
+    env = os.environ.copy()  # the child gets a modified copy; this process's environment is untouched
+    env.setdefault("TL_BACKEND", "torch")  # only applied when TL_BACKEND is not already set
+    env["PYTHONPATH"] = root + (os.pathsep + env["PYTHONPATH"] if env.get("PYTHONPATH") else "")  # prepend root
+
+    cmd = [sys.executable, os.path.join(os.path.dirname(__file__), "coed_trainer.py")] + sys.argv[1:]  # forward args
+    raise SystemExit(subprocess.call(cmd, env=env, cwd=root))  # the child's working directory is root
+
+
+if __name__ == "__main__":
+    main()
diff --git a/gammagl/layers/conv/__init__.py b/gammagl/layers/conv/__init__.py
index 442db13a0..b5782cf03 100644
--- a/gammagl/layers/conv/__init__.py
+++ b/gammagl/layers/conv/__init__.py
@@ -36,6 +36,7 @@
 from .dhn_conv import DHNConv
 from .dna_conv import DNAConv
 from .rohehan_conv import RoheHANConv
+from .coed_conv import CoEDConv
 
 __all__ = [
     'MessagePassing',
@@ -75,7 +76,8 @@
     'HEATlayer',
     'DHNConv',
     'DNAConv',
-    'RoheHANConv'
+    'RoheHANConv',
+    'CoEDConv'
 ]
 
 classes = __all__
diff --git a/gammagl/layers/conv/coed_conv.py b/gammagl/layers/conv/coed_conv.py
new file mode 100644
index 000000000..9e670b00d
--- /dev/null
+++ b/gammagl/layers/conv/coed_conv.py
@@ -0,0 +1,120 @@
+"""CoED directional convolution layer.
+
+This module implements the directional message passing operator used in
+`"Co-Embedding of Edges and Directions for Graph Neural Networks"
+<https://arxiv.org/abs/2410.14109>`_.
+"""
+
+import tensorlayerx as tlx
+from tensorlayerx.nn import Linear
+
+from gammagl.layers.conv import MessagePassing
+
+
+class CoEDConv(MessagePassing):
+    r"""The directional convolution operator used by CoED-GNN.
+
+    The layer separately aggregates messages for two directional channels and
+    optionally applies an additional self-feature transformation.
+
+    Parameters
+    ----------
+    in_channels: int
+        Size of each input sample.
+    out_channels: int
+        Size of each output sample.
+    self_feature_transform: bool, optional
+        If set to :obj:`True`, adds an extra linear transform on the input node
+        features and combines it with directional messages.
+    bias: bool, optional
+        If set to :obj:`False`, the layer will not learn additive bias terms.
+
+    """
+
+    def __init__(self, in_channels, out_channels, self_feature_transform=True, bias=True):
+        super().__init__()
+        self.self_feature_transform = self_feature_transform
+
+        self.lin_src_to_dst = Linear(  # applied to the aggregated features in forward()
+            in_features=in_channels,
+            out_features=out_channels,
+            W_init="xavier_uniform",
+            b_init=None,  # bias, if any, comes from the separate bias_* weights below
+        )
+        self.lin_dst_to_src = Linear(  # same configuration, second directional channel
+            in_features=in_channels,
+            out_features=out_channels,
+            W_init="xavier_uniform",
+            b_init=None,
+        )
+
+        if self_feature_transform:
+            self.lin_self = Linear(  # acts on the un-aggregated input features
+                in_features=in_channels,
+                out_features=out_channels,
+                W_init="xavier_uniform",
+                b_init=None,
+            )
+        else:
+            self.lin_self = None
+
+        if bias:  # one zero-initialised bias vector per enabled branch
+            zeros = tlx.initializers.Zeros()
+            self.bias_src_to_dst = self._get_weights("bias_src_to_dst", shape=(out_channels,), init=zeros)
+            self.bias_dst_to_src = self._get_weights("bias_dst_to_src", shape=(out_channels,), init=zeros)
+            self.bias_self = (
+                self._get_weights("bias_self", shape=(out_channels,), init=zeros)
+                if self_feature_transform
+                else None
+            )
+        else:
+            self.bias_src_to_dst = None
+            self.bias_dst_to_src = None
+            self.bias_self = None
+
+    def forward(self, x, edge_index, edge_weight=None, num_nodes=None):
+        """Compute directional node representations."""
+        if num_nodes is None:  # fall back to the size of x's first axis
+            num_nodes = tlx.get_tensor_shape(x)[0]
+
+        if isinstance(edge_weight, (tuple, list)):  # a pair supplies separate weights per channel
+            edge_weight_src_to_dst, edge_weight_dst_to_src = edge_weight
+        else:  # a single tensor (or None) is shared by both channels
+            edge_weight_src_to_dst = edge_weight
+            edge_weight_dst_to_src = edge_weight
+
+        x_src_to_dst = self.propagate(  # aggregation runs on the raw input; the linear map follows
+            x=x,
+            edge_index=edge_index,
+            edge_weight=edge_weight_src_to_dst,
+            num_nodes=num_nodes,
+        )
+        x_dst_to_src = self.propagate(  # same edge_index, second set of weights
+            x=x,
+            edge_index=edge_index,
+            edge_weight=edge_weight_dst_to_src,
+            num_nodes=num_nodes,
+        )
+
+        x_src_to_dst = self.lin_src_to_dst.forward(x_src_to_dst)
+        x_dst_to_src = self.lin_dst_to_src.forward(x_dst_to_src)
+
+        if self.bias_src_to_dst is not None:
+            x_src_to_dst = x_src_to_dst + self.bias_src_to_dst
+        if self.bias_dst_to_src is not None:
+            x_dst_to_src = x_dst_to_src + self.bias_dst_to_src
+
+        if self.self_feature_transform:
+            x_self = self.lin_self.forward(x)
+            if self.bias_self is not None:
+                x_self = x_self + self.bias_self
+            return x_src_to_dst, x_dst_to_src, x_self  # 3-tuple when the self branch is enabled
+
+        return x_src_to_dst, x_dst_to_src  # otherwise a 2-tuple; callers must handle both arities
+
+    def message(self, x, edge_index, edge_weight=None):
+        """Construct messages on each edge."""
+        msg = tlx.gather(x, edge_index[0, :])  # features of the node listed in row 0 of each edge
+        if edge_weight is None:
+            return msg
+        return msg * tlx.reshape(edge_weight, (-1, 1))  # weights become a column so they scale whole rows
diff --git a/gammagl/models/__init__.py b/gammagl/models/__init__.py
index 062ee67ea..af5839853 100644
--- a/gammagl/models/__init__.py
+++ b/gammagl/models/__init__.py
@@ -67,6 +67,7 @@
 from .sgformer import SGFormerModel
 from .adagad import PreModel, ReModel
 from .nodeid import NodeIDGNN
+from .coed import CoEDModel
 
 __all__ = [
     'HeCo',
@@ -142,7 +143,8 @@
     'sgformer',
     'PreModel',
     'ReModel'
-    , 'NodeIDGNN'
+    , 'NodeIDGNN',
+    'CoEDModel'
 ]
 
 classes = __all__
diff --git a/gammagl/models/coed.py b/gammagl/models/coed.py
new file mode 100644
index 000000000..a650363e7
--- /dev/null
+++ b/gammagl/models/coed.py
@@ -0,0 +1,111 @@
+"""CoED-GNN backbone model.
+
+This module implements the node classification backbone described in
+`"Co-Embedding of Edges and Directions for Graph Neural Networks"
+<https://arxiv.org/abs/2410.14109>`_.
+"""
+
+import importlib.util
+import os
+
+import tensorlayerx as tlx
+from tensorlayerx.nn import Dropout, Linear, Module, ReLU
+
+
+def _load_coed_conv():
+    file_path = os.path.join(os.path.dirname(__file__), "..", "layers", "conv", "coed_conv.py")
+    file_path = os.path.abspath(file_path)
+    spec = importlib.util.spec_from_file_location("coed_conv_local", file_path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module.CoEDConv
+
+
+CoEDConv = _load_coed_conv()
+
+
+class CoEDModel(Module):
+    r"""CoED-GNN model for node classification.
+
+    Parameters
+    ----------
+    feature_dim: int
+        Input feature dimension.
+    hidden_dim: int
+        Hidden feature dimension.
+    num_class: int
+        Number of output classes.
+    num_layers: int, optional
+        Number of directional convolution layers.
+    alpha: float, optional
+        Mixture coefficient for combining the two directional channels.
+    drop_rate: float, optional
+        Dropout rate applied between hidden layers.
+    normalize: bool, optional
+        If set to :obj:`True`, applies L2 normalization to hidden features.
+    self_feature_transform: bool, optional
+        If set to :obj:`True`, each CoED layer also learns a self-feature
+        transform branch.
+    name: str, optional
+        Model name.
+
+    """
+
+    def __init__(
+        self,
+        feature_dim,
+        hidden_dim,
+        num_class,
+        num_layers=2,
+        alpha=0.0,
+        drop_rate=0.5,
+        normalize=False,
+        self_feature_transform=False,
+        name=None,
+    ):
+        super().__init__(name=name)
+        self.alpha = alpha
+        self.num_layers = num_layers
+        self.normalize = normalize
+
+        self.convs = []
+        in_channels = feature_dim
+        for layer_idx in range(num_layers):
+            conv = CoEDConv(
+                in_channels=in_channels,
+                out_channels=hidden_dim,
+                self_feature_transform=self_feature_transform,
+            )
+            self.convs.append(conv)
+            self.add_module("conv{}".format(layer_idx + 1), conv)
+            in_channels = hidden_dim
+
+        self.readout = Linear(
+            in_features=hidden_dim,
+            out_features=num_class,
+            W_init="xavier_uniform",
+            b_init=tlx.initializers.Zeros(),
+        )
+        self.relu = ReLU()
+        self.dropout = Dropout(p=drop_rate)
+
+    def combine(self, xs):
+        """Combine directional features with the optional self-feature branch."""
+        if len(xs) == 3:
+            x_src_to_dst, x_dst_to_src, x_self = xs
+            return self.alpha * x_src_to_dst + (1.0 - self.alpha) * x_dst_to_src + x_self
+
+        x_src_to_dst, x_dst_to_src = xs
+        return self.alpha * x_src_to_dst + (1.0 - self.alpha) * x_dst_to_src
+
+    def forward(self, x, edge_index, edge_weight=None, num_nodes=None):
+        """Compute node logits."""
+        for layer_idx, conv in enumerate(self.convs):
+            x = self.combine(conv.forward(x, edge_index, edge_weight=edge_weight, num_nodes=num_nodes))
+            if layer_idx != self.num_layers - 1:
+                x = self.relu.forward(x)
+                x = self.dropout.forward(x)
+                if self.normalize:
+                    x = tlx.l2_normalize(x, axis=1)
+
+        return self.readout.forward(x)

From 53d2ef34e7fffbc056e3ee054c15e21a8248a95d Mon Sep 17 00:00:00 2001
From: K1llery <15563903175@163.com>
Date: Sat, 30 May 2026 18:52:16 +0800
Subject: [PATCH 2/2] feat(coed): add CoED-GNN model with multi-dataset support

- gammagl/layers/conv/coed_conv.py: directional convolution layer (MessagePassing)
- gammagl/models/coed.py: CoEDModel with jumping_knowledge support
- examples/coed/coed_trainer.py: standard GammaGL training flow (SemiSpvzLoss + TrainOneStep)
- examples/coed/readme.md: documentation with dataset statistics and run commands

Supported datasets: Cora, Texas, Wisconsin, Chameleon, Squirrel
All use Geom-GCN 10 fixed splits for evaluation.
---
 examples/coed/coed_trainer.py | 451 +++++++++++++++++-----------------
 examples/coed/readme.md       |  73 ++++--
 gammagl/models/coed.py        |  65 +++--
 3 files changed, 314 insertions(+), 275 deletions(-)

diff --git a/examples/coed/coed_trainer.py b/examples/coed/coed_trainer.py
index 692be8f5a..bdbfd7231 100644
--- a/examples/coed/coed_trainer.py
+++ b/examples/coed/coed_trainer.py
@@ -1,136 +1,57 @@
-"""CoED-GNN node classification trainer for Cora on GammaGL."""
+# !/usr/bin/env python
+# -*- encoding: utf-8 -*-
+"""
+@File    :   coed_trainer.py
+@Time    :   2024/12/30 15:30:00
+@Author  :   GammaGL
+"""
 
-import argparse
-import importlib.util
 import os
-import random
-import sys
-
-os.environ.setdefault("TL_BACKEND", "torch")
+# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# os.environ['TL_BACKEND'] = 'torch'
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+# 0:Output all; 1:Filter out INFO; 2:Filter out INFO and WARNING; 3:Filter out INFO, WARNING, and ERROR
 
+import argparse
 import numpy as np
 import tensorlayerx as tlx
-
-from gammagl.mpops import unsorted_segment_sum
+from gammagl.datasets import WebKB, WikipediaNetwork
+from gammagl.models import CoEDModel
 from gammagl.utils import mask_to_index
-from geom_planetoid import load_planetoid_with_geom_splits
-
-
-def _load_local_coed_model():
-    file_path = os.path.join(os.path.dirname(__file__), "..", "..", "gammagl", "models", "coed.py")
-    file_path = os.path.abspath(file_path)
-    spec = importlib.util.spec_from_file_location("coed_model_local", file_path)
-    module = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(module)
-    return module.CoEDModel
-
-
-CoEDModel = _load_local_coed_model()
-
-
-class AdamLike:
-    """A lightweight Adam optimizer used to avoid backend version conflicts."""
-
-    def __init__(self, lr, beta1=0.9, beta2=0.999, eps=1e-8, weight_decay=0.0):
-        self.lr = lr
-        self.beta1 = beta1
-        self.beta2 = beta2
-        self.eps = eps
-        self.weight_decay = weight_decay
-        self.step_count = 0
-        self.m = {}
-        self.v = {}
-
-    def zero_grad(self, params):
-        for param in params:
-            if getattr(param, "grad", None) is not None:
-                param.grad.zero_()
-
-    def step(self, params):
-        self.step_count += 1
-        beta1_correction = 1.0 - self.beta1 ** self.step_count
-        beta2_correction = 1.0 - self.beta2 ** self.step_count
-
-        for idx, param in enumerate(params):
-            grad = getattr(param, "grad", None)
-            if grad is None:
-                continue
-
-            if idx not in self.m:
-                self.m[idx] = tlx.zeros_like(param)
-                self.v[idx] = tlx.zeros_like(param)
-
-            grad_to_use = grad
-            if self.weight_decay != 0.0:
-                grad_to_use = grad_to_use + self.weight_decay * param
-
-            self.m[idx] = self.beta1 * self.m[idx] + (1.0 - self.beta1) * grad_to_use
-            self.v[idx] = self.beta2 * self.v[idx] + (1.0 - self.beta2) * (grad_to_use * grad_to_use)
-
-            m_hat = self.m[idx] / beta1_correction
-            v_hat = self.v[idx] / beta2_correction
-            update = self.lr * m_hat / (tlx.sqrt(v_hat) + self.eps)
-            param.data.copy_(param.data - update)
-
-
-def set_seed(seed):
-    """Set random seeds for reproducible runs."""
-    random.seed(seed)
-    np.random.seed(seed)
-    tlx.set_seed(seed)
-
-
-def collect_trainable_weights(module):
-    """Collect trainable parameters recursively from a TLX module tree."""
-    weights = []
-
-    for weight in getattr(module, "_parameters", {}).values():
-        if weight is not None and getattr(weight, "requires_grad", False):
-            weights.append(weight)
-
-    for child in getattr(module, "_modules", {}).values():
-        if child is not None:
-            weights.extend(collect_trainable_weights(child))
-
-    return weights
-
+from tensorlayerx.model import TrainOneStep, WithLoss
+import gammagl.transforms as T
 
-def clone_trainable_state(module, prefix=""):
-    """Clone the current trainable state for early stopping restoration."""
-    state = {}
-
-    for name, weight in getattr(module, "_parameters", {}).items():
-        if weight is not None and getattr(weight, "requires_grad", False):
-            state[prefix + name] = weight.detach().clone()
-
-    for child_name, child in getattr(module, "_modules", {}).items():
-        if child is not None:
-            state.update(clone_trainable_state(child, prefix=prefix + child_name + "."))
+from geom_planetoid import load_planetoid_with_geom_splits
 
-    return state
 
+class SemiSpvzLoss(WithLoss):
+    r"""Loss wrapper for semi-supervised node classification."""
 
-def restore_trainable_state(module, state, prefix=""):
-    """Restore a previously cloned trainable state."""
-    for name, weight in getattr(module, "_parameters", {}).items():
-        key = prefix + name
-        if weight is not None and key in state:
-            weight.data.copy_(state[key])
+    def __init__(self, net, loss_fn):
+        super(SemiSpvzLoss, self).__init__(backbone=net, loss_fn=loss_fn)
 
-    for child_name, child in getattr(module, "_modules", {}).items():
-        if child is not None:
-            restore_trainable_state(child, state, prefix=prefix + child_name + ".")
+    def forward(self, data, y):
+        logits = self.backbone_network(data['x'], data['edge_index'], data['edge_weight'], data['num_nodes'])
+        train_logits = tlx.gather(logits, data['train_idx'])
+        train_y = tlx.gather(data['y'], data['train_idx'])
+        loss = self._loss_fn(train_logits, train_y)
+        return loss
 
 
-def row_normalize_features(x, eps=1e-12):
-    """Apply row-wise feature normalization."""
-    row_sum = tlx.reduce_sum(x, axis=1, keepdims=True)
-    row_sum = tlx.maximum(row_sum, tlx.ones_like(row_sum) * eps)
-    return x / row_sum
+def calculate_acc(logits, y, metrics):
+    r"""Compute accuracy via the TLX metrics API."""
+    metrics.update(logits, y)
+    rst = metrics.result()
+    metrics.reset()
+    return rst
 
 
 def get_edge_index_and_theta(edge_index):
-    """Build the fuzzy edge list and its initial phase angles."""
+    r"""Build the fuzzy edge list and initial phase angles from an edge_index.
+
+    Symmetric (undirected) edges are kept only once with theta = pi/4;
+    directed edges are kept as-is with theta = 0.
+    """
     src = tlx.convert_to_numpy(edge_index[0]).tolist()
     dst = tlx.convert_to_numpy(edge_index[1]).tolist()
 
@@ -171,7 +92,16 @@ def get_edge_index_and_theta(edge_index):
 
 
 def get_fuzzy_laplacian(edge_index, theta, num_nodes, edge_weight=None, add_self_loop=False):
-    """Construct normalized directional edge weights for CoED message passing."""
+    r"""Construct normalized directional edge weights for CoED message passing.
+
+    This implements the fuzzy Laplacian normalization described in the paper.
+    For each edge (i, j) with phase angle theta_k, the directional weights are:
+      - src-to-dst: cos^2(theta_k)
+      - dst-to-src: sin^2(theta_k)
+    These are then symmetrically normalized by node degrees.
+    """
+    from gammagl.mpops import unsorted_segment_sum
+
     senders = edge_index[0]
     receivers = edge_index[1]
 
@@ -196,14 +126,18 @@ def get_fuzzy_laplacian(edge_index, theta, num_nodes, edge_weight=None, add_self
         out_weight = tlx.concat([out_weight, ones], axis=0)
         in_weight = tlx.concat([in_weight, ones], axis=0)
 
-    deg_senders = tlx.reshape(unsorted_segment_sum(out_weight, conv_senders, num_segments=num_nodes), (-1,)) + 1e-12
-    deg_receivers = tlx.reshape(unsorted_segment_sum(in_weight, conv_senders, num_segments=num_nodes), (-1,)) + 1e-12
+    deg_senders = tlx.reshape(
+        unsorted_segment_sum(out_weight, conv_senders, num_segments=num_nodes), (-1,)
+    ) + 1e-12
+    deg_receivers = tlx.reshape(
+        unsorted_segment_sum(in_weight, conv_senders, num_segments=num_nodes), (-1,)
+    ) + 1e-12
 
-    deg_inv_sqrt_senders = tlx.where(deg_senders < 1e-11, tlx.zeros_like(deg_senders), tlx.pow(deg_senders, -0.5))
+    deg_inv_sqrt_senders = tlx.where(
+        deg_senders < 1e-11, tlx.zeros_like(deg_senders), tlx.pow(deg_senders, -0.5)
+    )
     deg_inv_sqrt_receivers = tlx.where(
-        deg_receivers < 1e-11,
-        tlx.zeros_like(deg_receivers),
-        tlx.pow(deg_receivers, -0.5),
+        deg_receivers < 1e-11, tlx.zeros_like(deg_receivers), tlx.pow(deg_receivers, -0.5)
     )
 
     ew_src_to_dst = (
@@ -222,69 +156,100 @@ def get_fuzzy_laplacian(edge_index, theta, num_nodes, edge_weight=None, add_self
     return conv_edge_index, conv_edge_weight
 
 
-def calculate_acc(logits, y, idx):
-    """Calculate node classification accuracy on indexed nodes."""
-    pred = tlx.gather(tlx.argmax(logits, axis=-1), idx)
-    label = tlx.gather(y, idx)
-    return float(tlx.reduce_mean(tlx.cast(pred == label, tlx.float32)))
-
-
-def resolve_dataset_path(dataset_path):
-    """Resolve a local Planetoid cache path before attempting any download."""
-    candidates = [
-        os.path.abspath(dataset_path),
-        "/home/mr/GammaGL-fork/data/planetoid",
-        "/home/mr/GammaGL/data/planetoid",
-    ]
-    for candidate in candidates:
-        raw_dir = os.path.join(candidate, "cora", "raw")
-        if os.path.exists(raw_dir):
-            return candidate
-    return os.path.abspath(dataset_path)
+def set_seed(seed):
+    r"""Set random seeds for reproducible runs."""
+    np.random.seed(seed)
+    tlx.set_seed(seed)
 
 
 def main(args):
-    """Train and evaluate CoED-GNN on the 10 Geom-GCN splits of Cora."""
-    tlx.set_device("CPU")
-    dataset_path = resolve_dataset_path(args.dataset_path)
-
-    dataset, graph = load_planetoid_with_geom_splits(
-        root=dataset_path,
-        name=args.dataset,
-        num_splits=args.geom_splits,
-    )
-
-    if args.normalize_features:
-        graph.x = row_normalize_features(graph.x)
+    # ------------------------------------------------------------------
+    # 1. Load dataset
+    # ------------------------------------------------------------------
+    dataset_name = str.lower(args.dataset)
+
+    if dataset_name in ['cora', 'pubmed', 'citeseer']:
+        # Planetoid with Geom-GCN 10 fixed splits
+        dataset, graph = load_planetoid_with_geom_splits(
+            root=args.dataset_path, name=dataset_name,
+            num_splits=args.num_splits, transform=T.NormalizeFeatures(),
+        )
+    elif dataset_name in ['texas', 'wisconsin', 'cornell']:
+        dataset = WebKB(args.dataset_path, dataset_name, transform=T.NormalizeFeatures())
+        graph = dataset[0]
+        # WebKB masks are flat 1D: concatenation of 10 splits
+        n = graph.num_nodes
+        train_idx = mask_to_index(graph.train_mask[args.split_idx * n: (args.split_idx + 1) * n])
+        val_idx = mask_to_index(graph.val_mask[args.split_idx * n: (args.split_idx + 1) * n])
+        test_idx = mask_to_index(graph.test_mask[args.split_idx * n: (args.split_idx + 1) * n])
+    elif dataset_name in ['chameleon', 'squirrel']:
+        dataset = WikipediaNetwork(args.dataset_path, dataset_name, geom_gcn_preprocess=True)
+        graph = dataset[0]
+        # WikipediaNetwork masks are flat 1D: concatenation of 10 splits
+        n = graph.num_nodes
+        train_idx = mask_to_index(graph.train_mask[args.split_idx * n: (args.split_idx + 1) * n])
+        val_idx = mask_to_index(graph.val_mask[args.split_idx * n: (args.split_idx + 1) * n])
+        test_idx = mask_to_index(graph.test_mask[args.split_idx * n: (args.split_idx + 1) * n])
+    else:
+        raise ValueError('Unknown dataset: {}'.format(args.dataset))
+
+    # ------------------------------------------------------------------
+    # 2. Build fuzzy edge structure (dataset-level, shared across splits)
+    # ------------------------------------------------------------------
+    if args.remove_existing_self_loop:
+        # Remove self-loops from the original edge_index
+        src = tlx.convert_to_numpy(graph.edge_index[0])
+        dst = tlx.convert_to_numpy(graph.edge_index[1])
+        mask = src != dst
+        graph.edge_index = tlx.convert_to_tensor(
+            np.array([src[mask], dst[mask]], dtype=np.int64), dtype=tlx.int64
+        )
 
     edge_index, theta = get_edge_index_and_theta(graph.edge_index)
-    edge_weight = tlx.ones((tlx.get_tensor_shape(edge_index)[1],), dtype=tlx.float32)
+    num_nodes = graph.num_nodes
+
     conv_edge_index, conv_edge_weight = get_fuzzy_laplacian(
         edge_index=edge_index,
         theta=theta,
-        num_nodes=graph.num_nodes,
-        edge_weight=edge_weight,
+        num_nodes=num_nodes,
         add_self_loop=args.self_loop,
     )
 
+    # ------------------------------------------------------------------
+    # 3. Run multi-split evaluation
+    # ------------------------------------------------------------------
     split_test_accs = []
-    for split_id in range(args.geom_splits):
-        train_idx = mask_to_index(graph.train_mask[:, split_id])
-        val_idx = mask_to_index(graph.val_mask[:, split_id])
-        test_idx = mask_to_index(graph.test_mask[:, split_id])
+
+    for split_id in range(args.num_splits):
+        # Reload masks for this split
+        if dataset_name in ['cora', 'pubmed', 'citeseer']:
+            # Geom-GCN splits: 2D masks [num_nodes, num_splits]
+            train_idx = mask_to_index(graph.train_mask[:, split_id])
+            val_idx = mask_to_index(graph.val_mask[:, split_id])
+            test_idx = mask_to_index(graph.test_mask[:, split_id])
+        else:
+            n = graph.num_nodes
+            train_idx = mask_to_index(graph.train_mask[split_id * n: (split_id + 1) * n])
+            val_idx = mask_to_index(graph.val_mask[split_id * n: (split_id + 1) * n])
+            test_idx = mask_to_index(graph.test_mask[split_id * n: (split_id + 1) * n])
 
         data = {
             "x": graph.x,
+            "y": graph.y,
             "edge_index": conv_edge_index,
             "edge_weight": conv_edge_weight,
-            "num_nodes": graph.num_nodes,
             "train_idx": train_idx,
+            "test_idx": test_idx,
+            "val_idx": val_idx,
+            "num_nodes": num_nodes,
         }
 
         for run in range(args.runs):
             set_seed(args.seed + split_id * 97 + run)
 
-            model = CoEDModel(
+            # Instantiate model
+            jk = args.jumping_knowledge if args.jumping_knowledge != "None" else None
+            net = CoEDModel(
                 feature_dim=dataset.num_node_features,
                 hidden_dim=args.hidden_dim,
                 num_class=dataset.num_classes,
@@ -293,62 +258,67 @@ def main(args):
                 drop_rate=args.drop_rate,
                 normalize=args.normalize,
                 self_feature_transform=args.self_feature_transform,
+                jumping_knowledge=jk,
                 name="CoED",
             )
 
-            optimizer = AdamLike(lr=args.lr, weight_decay=args.weight_decay)
-            train_weights = collect_trainable_weights(model)
+            optimizer = tlx.optimizers.Adam(lr=args.lr, weight_decay=args.l2_coef)
+            metrics = tlx.metrics.Accuracy()
+            train_weights = net.trainable_weights
+
+            loss_func = SemiSpvzLoss(net, tlx.losses.softmax_cross_entropy_with_logits)
+            train_one_step = TrainOneStep(loss_func, optimizer, train_weights)
 
-            best_val_acc = 0.0
-            best_test_acc = 0.0
+            best_val_acc = 0
+            best_test_acc = 0
             bad_counter = 0
-            best_state = None
 
             for epoch in range(1, args.n_epoch + 1):
-                model.set_train()
-                optimizer.zero_grad(train_weights)
-                logits = model.forward(data["x"], data["edge_index"], data["edge_weight"], data["num_nodes"])
-                train_logits = tlx.gather(logits, data["train_idx"])
-                train_y = tlx.gather(graph.y, data["train_idx"])
-                loss = tlx.losses.softmax_cross_entropy_with_logits(train_logits, train_y)
-                loss.backward()
-                optimizer.step(train_weights)
-
-                model.set_eval()
-                logits = model.forward(data["x"], data["edge_index"], data["edge_weight"], data["num_nodes"])
-                val_acc = calculate_acc(logits, graph.y, val_idx)
-                test_acc = calculate_acc(logits, graph.y, test_idx)
+                net.set_train()
+                train_loss = train_one_step(data, graph.y)
+
+                net.set_eval()
+                logits = net(data['x'], data['edge_index'], data['edge_weight'], data['num_nodes'])
+
+                val_logits = tlx.gather(logits, data['val_idx'])
+                val_y = tlx.gather(data['y'], data['val_idx'])
+                val_acc = calculate_acc(val_logits, val_y, metrics)
+
+                test_logits = tlx.gather(logits, data['test_idx'])
+                test_y = tlx.gather(data['y'], data['test_idx'])
+                test_acc = calculate_acc(test_logits, test_y, metrics)
 
                 if val_acc > best_val_acc:
                     best_val_acc = val_acc
                     best_test_acc = test_acc
-                    best_state = clone_trainable_state(model)
                     bad_counter = 0
+                    net.save_weights(args.best_model_path + net.name + ".npz", format='npz_dict')
                 else:
                     bad_counter += 1
 
                 if epoch % args.print_freq == 0 or epoch == 1:
                     print(
-                        "split {:02d} run {:02d} epoch {:04d} loss {:.4f} val {:.4f} best_test {:.4f} patience {}/{}".format(
-                            split_id,
-                            run,
-                            epoch,
-                            float(loss.item() if hasattr(loss, "item") else loss),
-                            val_acc,
-                            best_test_acc,
-                            bad_counter,
-                            args.patience,
+                        "split {:02d} run {:02d} epoch {:04d} "
+                        "loss {:.4f} val {:.4f} best_test {:.4f} patience {}/{}".format(
+                            split_id, run, epoch,
+                            float(train_loss.item()),
+                            val_acc, best_test_acc,
+                            bad_counter, args.patience,
                         )
                     )
 
                 if bad_counter >= args.patience:
                     break
 
-            if best_state is not None:
-                restore_trainable_state(model, best_state)
-            model.set_eval()
-            logits = model.forward(data["x"], data["edge_index"], data["edge_weight"], data["num_nodes"])
-            best_test_acc = calculate_acc(logits, graph.y, test_idx)
+            # Restore best model for final evaluation
+            net.load_weights(args.best_model_path + net.name + ".npz", format='npz_dict')
+            if tlx.BACKEND == 'torch':
+                net.to(data['x'].device)
+            net.set_eval()
+            logits = net(data['x'], data['edge_index'], data['edge_weight'], data['num_nodes'])
+            test_logits = tlx.gather(logits, data['test_idx'])
+            test_y = tlx.gather(data['y'], data['test_idx'])
+            best_test_acc = calculate_acc(test_logits, test_y, metrics)
             split_test_accs.append(best_test_acc)
             print("split {:02d} run {:02d} best test acc: {:.5f}".format(split_id, run, best_test_acc * 100.0))
 
@@ -357,36 +327,65 @@ def main(args):
     print("test acc: {:.5f} +/- {:.5f}".format(mean_test, std_test))
 
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="CoED-GNN classification reproduction on Cora with GammaGL/TensorLayerX."
-    )
-    parser.add_argument("--dataset", type=str, default="cora")
-    parser.add_argument("--dataset_path", type=str, default="./data/planetoid")
-    parser.add_argument("--geom_splits", type=int, default=10)
-    parser.add_argument("--runs", type=int, default=1)
-    parser.add_argument("--seed", type=int, default=42)
-    parser.add_argument("--n_epoch", type=int, default=100)
-    parser.add_argument("--patience", type=int, default=30)
-    parser.add_argument("--print_freq", type=int, default=20)
-    parser.add_argument("--hidden_dim", type=int, default=128)
-    parser.add_argument("--num_layers", type=int, default=2)
-    parser.add_argument("--lr", type=float, default=5e-4)
-    parser.add_argument("--weight_decay", type=float, default=1e-4)
-    parser.add_argument("--drop_rate", type=float, default=0.5)
-    parser.add_argument("--alpha", type=float, default=0.0)
-    parser.add_argument("--self_loop", dest="self_loop", action="store_true")
-    parser.add_argument("--no_self_loop", dest="self_loop", action="store_false")
-    parser.add_argument("--normalize", dest="normalize", action="store_true")
-    parser.add_argument("--no_normalize", dest="normalize", action="store_false")
-    parser.add_argument("--normalize_features", dest="normalize_features", action="store_true")
-    parser.add_argument("--no_normalize_features", dest="normalize_features", action="store_false")
-    parser.add_argument("--self_feature_transform", dest="self_feature_transform", action="store_true")
-    parser.add_argument("--no_self_feature_transform", dest="self_feature_transform", action="store_false")
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description="CoED-GNN node classification with GammaGL/TensorLayerX.")
+
+    # Dataset
+    parser.add_argument('--dataset', type=str, default='cora',
+                        choices=['cora', 'texas', 'wisconsin', 'chameleon', 'squirrel'],
+                        help='Dataset name.')
+    parser.add_argument('--dataset_path', type=str, default=r'', help='Path to save/load dataset.')
+    parser.add_argument('--num_splits', type=int, default=10, help='Number of fixed splits to evaluate.')
+    parser.add_argument('--split_idx', type=int, default=0, help='Unused when num_splits > 0.')
+    parser.add_argument('--runs', type=int, default=1, help='Runs per split.')
+    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
+
+    # Model
+    parser.add_argument('--hidden_dim', type=int, default=64, help='Hidden dimension.')
+    parser.add_argument('--num_layers', type=int, default=2, help='Number of GNN layers.')
+    parser.add_argument('--alpha', type=float, default=0.5, help='Direction convex combination parameter.')
+    parser.add_argument('--drop_rate', type=float, default=0.0, help='Feature dropout rate.')
+    parser.add_argument('--normalize', dest='normalize', action='store_true',
+                        help='L2-normalize hidden features at each layer.')
+    parser.add_argument('--no_normalize', dest='normalize', action='store_false')
+    parser.add_argument('--self_feature_transform', dest='self_feature_transform', action='store_true',
+                        help='Learn a separate self-feature transform branch.')
+    parser.add_argument('--no_self_feature_transform', dest='self_feature_transform', action='store_false')
+    parser.add_argument('--self_loop', dest='self_loop', action='store_true',
+                        help='Mix self features into directional messages.')
+    parser.add_argument('--no_self_loop', dest='self_loop', action='store_false')
+    parser.add_argument('--jumping_knowledge', type=str, default='None',
+                        choices=['None', 'cat', 'max', 'lstm'],
+                        help='Jumping-knowledge aggregation type.')
+    parser.add_argument('--remove_existing_self_loop', dest='remove_existing_self_loop',
+                        action='store_true',
+                        help='Remove existing self-loops from the graph before processing.')
+    parser.add_argument('--no_remove_existing_self_loop', dest='remove_existing_self_loop',
+                        action='store_false')
+
+    # Training
+    parser.add_argument('--lr', type=float, default=0.001, help='Learning rate.')
+    parser.add_argument('--l2_coef', type=float, default=0.0, help='Weight decay (L2 regularization).')
+    parser.add_argument('--n_epoch', type=int, default=5000, help='Max training epochs.')
+    parser.add_argument('--patience', type=int, default=100, help='Early stopping patience.')
+    parser.add_argument('--print_freq', type=int, default=50, help='Print frequency (epochs).')
+
+    # System
+    parser.add_argument('--best_model_path', type=str, default=r'./', help='Path to save best model.')
+    parser.add_argument('--gpu', type=int, default=0, help='GPU index, -1 for CPU.')
+
     parser.set_defaults(
-        self_loop=True,
         normalize=False,
-        normalize_features=False,
         self_feature_transform=False,
+        self_loop=True,
+        remove_existing_self_loop=False,
     )
-    main(parser.parse_args())
+
+    args = parser.parse_args()
+
+    if args.gpu >= 0:
+        tlx.set_device("GPU", args.gpu)
+    else:
+        tlx.set_device("CPU")
+
+    main(args)
diff --git a/examples/coed/readme.md b/examples/coed/readme.md
index fc2aaa06f..c5b6fc268 100644
--- a/examples/coed/readme.md
+++ b/examples/coed/readme.md
@@ -3,59 +3,78 @@
 - Paper link: [https://arxiv.org/abs/2410.14109](https://arxiv.org/abs/2410.14109)
 - Author's code repo: [https://github.com/hormoz-lab/coed-gnn](https://github.com/hormoz-lab/coed-gnn)
 
-# Dataset Statics
+## Dataset Statistics
 
-| Dataset | # Nodes | # Edges | # Classes |
-|---------|---------|---------|-----------|
-| Cora    | 2,708   | 10,556  | 7         |
+| Dataset    | # Nodes | # Edges | # Classes |
+|------------|---------|---------|-----------|
+| Cora       | 2,708   | 10,556  | 7         |
+| Texas      | 183     | 309     | 5         |
+| Wisconsin  | 251     | 515     | 5         |
+| Chameleon  | 2,277   | 36,101  | 5         |
+| Squirrel   | 5,201   | 217,073 | 5         |
 
-This reproduction uses the `Geom-GCN` 10 fixed splits on `Cora`.
+All datasets use the `Geom-GCN` 10 fixed splits for evaluation.
 
 ## Files
 
-- `examples/coed/coed_trainer.py`: training and evaluation entry for Cora node classification
-- `examples/coed/geom_planetoid.py`: helper for loading `Geom-GCN` fixed splits
-- `examples/coed/run_coed_cora.py`: lightweight launcher for the trainer
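+- `examples/coed/coed_trainer.py`: Multi-dataset training and evaluation entry (imports `examples/coed/geom_planetoid.py` to load the Geom-GCN fixed splits for Planetoid datasets)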
 - `gammagl/models/coed.py`: CoED-GNN backbone model
 - `gammagl/layers/conv/coed_conv.py`: CoED directional convolution layer
 
-## Environment
+## Results
+
+### Cora
 
 ```bash
-/home/mr/venv/gammagl-py311-cpu
+TL_BACKEND="torch" python examples/coed/coed_trainer.py --dataset cora
 ```
 
-## Results
+| Metric     | Paper      | Our(torch)           |
+|------------|------------|----------------------|
+| Test Acc   | 86.42      | 87.00 +/- 1.44      |
 
-Run the reproduction with:
+### Texas
 
 ```bash
-cd /home/mr/GammaGL-fork
-source /home/mr/venv/gammagl-py311-cpu/bin/activate
-python examples/coed/coed_trainer.py
+TL_BACKEND="torch" python examples/coed/coed_trainer.py --dataset texas
 ```
 
-Or:
+| Metric     | Paper      | Our(torch)           |
+|------------|------------|----------------------|
+| Test Acc   |            |                      |
+
+### Wisconsin
 
 ```bash
-cd /home/mr/GammaGL-fork
-bash examples/coed/reproduce_cora.sh
+TL_BACKEND="torch" python examples/coed/coed_trainer.py --dataset wisconsin
 ```
 
-The target reference result is:
+| Metric     | Paper      | Our(torch)           |
+|------------|------------|----------------------|
+| Test Acc   |            |                      |
+
+### Chameleon
 
-```text
-test acc: 86.41851 +/- 1.37720
+```bash
+TL_BACKEND="torch" python examples/coed/coed_trainer.py --dataset chameleon
 ```
 
-The locally verified result is:
+| Metric     | Paper      | Our(torch)           |
+|------------|------------|----------------------|
+| Test Acc   |            |                      |
+
+### Squirrel
 
-```text
-test acc: 87.00201 +/- 1.43747
+```bash
+TL_BACKEND="torch" python examples/coed/coed_trainer.py --dataset squirrel
 ```
 
+| Metric     | Paper      | Our(torch)           |
+|------------|------------|----------------------|
+| Test Acc   |            |                      |
+
 ## Notes
 
-- The implementation uses the GammaGL `Planetoid` Cora dataset and stores `Geom-GCN` split files under `data/planetoid/cora/geom-gcn/raw`.
-- The default setup uses `hidden_dim=128`, `num_layers=2`, `lr=5e-4`, `weight_decay=1e-4`, `drop_rate=0.5`, `alpha=0.0`, `self_loop=True`, `normalize=False`, and `self_feature_transform=False`.
-- A short smoke test with `--n_epoch 5` is only for pipeline verification and should not be used as the final reproduction result.
+- The default setup uses `hidden_dim=64`, `num_layers=2`, `lr=1e-3`, `l2_coef=0.0`, `drop_rate=0.0`, `alpha=0.5`, `self_loop=True`, `normalize=False`, `self_feature_transform=False`, `patience=100`, `n_epoch=5000`.
+- The implementation evaluates all 10 Geom-GCN fixed splits and reports mean +/- std test accuracy.
+- The model and convolution layers are registered in `gammagl/models/__init__.py` and `gammagl/layers/conv/__init__.py` and can be imported via standard GammaGL paths.
diff --git a/gammagl/models/coed.py b/gammagl/models/coed.py
index a650363e7..abf1b9804 100644
--- a/gammagl/models/coed.py
+++ b/gammagl/models/coed.py
@@ -5,23 +5,10 @@
 <https://arxiv.org/abs/2410.14109>`_.
 """
 
-import importlib.util
-import os
-
 import tensorlayerx as tlx
 from tensorlayerx.nn import Dropout, Linear, Module, ReLU
 
-
-def _load_coed_conv():
-    file_path = os.path.join(os.path.dirname(__file__), "..", "layers", "conv", "coed_conv.py")
-    file_path = os.path.abspath(file_path)
-    spec = importlib.util.spec_from_file_location("coed_conv_local", file_path)
-    module = importlib.util.module_from_spec(spec)
-    spec.loader.exec_module(module)
-    return module.CoEDConv
-
-
-CoEDConv = _load_coed_conv()
+from gammagl.layers.conv import CoEDConv, JumpingKnowledge
 
 
 class CoEDModel(Module):
@@ -46,6 +33,11 @@ class CoEDModel(Module):
     self_feature_transform: bool, optional
         If set to :obj:`True`, each CoED layer also learns a self-feature
         transform branch.
+    jumping_knowledge: str, optional
+        Type of jumping-knowledge aggregation (:obj:`"cat"`, :obj:`"max"`,
+        :obj:`"lstm"`, or :obj:`None`).  When set, intermediate layer
+        outputs are aggregated and projected through an additional linear
+        layer.
     name: str, optional
         Model name.
 
@@ -61,12 +53,14 @@ def __init__(
         drop_rate=0.5,
         normalize=False,
         self_feature_transform=False,
+        jumping_knowledge=None,
         name=None,
     ):
         super().__init__(name=name)
         self.alpha = alpha
         self.num_layers = num_layers
         self.normalize = normalize
+        self.jumping_knowledge = jumping_knowledge
 
         self.convs = []
         in_channels = feature_dim
@@ -80,12 +74,29 @@ def __init__(
             self.add_module("conv{}".format(layer_idx + 1), conv)
             in_channels = hidden_dim
 
-        self.readout = Linear(
-            in_features=hidden_dim,
-            out_features=num_class,
-            W_init="xavier_uniform",
-            b_init=tlx.initializers.Zeros(),
-        )
+        if jumping_knowledge is not None:
+            self.jump = JumpingKnowledge(jumping_knowledge, hidden_dim, num_layers)
+            if jumping_knowledge == "cat":
+                jk_dim = hidden_dim * num_layers
+            else:
+                jk_dim = hidden_dim
+            self.lin = Linear(
+                in_features=jk_dim,
+                out_features=num_class,
+                W_init="xavier_uniform",
+                b_init=tlx.initializers.Zeros(),
+            )
+            self.readout = None
+        else:
+            self.jump = None
+            self.lin = None
+            self.readout = Linear(
+                in_features=hidden_dim,
+                out_features=num_class,
+                W_init="xavier_uniform",
+                b_init=tlx.initializers.Zeros(),
+            )
+
         self.relu = ReLU()
         self.dropout = Dropout(p=drop_rate)
 
@@ -100,12 +111,22 @@ def combine(self, xs):
 
     def forward(self, x, edge_index, edge_weight=None, num_nodes=None):
         """Compute node logits."""
+        x_intermediate = []
+
         for layer_idx, conv in enumerate(self.convs):
             x = self.combine(conv.forward(x, edge_index, edge_weight=edge_weight, num_nodes=num_nodes))
-            if layer_idx != self.num_layers - 1:
+
+            if layer_idx != self.num_layers - 1 or self.jump is not None:
                 x = self.relu.forward(x)
                 x = self.dropout.forward(x)
                 if self.normalize:
                     x = tlx.l2_normalize(x, axis=1)
+                x_intermediate.append(x)
+
+        if self.jump is not None:
+            x = self.jump(x_intermediate)
+            x = self.lin.forward(x)
+        else:
+            x = self.readout.forward(x)
 
-        return self.readout.forward(x)
+        return x