diff --git a/openhgnn/config.ini b/openhgnn/config.ini index 8c4ec3a2..21037d2a 100644 --- a/openhgnn/config.ini +++ b/openhgnn/config.ini @@ -1559,3 +1559,24 @@ mini_batch_flag = True emb_dim=20 hid_dim=64 batch_size=128 + +[HERO] +lr = 0.0025 +patience = 60 +nb_epochs = 500 +hid_units = 256 +hid_units2 = 128 +out_ft = 256 +g_dim = 64 +g_equidim = 256 +p_equidim = 256 +edge_rate = 0.9 +alpha = 100 +beta = 500 +gamma = 10 +eta = 1 +lambbda = 1 +dataset = 'acm4GTN' +gpu = 1 +mode = 0 +save_dir = './saved_models' \ No newline at end of file diff --git a/openhgnn/layers/Fully_connect.py b/openhgnn/layers/Fully_connect.py new file mode 100644 index 00000000..45e138a2 --- /dev/null +++ b/openhgnn/layers/Fully_connect.py @@ -0,0 +1,55 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +class FullyConnect(nn.Module): + def __init__(self, in_ft, out_ft, act=nn.Identity(), drop_prob=0.0, isBias=False): + super().__init__() + self.fc = nn.Linear(in_ft, out_ft, bias=False) + if self.fc.bias is not None: + self.fc.bias.data.fill_(0.0) + + if isBias: + self.bias = nn.Parameter(torch.empty(out_ft)) + self.bias.data.fill_(0.0) + else: + self.register_parameter('bias', None) + + self.act = act + self.drop_prob = drop_prob + self.isBias = isBias + + for m in self.modules(): + self.weights_init(m) + + def weights_init(self, m): + if isinstance(m, nn.Linear): + torch.nn.init.xavier_uniform_(m.weight.data) + if m.bias is not None: + m.bias.data.fill_(0.0) + + + def forward(self, emb): + # emb (batch_size, ft) + emb = F.dropout(emb, self.drop_prob, training=self.training) + e = self.fc(emb) # (batch_size, d) + if self.isBias: + e += self.bias + return self.act(e) + + +class FullyConnect2(nn.Module): + def __init__(self, in_ft, hid_unit, out_ft, drop_prob=0.0, isBias=False): + super().__init__() + self.fc = FullyConnect(in_ft, hid_unit, act=nn.PReLU(), drop_prob=drop_prob,isBias=isBias) + self.fc2 = FullyConnect(hid_unit, out_ft, act=nn.PReLU(), drop_prob=drop_prob,isBias=isBias) + self.dense = FullyConnect(out_ft, 1, act=nn.Identity(), drop_prob=drop_prob,isBias=isBias) + + + def forward(self, emb): + # emb (batch_size, ft) + e = self.fc(emb) # (batch_size, d) + e2 = self.fc2(e) + out = self.dense(e2) + return out \ No newline at end of file diff --git a/openhgnn/layers/Linear_layer.py b/openhgnn/layers/Linear_layer.py new file mode 100644 index 00000000..c1b1aa37 --- /dev/null +++ b/openhgnn/layers/Linear_layer.py @@ -0,0 +1,52 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +class Linear_layer(nn.Module): + def __init__(self, in_ft, out_ft, act=nn.PReLU(), drop_prob=0.0, isBias=False): + super().__init__() + self.linear = nn.Linear(in_ft, out_ft, bias=False) + +# if isBias: +# self.bias = nn.Parameter(torch.empty(out_ft)) +# self.bias.data.fill_(0.0) +# else: +# self.register_parameter('bias', None) + + self.act = act + self.isBias = isBias + self.drop_prob = drop_prob + + for m in self.modules(): + self.weights_init(m) + + def weights_init(self, m): + if isinstance(m, nn.Linear): + torch.nn.init.xavier_uniform_(m.weight.data) + if m.bias is not None: + m.bias.data.fill_(0.0) + + def forward(self, emb): + # emb (batch_size, ft) +# emb = F.dropout(emb, self.drop_prob, training=self.training) + e = self.linear(emb) # (batch_size, d) +# if self.isBias: +# e += self.bias + e_out = self.act(e) + return e_out + + +class GNN(nn.Module): + def __init__(self, nb_rel, in_ft, out_ft, act=nn.PReLU(), drop_prob=0.5, isBias=False): + super().__init__() + self.encoder = nn.ModuleList() + for i in range(nb_rel): + self.encoder.append(GCN(in_ft, out_ft, act=act, isBias=isBias)) + + def forward(self, embs): + outs = [] + for emb in embs: # emb (batch_size, ft) + outs.append(self.encoder(emb)) + outs = torch.stack(outs, 0) # outs (nb_rel, batch_size, ft) + return outs diff --git a/openhgnn/layers/__init__.py b/openhgnn/layers/__init__.py index 60cdb110..4cc683a4 100644 --- a/openhgnn/layers/__init__.py +++ b/openhgnn/layers/__init__.py @@ -8,7 +8,8 @@ from .AdapropT import * from .AdapropI import * from .rgcn_layer import * - +from .Fully_connect import * +from .Linear_layer import * __all__ = [ 'HeteroEmbedLayer', 'GeneralLinear', @@ -24,7 +25,9 @@ 'AttConv', 'LSTMConv', 'AdapropT', - 'AdapropI' + 'AdapropI', + 'Fully_connect', + 'Linear_layer', ] classes = __all__ \ No newline at end of file diff --git a/openhgnn/models/HERO.py b/openhgnn/models/HERO.py new file mode 100644 index 00000000..be7a87d8 --- /dev/null +++ b/openhgnn/models/HERO.py @@ -0,0 +1,324 @@ +import torch +import torch.nn as nn +import math +import dgl +from collections import defaultdict +import numpy as np +import torch.nn.functional as F +from tqdm import tqdm +# 确保从 OpenHGNN 正确导入所需的层 +# 可能需要根据你的 OpenHGNN 版本和目录结构调整路径 +try: + # 尝试标准路径 + from openhgnn.layers.Fully_connect import FullyConnect + from openhgnn.layers.Linear_layer import Linear_layer +except ImportError: + print("警告:无法从标准 openhgnn.layers 导入层。请检查 OpenHGNN 安装或路径。") + + +from . import BaseModel, register_model # OpenHGNN 模型注册机制 +import dgl.function as fn + +VERY_SMALL_NUMBER = 1e-12 +INF = 1e20 + +@register_model('HERO') +class HERO(BaseModel): + @classmethod + def build_model_from_args(cls, args, hg): + return cls(args, hg) # 将 hg 传递给 __init__ 以便访问 ntypes/etypes + + def __init__(self, args, hg): # 接收 hg 以便获取类型信息 + super(HERO, self).__init__() + self.args = args + self.hg = hg # 保存图对象引用,主要用于获取 ntypes 和 etypes + + # --- 重要修改:不再自行计算 node_cnt 和 node_size --- + # 这些值现在应该由 Herotrainer 通过 args 传入 + if not hasattr(args, 'node_cnt') or not hasattr(args, 'node_size') or not hasattr(args, 'ft_size'): + raise ValueError("必要的参数 'node_cnt', 'node_size', 'ft_size' 未在 args 中提供!") + + self.node_cnt = args.node_cnt # 使用来自 trainer 的映射 + self.node_size = args.node_size # 使用来自 trainer 的全局节点数 + self.ft_size = args.ft_size # 使用来自 trainer 的特征维度 + + # --- 网络层初始化 --- + self.bnn = nn.ModuleDict() + self.fc = nn.ModuleDict() + # MLP 输入维度应为全局特征维度 self.ft_size + self.mlp = MLP([self.ft_size, self.args.out_ft]) # out_ft 是同构嵌入的目标维度 + + # 初始化 FC 层 (用于第二层聚合后的拼接) + # 输入维度是 hid_units2 (第二层聚合输出) + ft_size (原始特征) + fc_input_dim = args.hid_units2 + self.ft_size + for ntype in hg.ntypes: # 遍历 DGL 图中的节点类型 + # 如果该类型在 node_cnt 中定义了 (即参与了全局排序) + if ntype in self.node_cnt: + self.fc[ntype] = FullyConnect(fc_input_dim, args.out_ft) # 输出维度是异构嵌入的目标维度 + else: + print(f"警告: DGL 图中的节点类型 '{ntype}' 未在 node_cnt 中定义,未初始化 FC 层。") + + + # 初始化 BNN 层 (用于 GNN 聚合) + # 键名使用 DGL 的规范边类型字符串 (src_type, edge_name, dst_type) + for canonical_etype in hg.canonical_etypes: + etype_str = canonical_etype[1] # 获取中间的边类型名称字符串,例如 'pa' 或 'ap' + # key = f"{src_type}-{dst_type}" # 或者使用原始的 p-a 格式,如果 Herotrainer 中处理了 + # 为了安全,我们使用 DGL 提供的 etype 名称字符串 + # BNN 层 0 (第一层聚合),输入是原始特征维度 self.ft_size + self.bnn['0' + etype_str] = Linear_layer(self.ft_size, args.hid_units, act=nn.ReLU(), isBias=False) + # BNN 层 1 (第二层聚合),输入是第一层聚合的隐藏维度 args.hid_units + self.bnn['1' + etype_str] = Linear_layer(args.hid_units, args.hid_units2, act=nn.ReLU(), isBias=False) + print("HERO 模型层初始化完成.") + + + def forward(self, hg, features, distance): + """ + 模型的前向传播。 + + Args: + hg (dgl.DGLHeteroGraph): 输入的 DGL 图。 + features (torch.Tensor): 全局排序的节点特征张量。 + distance (torch.Tensor): 目标节点的特征距离矩阵。 + + Returns: + tuple: (emb_het, emb_hom) 异构和同构节点嵌入。 + """ + # 使用局部作用域,避免修改传入的原始 hg 图对象 + with hg.local_scope(): + # --- 准备工作 --- + # 1. 初始化用于存储中间结果的全局张量 (使用正确的 node_size) + # embs1 存储第一层聚合结果 (hid_units 维度) + embs1 = torch.zeros((self.node_size, self.args.hid_units), device=features.device) + # embs2 存储第二层聚合+FC后的结果 (out_ft 维度) + embs2 = torch.zeros((self.node_size, self.args.out_ft), device=features.device) + + # 2. 将全局有序特征写入 DGL 图的节点数据中,以便消息传递使用 + # 使用 self.node_cnt 进行映射 + # print("将全局特征写入 DGL 节点数据...") + for ntype in hg.ntypes: + if ntype in self.node_cnt: # 只处理在全局映射中存在的类型 + # 获取该类型节点的全局索引范围 + idx = self.node_cnt[ntype] + # 从全局 features 张量中提取对应特征 + features_slice = features[idx] + # 将特征赋给 DGL 图中该类型的节点 + hg.nodes[ntype].data['h'] = features_slice + # print(f" - 类型 '{ntype}': 分配特征,形状 {features_slice.shape}") + else: + print(f"警告: 类型 '{ntype}' 未在 node_cnt 中,无法分配特征给 DGL 节点。") + + + # 3. 构造反向图,用于从邻居向中心节点聚合信息 + # 共享节点和边数据以提高效率 + rev_hg = dgl.reverse(hg, share_ndata=True, share_edata=True) + + # --- HERO 异构嵌入计算 --- + # print("开始第一层聚合...") + # === 第一层聚合 === + # 按目标节点类型进行迭代聚合(DGL 的标准方式) + for ntype in hg.ntypes: + if ntype not in self.node_cnt: continue # 跳过未参与全局映射的类型 + # print(f" 处理目标类型: '{ntype}'") + # 用于收集来自不同关系类型的聚合结果 + aggregated_results_l1 = [] + # 遍历指向当前节点类型 ntype 的所有边类型 (在反向图中是发出边) + # etype 是 (src, name, dst),这里 ntype 是 dst + for etype in rev_hg.canonical_etypes: + src_type, etype_name, dst_type = etype + if dst_type == ntype: # 如果当前类型是这条反向边的目标类型 + # print(f" - 通过关系 '{src_type}' --'{etype_name}'--> '{dst_type}' 进行聚合") + # 获取对应的 BNN 层 (使用边类型名称) + bnn_key = '0' + etype_name + if bnn_key not in self.bnn: + print(f"警告: 未找到 BNN 层 '{bnn_key}',跳过此边类型。") + continue + + # 在反向图上执行消息传递和聚合 + # 从源节点 (src_type) 复制特征 'h' 到消息 'm' + # 对收到的消息 'm' 按目标节点 (dst_type) 求平均值 'agg_temp' + msg_func = fn.copy_u('h', 'm') + reduce_func = fn.mean('m', 'agg_temp') + rev_hg.update_all(msg_func, reduce_func, etype=etype) + + # 检查聚合结果是否存在 + if 'agg_temp' in rev_hg.nodes[dst_type].data: + # 获取聚合后的邻居特征 (形状: [num_dst_type, ft_size]) + neigh_features = rev_hg.nodes[dst_type].data.pop('agg_temp') + # 通过 BNN 层进行变换 (形状: [num_dst_type, hid_units]) + mapped_features = self.bnn[bnn_key](neigh_features) + aggregated_results_l1.append(mapped_features) + # else: + # print(f" - 注意: 类型 '{dst_type}' 没有收到来自 '{src_type}' 通过 '{etype_name}' 的消息。") + + # 如果收到了来自至少一种关系类型的聚合结果 + if aggregated_results_l1: + # 将来自不同关系类型的结果堆叠起来 (形状: [num_relations, num_dst_type, hid_units]) + stacked_results_l1 = torch.stack(aggregated_results_l1, dim=0) + # 对不同关系类型的结果求平均 (形状: [num_dst_type, hid_units]) + v_summary_l1 = torch.mean(stacked_results_l1, dim=0) + # --- 将第一层结果写入全局 embs1 和 DGL 节点数据 --- + # 获取当前类型 ntype 的全局索引 + global_idx_l1 = self.node_cnt[ntype] + # 写入全局张量 + embs1[global_idx_l1] = v_summary_l1 + # 同时写入 DGL 节点数据,供下一层聚合使用 + # (注意:需要写回原始图 hg 和反向图 rev_hg,因为下一层聚合还需要在 rev_hg 上进行) + hg.nodes[ntype].data['em1'] = v_summary_l1 + rev_hg.nodes[ntype].data['em1'] = v_summary_l1 + # print(f" 完成类型 '{ntype}' 的第一层聚合,结果形状: {v_summary_l1.shape}") + # else: + # print(f" 类型 '{ntype}' 未收到任何第一层聚合结果。") + + + # print("开始第二层聚合...") + # === 第二层聚合 === + # 与第一层类似,但使用 'em1' 作为输入特征,并使用 '1'+etype_name 的 BNN 层 + for ntype in hg.ntypes: + if ntype not in self.node_cnt: continue + # print(f" 处理目标类型: '{ntype}'") + aggregated_results_l2 = [] + for etype in rev_hg.canonical_etypes: + src_type, etype_name, dst_type = etype + if dst_type == ntype: + # print(f" - 通过关系 '{src_type}' --'{etype_name}'--> '{dst_type}' 进行聚合 (L2)") + bnn_key = '1' + etype_name + if bnn_key not in self.bnn: + print(f"警告: 未找到 BNN 层 '{bnn_key}',跳过此边类型。") + continue + + # 检查源节点是否有 'em1' 特征 + if 'em1' not in rev_hg.nodes[src_type].data: + # print(f" - 警告: 源类型 '{src_type}' 缺少 'em1' 特征,无法进行第二层聚合。") + continue # 跳过这条边 + + # 执行聚合 (使用 em1 作为消息源) + msg_func = fn.copy_u('em1', 'm') + reduce_func = fn.mean('m', 'agg_temp_l2') + rev_hg.update_all(msg_func, reduce_func, etype=etype) + + if 'agg_temp_l2' in rev_hg.nodes[dst_type].data: + neigh_features_l1 = rev_hg.nodes[dst_type].data.pop('agg_temp_l2') + mapped_features_l2 = self.bnn[bnn_key](neigh_features_l1) + aggregated_results_l2.append(mapped_features_l2) + # else: + # print(f" - 注意: 类型 '{dst_type}' 没有收到来自 '{src_type}' 通过 '{etype_name}' 的第二层消息。") + + # 如果收到了第二层聚合结果 + if aggregated_results_l2: + # 对不同关系类型结果求平均 (形状: [num_dst_type, hid_units2]) + stacked_results_l2 = torch.stack(aggregated_results_l2, dim=0) + v_summary_l2 = torch.mean(stacked_results_l2, dim=0) + + # --- 拼接原始特征并通过 FC 层 --- + # 获取当前类型 ntype 的全局索引和原始特征 + global_idx_l2 = self.node_cnt[ntype] + original_features = features[global_idx_l2] # 从全局有序特征中提取 + + # 拼接第二层聚合结果和原始特征 (形状: [num_dst_type, hid_units2 + ft_size]) + cat_features = torch.cat((v_summary_l2, original_features), dim=1) + + # 通过特定于类型的 FC 层 (形状: [num_dst_type, out_ft]) + if ntype in self.fc: + final_emb = self.fc[ntype](cat_features) + # 写入全局 embs2 张量 + embs2[global_idx_l2] = final_emb + # print(f" 完成类型 '{ntype}' 的第二层聚合与FC,结果形状: {final_emb.shape}") + else: + print(f"警告: 未找到类型 '{ntype}' 的 FC 层,无法计算最终嵌入。") + + # else: + # print(f" 类型 '{ntype}' 未收到任何第二层聚合结果。") + + # --- 选择最终的异构嵌入 --- + # 根据数据集选择使用第一层还是第二层的聚合结果 + # 注意:这里的 'acm4GTN' 是示例,需要根据实际数据集名称调整 + # 如果你的数据集名称是 'ACM' (来自 Herotrainer 的 args.dataset),确保这里也用 'ACM' + if self.args.dataset in ['acm4GTN', 'ACM']: # 如果是 ACM 数据集变体 + embs_het_full = embs1 + else: + embs_het_full = embs2 + + # --- HERO 同构嵌入计算 --- + # print("计算同构嵌入...") + # 使用 MLP 处理全局特征 + # 注意:只处理目标节点(前 args.node_num 个) + emb_f_full = self.mlp(features) # 先对所有节点计算,后面再切片 + # emb_f = emb_f_full[:self.args.node_num] # 取出目标节点的 MLP 输出 + # !!! 修正:应该对距离矩阵对应的节点(前 args.node_num 个)的 MLP 输出进行计算 + emb_f_target = emb_f_full[:self.args.node_num] + + + # 同构计算公式 (与原始代码一致) + coe2 = 1.0 / (self.args.beta + 1e-9) # 防止 beta 为 0 + # H_target^T * H_target + res = torch.mm(emb_f_target.T, emb_f_target) + # 矩阵求逆部分 (I + c * H^T * H)^(-1) + identity_matrix = torch.eye(emb_f_target.shape[1], device=features.device) + inv = torch.inverse(identity_matrix + coe2 * res) + # (I + c * H^T * H)^(-1) * (H^T * H) + res = torch.mm(inv, res) + # B = c*H - c^2*H*(I + c*H^T*H)^(-1)*(H^T*H) + res = coe2 * emb_f_target - coe2**2 * torch.mm(emb_f_target, res) + # H^T * B + tmp = torch.mm(emb_f_target.T, res) + # Part1 = H * (H^T * B) + part1 = torch.mm(emb_f_target, tmp) + + # Part2 = (-alpha / 2) * D * B + # distance 矩阵应该只包含目标节点之间的距离 + if distance.shape[0] != self.args.node_num or distance.shape[1] != self.args.node_num: + print(f"警告: 传入的 distance 矩阵形状 ({distance.shape}) 与目标节点数 ({self.args.node_num}) 不符!") + # 可能需要报错或采取默认行为 + part2 = torch.zeros_like(part1) # 例如,将 part2 设为零 + else: + part2 = (-self.args.alpha / 2.0) * torch.mm(distance, res) + + # 同构嵌入 = Part1 + Part2 + embs_hom_target = part1 + part2 + + # --- 返回目标节点的嵌入 --- + # 从完整的异构嵌入中切片出目标节点的部分 + embs_het_target = embs_het_full[:self.args.node_num] + + return embs_het_target, embs_hom_target + + def embed(self, hg, features, distance): + """ + 生成节点嵌入(通常在评估时调用,不计算梯度)。 + 逻辑与 forward 基本相同,但最后返回 .detach() 的结果。 + """ + # print("调用 embed 方法生成节点嵌入...") + # 设置模型为评估模式 (虽然此方法本身不训练,但依赖的层如 Dropout 应关闭) + self.eval() + with torch.no_grad(): # 确保不计算梯度 + # 完全复用 forward 的逻辑来计算嵌入 + embs_het_target, embs_hom_target = self.forward(hg, features, distance) + + # 返回分离后的张量 (不带梯度信息) + return embs_het_target.detach(), embs_hom_target.detach() + + +class MLP(nn.Module): + """ 简单的多层感知机 """ + def __init__(self, dim, dropprob=0.0): + super(MLP, self).__init__() + self.net = nn.ModuleList() + # 如果 args 中定义了 dropout,则使用它,否则用默认值 + dropout_rate = getattr(self.args, 'dropout', dropprob) if hasattr(self, 'args') else dropprob + self.dropout = torch.nn.Dropout(dropout_rate) + for i in range(len(dim) - 1): + self.net.append(nn.Linear(dim[i], dim[i+1])) + print(f"MLP 初始化: 维度={dim}, Dropout={dropout_rate}") + + + def forward(self, x): + # 遍历除最后一层外的所有线性层 + for i in range(len(self.net) - 1): + x = self.net[i](x) + x = F.relu(x) # ReLU 激活 + x = self.dropout(x) # Dropout + + # 最后一层线性变换 (通常不加激活和 Dropout) + y = self.net[-1](x) + return y \ No newline at end of file diff --git a/openhgnn/models/__init__.py b/openhgnn/models/__init__.py index 99578e65..76f5d529 100644 --- a/openhgnn/models/__init__.py +++ b/openhgnn/models/__init__.py @@ -137,6 +137,7 @@ def build_model_from_args(args, hg): 'Ingram': 'openhgnn.models.Ingram', 'RedGNN': 'openhgnn.models.RedGNN', 'RedGNNT': 'openhgnn.models.RedGNNT', + 'HERO': 'openhgnn.models.HERO', } @@ -199,6 +200,7 @@ def build_model_from_args(args, hg): from .Ingram import Ingram from .RedGNN import RedGNN from .RedGNNT import RedGNNT +from .HERO import HERO diff --git a/openhgnn/output/HERO/README.md b/openhgnn/output/HERO/README.md new file mode 100644 index 00000000..caf84b93 --- /dev/null +++ b/openhgnn/output/HERO/README.md @@ -0,0 +1,78 @@ +# HERO + +Paper:[Self-Supervised Heterogeneous Graph Learning](https://openreview.net/forum?id=3FJOKjooIj) +## How to run + +Clone the Openhgnn-DGL + +```python +python main.py -m HERO -t node_classification -d acm4GTN -g 0 --use_best_config +``` + +Candidate dataset: acm4GTN + +If you do not have gpu, set -gpu -1. + +## candidate dataset + +acm4GTN + +NOTE: HERO can handle mag oag dataset, we will add these two datasets in our further work. + +### description + +- Number of nodes + + | paper | 3025 | + | -------- | ---- | + | author | 5912 | + | subject | 57 | + +- Subsets: paper-author, paper-subject + + +## performance + +Node classification + +| |f1_marco|f1_micro| +|----|----|----| +|acm4GTN|0.917|0.917| + + +## TrainerFlow: herotrainer + +## Hyper-parameter specific to the model + +```python +lr = 0.0025 +patience = 60 +nb_epochs = 500 +hid_units = 256 +hid_units2 = 128 +out_ft = 256 +g_dim = 64 +g_equidim = 256 +p_equidim = 256 +edge_rate = 0.9 +alpha = 100 +beta = 500 +gamma = 10 +eta = 1 +lambbda = 1 +gpu = 1 +mode = 0 +save_dir = './saved_models' +``` + + +## More + +#### Contirbutor + +Tianyi Wang[Gamma LAB] +![My Page](www.bimu.site) + +#### If you have any questions, + +Submit an issue or email to [bimu@bupt.edu.cn](mailto:bimu@bupt.edu.cn). \ No newline at end of file diff --git a/openhgnn/output/NARS/README.md b/openhgnn/output/NARS/README.md index 7886bc99..c158b29f 100644 --- a/openhgnn/output/NARS/README.md +++ b/openhgnn/output/NARS/README.md @@ -26,7 +26,7 @@ NOTE: NARS can handle mag oag dataset, we will add these two datasets in our fur - Number of nodes - | paper | 4025 | + | paper | 4z025 | | -------- | ---- | | author | 17431 | | field | 73 | diff --git a/openhgnn/trainerflow/__init__.py b/openhgnn/trainerflow/__init__.py index 3dd96083..318b0c33 100644 --- a/openhgnn/trainerflow/__init__.py +++ b/openhgnn/trainerflow/__init__.py @@ -107,7 +107,7 @@ def build_flow(args, flow_name): 'RedGNN_trainer': 'openhgnn.trainerflow.RedGNN_trainer', 'RedGNNT_trainer': 'openhgnn.trainerflow.RedGNNT_trainer', 'HGPrompt':'openhgnn.trainerflow.HGPrompt_trainer', - + 'herotrainer':'openhgnn.trainerflow.herotrainer', } @@ -160,7 +160,7 @@ def build_flow(args, flow_name): from .DisenKGAT_trainer import * from .RedGNNT_trainer import RedGNNTTrainer from .HGPrompt import HGPrompt_trainer - +from .herotrainer import herotrainer # don't add here diff --git a/openhgnn/trainerflow/herotrainer.py b/openhgnn/trainerflow/herotrainer.py new file mode 100644 index 00000000..8a8592e2 --- /dev/null +++ b/openhgnn/trainerflow/herotrainer.py @@ -0,0 +1,420 @@ +import torch.nn as nn +import torch as th +import torch +import copy as copy +import scipy.sparse as sp +import os +import torch.nn.functional as F +from sklearn.linear_model import LogisticRegression +from sklearn import metrics +import numpy as np +import warnings +from tqdm import tqdm +from openhgnn.trainerflow import BaseFlow, register_flow +from openhgnn.models import build_model +import dgl # 确保导入 dgl + +warnings.filterwarnings("ignore") + + +@register_flow("herotrainer") +class Herotrainer(BaseFlow): + def __init__(self, args): + super(Herotrainer, self).__init__(args) + self.args = args + + prepared_data = self._prepare_hero_data_and_distance(self.hg) + self.features = prepared_data['features'].to(self.device) + self.feature_distance = prepared_data['feature_distance'].to(self.device) + self.node_cnt = prepared_data['node_cnt'] + + self.args.node_cnt = self.node_cnt + self.args.node_size = prepared_data['num_nodes_global'] + if self.features is not None and self.features.ndim == 2: + self.args.ft_size = self.features.shape[1] + else: + print( + f"警告: 处理后的 self.features 形状不正确或为None,无法设置 args.ft_size。将尝试使用配置中的 hidden_dim。") + if hasattr(args, 'hidden_dim'): # 假设配置中有 hidden_dim 作为特征原始维度 + self.args.ft_size = args.hidden_dim + else: + raise ValueError("无法确定特征维度 ft_size。") + + self.args.node_num = 3025 # HERO 目标节点数 + + # 检查 p_equidim vs g_equidim (用于 loss_spe_nontrival_1 和 loss_spe_inv) + if hasattr(self.args, 'p_equidim') and hasattr(self.args, 'g_equidim'): + if self.args.p_equidim != self.args.g_equidim: + print( + f"重要警告: args.p_equidim ({self.args.p_equidim}) 与 args.g_equidim ({self.args.g_equidim}) 不相等。") + print(" 这将导致 loss_spe_nontrival_1 和 loss_spe_inv 的对角线求和在非方阵上进行。") + print(" 请检查 HERO 论文/原始代码,确认这些维度是否需要相等,并在配置文件中修改。") + else: + print("警告: 缺少 p_equidim 或 g_equidim 参数,无法检查维度匹配性 (loss_spe_nontrival_1, loss_spe_inv)。") + + self.model = build_model(self.model).build_model_from_args(self.args, self.hg).to(self.device) + self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.args.lr) + + self.g = nn.Sequential(nn.Linear(self.args.out_ft, self.args.g_dim, bias=False), + nn.ReLU(inplace=True)).to(self.device) + self.g_1 = nn.Sequential(nn.Linear(self.args.out_ft, self.args.g_equidim, bias=False), + nn.ReLU(inplace=True)).to(self.device) + self.p_1 = nn.Sequential(nn.Linear(self.args.g_equidim, self.args.p_equidim, bias=False), + nn.ReLU(inplace=True)).to(self.device) + + self.args.batch_size = 1 + print("Herotrainer 初始化完成.") + if self.features is not None: print(f" 全局特征形状: {self.features.shape}") + if self.feature_distance is not None: print(f" 特征距离形状: {self.feature_distance.shape}") + print(f" 全局节点总数 (args.node_size): {self.args.node_size}") + print(f" 目标节点数量 (args.node_num): {self.args.node_num}") + + def _prepare_hero_data_and_distance(self, hg): + print(f"为 HERO 模型准备数据 (数据集: {self.args.dataset})...") + if self.args.dataset in ['acm4GTN', 'acm4NSHE', 'acm4NARS', 'acm4HeCo', 'ACM']: + node_order = ['paper', 'author', 'subject'] + expected_counts = {'paper': 3025, 'author': 5912, 'subject': 57} + num_target_nodes = 3025 + print(f"使用 ACM 数据集的节点顺序: {node_order}") + else: + print( + f"警告: 未为数据集 {self.args.dataset} 定义显式的全局节点顺序。将使用 DGL 图的默认 ntypes 顺序: {hg.ntypes}。") + node_order = hg.ntypes + expected_counts = {} + if not node_order: raise ValueError("DGL 图中没有任何节点类型 (hg.ntypes is empty)。") + num_target_nodes = hg.num_nodes(node_order[0]) if node_order else 0 + print( + f"警告: 自动假设目标节点类型为 '{node_order[0] if node_order else 'None'}',数量为 {num_target_nodes}。") + + actual_counts = {} + valid_node_order = [] + for ntype in node_order: + if ntype not in hg.ntypes: + print(f"警告: 期望的节点类型 '{ntype}' 在图中未找到,将跳过。") + continue + valid_node_order.append(ntype) + actual_counts[ntype] = hg.num_nodes(ntype) + if ntype in expected_counts and actual_counts[ntype] != expected_counts[ntype]: + print( + f"警告: 节点类型 '{ntype}' 的数量不匹配! 期望 {expected_counts[ntype]}, 实际 {actual_counts[ntype]}。") + node_order = valid_node_order + + node_cnt = {} + current_idx = 0 + print("创建 node_cnt 全局索引映射:") + for ntype in node_order: + if ntype not in actual_counts: continue + num = actual_counts[ntype] + node_cnt[ntype] = th.arange(current_idx, current_idx + num) + print(f" - 类型 '{ntype}': 全局索引 {current_idx} - {current_idx + num - 1}") + current_idx += num + num_nodes_global = current_idx + print(f"计算出的全局节点总数: {num_nodes_global}") + + feature_list = [] + feature_dim = -1 + print("按全局顺序提取、拼接并二值化特征:") + for ntype in node_order: + if ntype not in actual_counts: continue + feature_key = None + if 'h' in hg.nodes[ntype].data: + feature_key = 'h' + elif 'feat' in hg.nodes[ntype].data: ## 鲁棒一点 + feature_key = 'feat' + + if feature_key: + features_ntype_raw = hg.nodes[ntype].data[feature_key].clone() + # print(f" - 提取类型 '{ntype}' 原始特征,形状: {features_ntype_raw.shape}, 样本均值: {features_ntype_raw.float().mean().item():.4f}") + features_ntype_binarized = features_ntype_raw + features_ntype_binarized[features_ntype_binarized > 0] = 1.0 + # print(f" 类型 '{ntype}' 二值化后特征,形状: {features_ntype_binarized.shape}, 样本均值: {features_ntype_binarized.float().mean().item():.4f}") + feature_list.append(features_ntype_binarized) + if feature_dim == -1: + feature_dim = features_ntype_binarized.shape[1] + elif feature_dim != features_ntype_binarized.shape[1]: + print( + f"警告: 类型 '{ntype}' 特征维度 ({features_ntype_binarized.shape[1]}) 与之前 ({feature_dim}) 不匹配。") + else: + num_nodes_ntype = actual_counts[ntype] + if feature_dim != -1: + print( + f"警告: 未找到类型 '{ntype}' 的特征 ('h' 或 'feat'),将填充零特征,形状: ({num_nodes_ntype}, {feature_dim})。") + feature_list.append(th.zeros((num_nodes_ntype, feature_dim))) + else: + raise ValueError(f"无法处理类型 '{ntype}' 的缺失特征,因为维度未知。") + + features_globally_ordered_binarized = th.cat(feature_list, dim=0) + print(f"拼接后的全局【二值化】特征张量形状: {features_globally_ordered_binarized.shape}") + if features_globally_ordered_binarized.shape[0] != num_nodes_global: + print(f"警告: 最终特征行数 ({features_globally_ordered_binarized.shape[0]}) 与计算的全局节点总数 ({num_nodes_global}) 不匹配!") + + feature_distance = th.empty((0, 0), device=features_globally_ordered_binarized.device) # 默认空 + if features_globally_ordered_binarized.shape[0] >= num_target_nodes: + target_node_features_binarized = features_globally_ordered_binarized[0:num_target_nodes] + print( + f"为前 {num_target_nodes} 个目标节点计算特征距离 (使用二值化特征),特征形状: {target_node_features_binarized.shape}") + feature_distance = self.pairwise_distance(target_node_features_binarized) + feature_distance = F.normalize(feature_distance, p=2, dim=1) + num_elements = feature_distance.numel() + if num_elements > 0 and hasattr(self.args, + 'edge_rate') and self.args.edge_rate > 0 and self.args.edge_rate < 1: + k = int(num_elements * self.args.edge_rate) + if k >= 1: + k = min(k, num_elements - 1) + if k == 0 and num_elements > 0: k = 1 # 确保 k 至少为1如果矩阵非空 + + if feature_distance.view(-1).shape[0] > 0 and k > 0 and k < feature_distance.view(-1).shape[ + 0]: # k 必须小于元素总数 + kth_val = th.kthvalue(feature_distance.view(-1), k)[0] + mask = (feature_distance > kth_val).float() + feature_distance = feature_distance * mask + print("特征距离已进行归一化和基于 edge_rate 的稀疏化处理。") + elif k >= feature_distance.view(-1).shape[0]: + print( + f"警告: k值({k}) 大于或等于元素数量({feature_distance.view(-1).shape[0]}),不进行kthvalue稀疏化。") + else: + print("警告: 无法有效进行kthvalue稀疏化 (k=0或矩阵为空)。") + else: + print("警告: edge_rate 过低 (k<1),无法稀疏化。") + elif not hasattr(self.args, 'edge_rate'): + print("警告: args 中未定义 edge_rate,跳过距离矩阵稀疏化。") + else: + print("警告: 距离矩阵为空或 edge_rate 无效,跳过稀疏化。") + else: + print( + f"错误: 全局特征行数 ({features_globally_ordered_binarized.shape[0]}) 小于目标节点数 ({num_target_nodes}),无法计算距离。") + + print(f"最终特征距离矩阵形状: {feature_distance.shape}") + return {'features': features_globally_ordered_binarized, 'feature_distance': feature_distance, + 'node_cnt': node_cnt, 'num_nodes_global': num_nodes_global} + + def pairwise_distance(self, x, y=None): + x_norm = (x ** 2).sum(1).view(-1, 1) + if y is not None: + y_t = torch.transpose(y, 0, 1) + y_norm = (y ** 2).sum(1).view(1, -1) + else: + y_t = torch.transpose(x, 0, 1) + y_norm = x_norm.view(1, -1) + dist = x_norm + y_norm - 2.0 * torch.mm(x, y_t) + return torch.clamp(dist, min=0.0) + + def train(self): + cnt_wait = 0; + best = 1e9 + if not os.path.exists(self.args.save_dir): + os.makedirs(self.args.save_dir) + + print("开始训练...") + for epoch in tqdm(range(self.args.nb_epochs), desc="训练进度"): + self.model.train() + self.optimizer.zero_grad() + emb_het, emb_hom = self.model(self.hg, self.features, self.feature_distance) + ## 得到的同质信息和异构信息 + embs_P1 = self.g(emb_het) + embs_P2 = self.g(emb_hom) + ####################################################################### + # The second term in Eq. (10): uniformity loss + intra_c = (embs_P1).T @ (embs_P1).contiguous() + intra_c = torch.exp(F.normalize(intra_c, p=2, dim=1)).sum() + loss_uni = torch.log(intra_c).mean() + + intra_c_2 = (embs_P2).T @ (embs_P2).contiguous() + intra_c_2 = torch.exp(F.normalize(intra_c_2, p=2, dim=1)).sum() + loss_uni += torch.log(intra_c_2).mean() + # C_P = embs_P2.T @ embs_P2 + # C_tilde_P = embs_P1.T @ embs_P1 + # loss_uni = torch.logsumexp(sum_of_cov_matrices.view(-1), dim=0) + ####################################################################### + # The first term in Eq. (10): invariance loss + # 这里不是MSE,但是也能起到类似的效果,而且效果好得多,实际使用行归一化后的整体嵌入集合的互协方差矩阵的对角线元素之和的负数 + inter_c = embs_P1.T @ embs_P2 + inter_c = F.normalize(inter_c, p=2, dim=1) + loss_inv = -torch.diagonal(inter_c).sum() + # loss_inv = torch.nn.functional.mse_loss(embs_P2, embs_P1, reduction='sum') + ####################################################################### + # Projection and Transformation + embs_Q2 = self.g_1(emb_het) + embs_Q1 = self.g_1(emb_hom) + embs_Q1_trans = self.p_1(embs_Q1) + + # The first term in Eq. (11) + inter_c = embs_Q1_trans.T @ embs_Q2 + inter_c = F.normalize(inter_c, p=2, dim=1) + loss_spe_inv = -torch.diagonal(inter_c).sum() + + ####################################################################### + # The second term in Eq. (11) + inter_c = embs_Q1_trans.T @ embs_Q1 + inter_c = F.normalize(inter_c, p=2, dim=1) + loss_spe_nontrival_1 = torch.diagonal(inter_c).sum() + + inter_c_1 = embs_Q1_trans.T @ embs_P2 + inter_c_1 = F.normalize(inter_c_1, p=2, dim=1) + loss_spe_nontrival_2 = torch.diagonal(inter_c_1).sum() + ######################################################################## + + loss_consistency = loss_inv + self.args.gamma * loss_uni + loss_specificity = loss_spe_inv - self.args.eta * (loss_spe_nontrival_1 + loss_spe_nontrival_2) + + loss = loss_consistency + self.args.lambbda * loss_specificity + + loss.backward() + self.optimizer.step() + train_loss = loss.item() + + if (train_loss < best): + best = train_loss + cnt_wait = 0 + args_to_save = {k: v for k, v in vars(self.args).items() if + not callable(v) and not isinstance(v, torch.nn.Module) and not isinstance(v, + dgl.DGLGraph)} + # 移除可能包含 RLock 的对象,例如 logger + if 'logger' in args_to_save: + del args_to_save['logger'] + + torch.save({ + 'epoch': epoch, 'model_state_dict': copy.deepcopy(self.model.state_dict()), + 'g_state_dict': copy.deepcopy(self.g.state_dict()), + 'g_1_state_dict': copy.deepcopy(self.g_1.state_dict()), + 'p_1_state_dict': copy.deepcopy(self.p_1.state_dict()), + 'optimizer_state_dict': self.optimizer.state_dict(), 'loss': train_loss, + 'hyperparameters': args_to_save, # 保存处理后的参数字典 + 'node_cnt': self.node_cnt + }, os.path.join(self.args.save_dir, f'hero_best_model_{self.args.dataset}.pth')) + else: + cnt_wait += 1 + + if (epoch + 1) % 50 == 0: + print( + f"Epoch {epoch + 1}/{self.args.nb_epochs}: 训练损失 {train_loss:.4f}, 等待次数 {cnt_wait}/{self.args.patience}, 当前最佳 {best:.4f}") + + if cnt_wait >= self.args.patience: + print(f"Epoch {epoch + 1}: 连续 {self.args.patience} 个 epoch 损失没有改善,触发早停!") + break + print("训练结束.") + + best_model_path = os.path.join(self.args.save_dir, f'hero_best_model_{self.args.dataset}.pth') + if os.path.exists(best_model_path): + print(f"加载最佳模型: {best_model_path}") + ckpt = torch.load(best_model_path, map_location=self.device) + self.model.load_state_dict(ckpt['model_state_dict']) + self.g.load_state_dict(ckpt['g_state_dict']) + self.g_1.load_state_dict(ckpt['g_1_state_dict']) + self.p_1.load_state_dict(ckpt['p_1_state_dict']) + print("最佳模型参数已加载。") + # 如果需要,可以从 ckpt['hyperparameters'] 恢复部分 args 设置 + # for key, value in ckpt['hyperparameters'].items(): + # setattr(self.args, key, value) + print("使用最佳模型进行评估...") + test_micro_f1, test_macro_f1 = self.evaluate() + print(f"最终测试结果 (最佳模型): Micro-F1 = {test_micro_f1:.5f}, Macro-F1 = {test_macro_f1:.5f}") + return test_macro_f1 + else: + print("错误:未找到保存的最佳模型文件。无法进行最终评估。") + return None + + def _full_test_step(self): + micro_f1, macro_f1 = self.evaluate() + metric = {'test_micro_f1': micro_f1, 'test_macro_f1': macro_f1} + return metric + + @torch.no_grad() + def evaluate(self, split='test'): + self.model.eval(); + self.g.eval(); + self.g_1.eval(); + self.p_1.eval() + graph = self.hg + eval_features = self.features.to(self.device) + eval_feature_distance = self.feature_distance.to(self.device) + + if eval_features is None or eval_feature_distance is None: + print(f"错误: 评估时 features 或 feature_distance 为 None。") + return -1.0, -1.0 + + embs_het, emb_hom = self.model.embed(graph, eval_features, eval_feature_distance) + h_concat = torch.cat((embs_het, emb_hom), dim=1) + embeddings_np = h_concat.detach().cpu().numpy() + + if not hasattr(self, 'task') or self.task is None: + print("错误: 无法访问 'self.task' 来获取标签和划分。") + return -1.0, -1.0 + try: + labels_all = self.task.get_labels().cpu().numpy() + idx_split = None + if split == 'train': + idx_split = self.train_idx if hasattr(self, 'train_idx') else self.task.dataset.train_idx + elif split == 'val': + idx_split = self.val_idx if hasattr(self, 'val_idx') and self.val_idx is not None else getattr( + self.task.dataset, 'val_idx', None) + if idx_split is None: + print(f"警告: '{split}' 集索引未找到,无法评估。") + return 0.0, 0.0 # 或者其他默认值 + elif split == 'test': + idx_split = self.test_idx if hasattr(self, 'test_idx') else self.task.dataset.test_idx + + if idx_split is None: + print(f"错误: 未能获取 '{split}' 集的划分索引。") + return -1.0, -1.0 + + idx_split = np.array(idx_split.cpu().numpy() if torch.is_tensor(idx_split) else idx_split).astype(int) + + if idx_split.size == 0: + print(f"警告: '{split}' 集索引为空。无法进行评估。") + return 0.0, 0.0 + if idx_split.max() >= embeddings_np.shape[0] or idx_split.max() >= len(labels_all): + max_emb_idx = embeddings_np.shape[0] - 1 + max_lbl_idx = len(labels_all) - 1 + print( + f"错误: '{split}' 集最大索引 ({idx_split.max()}) 超出嵌入数量 ({max_emb_idx}) 或标签数量 ({max_lbl_idx})!") + return -1.0, -1.0 + + except Exception as e: + print(f"获取标签或划分时出错: {e}") + import traceback + traceback.print_exc() + return -1.0, -1.0 + + train_idx_for_lr = self.train_idx if hasattr(self, 'train_idx') else self.task.dataset.train_idx + train_idx_for_lr = np.array( + train_idx_for_lr.cpu().numpy() if torch.is_tensor(train_idx_for_lr) else train_idx_for_lr).astype(int) + + if train_idx_for_lr.size == 0: + print("错误: 用于训练分类器的训练集索引为空。") + return -1.0, -1.0 + if train_idx_for_lr.max() >= embeddings_np.shape[0] or train_idx_for_lr.max() >= len(labels_all): + print(f"错误: 用于训练分类器的训练集索引 ({train_idx_for_lr.max()}) 越界!") + return -1.0, -1.0 + + train_embs_for_lr = embeddings_np[train_idx_for_lr] + train_labels_for_lr = labels_all[train_idx_for_lr] + eval_embs_target_split = embeddings_np[idx_split] + eval_labels_target_split = labels_all[idx_split] + + total_micro_f1, total_macro_f1 = 0, 0; + num_runs_actual = 0 + num_runs_total = 5 + for rs in range(num_runs_total): + lr = LogisticRegression(max_iter=1000, random_state=rs, solver='liblinear', C=1.0, n_jobs=-1) + try: + # 检查训练标签是否只有一个类别 + if len(np.unique(train_labels_for_lr)) < 2: + print(f"警告 (Run {rs + 1}): Logistic Regression 的训练标签中只有一个类别,跳过此轮评估。") + continue # 跳过这一轮 + + lr.fit(train_embs_for_lr, train_labels_for_lr) + Y_pred = lr.predict(eval_embs_target_split) + f1_micro = metrics.f1_score(eval_labels_target_split, Y_pred, average='micro') + f1_macro = metrics.f1_score(eval_labels_target_split, Y_pred, average='macro', zero_division=0) + total_micro_f1 += f1_micro; + total_macro_f1 += f1_macro + num_runs_actual += 1 + except ValueError as ve: + print(f"Logistic Regression 训练/预测时出错 (run {rs + 1}): {ve}") + + avg_micro_f1 = total_micro_f1 / num_runs_actual if num_runs_actual > 0 else 0.0 + avg_macro_f1 = total_macro_f1 / num_runs_actual if num_runs_actual > 0 else 0.0 + print( + f"\t[{split.capitalize()} Classification Avg ({num_runs_actual} runs)] Macro-F1 = {avg_macro_f1:.5f}, Micro-F1 = {avg_micro_f1:.5f}") + return avg_micro_f1, avg_macro_f1 \ No newline at end of file