diff --git a/.gitignore b/.gitignore index 3ac28403..e88dbc67 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ sw/nic/build/ sw/nic/build* sw/nic/third-party/libev-4.33/* sw/nic/gpuagent/vendor +.worktrees/ diff --git a/sw/nic/gpuagent/Makefile b/sw/nic/gpuagent/Makefile index f1c30b78..f1c0c16c 100644 --- a/sw/nic/gpuagent/Makefile +++ b/sw/nic/gpuagent/Makefile @@ -224,11 +224,11 @@ gogo-protos: $(OBJ_DIR)/%.o: $(TOPDIR)/%.cc @mkdir -p $(dir $@) # Create the necessary subdirectories in temp - $(CC) $(CFLAGS) $(DEFS) $(INCS_AMD_SMI) -DAMD_SMI -D__FNAME__=__FILE__ -DGPUAGENT_VERSION=\"$(GPUAGENT_VERSION)\" -c $< -o $@ + $(CC) $(CFLAGS) $(DEFS) $(INCS_AMD_SMI) -DAMD_SMI -DENABLE_ESMI_LIB -D__FNAME__=__FILE__ -DGPUAGENT_VERSION=\"$(GPUAGENT_VERSION)\" -c $< -o $@ $(OBJ_DIR)/%.o: $(TOPDIR)/%.c @mkdir -p $(dir $@) # Create the necessary subdirectories in temp - $(CC) $(CFLAGS) $(INCS_AMD_SMI) -DAMD_SMI -D__FNAME__=__FILE__ -DGPUAGENT_VERSION=\"$(GPUAGENT_VERSION)\" -c $< -o $@ + $(CC) $(CFLAGS) $(INCS_AMD_SMI) -DAMD_SMI -DENABLE_ESMI_LIB -D__FNAME__=__FILE__ -DGPUAGENT_VERSION=\"$(GPUAGENT_VERSION)\" -c $< -o $@ $(OBJ_DIR_GIM)/%.o: $(TOPDIR)/%.cc @mkdir -p $(dir $@) # Create the necessary subdirectories in temp diff --git a/sw/nic/gpuagent/api/include/aga_gpu.hpp b/sw/nic/gpuagent/api/include/aga_gpu.hpp index 9c4d3ca2..875495bd 100644 --- a/sw/nic/gpuagent/api/include/aga_gpu.hpp +++ b/sw/nic/gpuagent/api/include/aga_gpu.hpp @@ -813,6 +813,12 @@ typedef struct aga_gpu_stats_s { uint64_t gfx_activity_accumulated; /// memory activity accumulated in % uint64_t mem_activity_accumulated; + /// HSMP accumulation counter, incremented when accumulators update + uint64_t accumulation_counter; + /// accumulated socket XCC busy + uint64_t socket_gfx_busy_accumulated; + /// accumulated HBM bandwidth for all HBM stacks in the socket + uint64_t dram_bandwidth_accumulated; /// XGMI link statistics aga_gpu_xgmi_link_stats_t xgmi_link_stats[AGA_GPU_MAX_XGMI_LINKS]; /// GPU violation statistics diff --git a/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc b/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc index 78b28108..bc575b25 100644 --- a/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc +++ b/sw/nic/gpuagent/api/smi/amdsmi/smi_api.cc @@ -52,6 +52,10 @@ namespace aga { /// status and statistics std::mutex g_gpu_metrics_mutex; std::unordered_map g_gpu_metrics; +/// cache the CPU socket processor handle associated with each GPU, used to +/// read socket level HSMP accumulator metrics +std::mutex g_gpu_cpu_handle_mutex; +std::unordered_map g_gpu_cpu_handle; /// counter resolution in uJ; this is a constant value that we get once during /// init time and use whenever we want to calculate energy accumalated float g_energy_counter_resolution; @@ -1254,6 +1258,36 @@ smi_fill_vram_usage_ (aga_gpu_handle_t gpu_handle, return SDK_RET_OK; } +/// fill socket level HSMP accumulator metrics for the CPU socket associated +/// with this GPU; only available on platforms with HSMP (e.g. APU) +static void +smi_gpu_fill_hsmp_stats_ (aga_gpu_handle_t gpu_handle, aga_gpu_stats_t *stats) +{ +#ifdef ENABLE_ESMI_LIB + amdsmi_status_t status; + amdsmi_processor_handle cpu_handle = NULL; + amdsmi_hsmp_metrics_table_t table = {}; + + { + std::lock_guard lock(g_gpu_cpu_handle_mutex); + auto it = g_gpu_cpu_handle.find(gpu_handle); + if (it != g_gpu_cpu_handle.end()) { + cpu_handle = it->second; + } + } + if (cpu_handle == NULL) { + return; + } + status = amdsmi_get_hsmp_metrics_table(cpu_handle, &table); + if (status != AMDSMI_STATUS_SUCCESS) { + return; + } + stats->accumulation_counter = table.accumulation_counter; + stats->socket_gfx_busy_accumulated = table.socket_gfx_busy_acc; + stats->dram_bandwidth_accumulated = table.dram_bandwidth_acc; +#endif +} + sdk_ret_t smi_gpu_fill_stats (aga_gpu_handle_t gpu_handle, const aga_obj_key_t *gpu_key, @@ -1283,6 +1317,10 @@ smi_gpu_fill_stats (aga_gpu_handle_t gpu_handle, metrics_info = g_gpu_metrics[gpu_handle]; } } + // fill socket level HSMP accumulator metrics; this is independent of the + // GPU metrics table (it reads the CPU socket HSMP table) so it is filled + // here unconditionally, outside the metrics_info structure-size guard below + smi_gpu_fill_hsmp_stats_(gpu_handle, stats); if (metrics_info.common_header.structure_size != 0) { // power and voltage stats->avg_package_power = metrics_info.average_socket_power; @@ -2057,6 +2095,23 @@ smi_discover_gpus (uint32_t *num_gpu, aga_gpu_profile_t *gpu) "err {}", status); return amdsmi_ret_to_sdk_ret(status); } + // enumerate CPU socket handles once; on APU platforms (e.g. MI300A) the + // socket level HSMP accumulator metrics are read from the CPU socket + // handle, which lives in a separate namespace from the GPU socket handles + // and is only reachable via amdsmi_get_cpu_handles() + uint32_t num_cpu = 0; + amdsmi_processor_handle cpu_handles[AGA_MAX_SOCKET] = {}; +#ifdef ENABLE_ESMI_LIB + if (amdsmi_get_cpu_handles(&num_cpu, NULL) == AMDSMI_STATUS_SUCCESS) { + if (num_cpu > AGA_MAX_SOCKET) { + num_cpu = AGA_MAX_SOCKET; + } + if (amdsmi_get_cpu_handles(&num_cpu, &cpu_handles[0]) != + AMDSMI_STATUS_SUCCESS) { + num_cpu = 0; + } + } +#endif for (uint32_t i = 0; i < num_sockets; i++) { // for each socket get the number of processors status = amdsmi_get_processor_handles(socket_handles[i], @@ -2093,6 +2148,12 @@ smi_discover_gpus (uint32_t *num_gpu, aga_gpu_profile_t *gpu) proc_handles[j]); return ret; } + // map this GPU to its CPU socket handle (socket index i) + // for socket level HSMP accumulator metrics + if (i < num_cpu && cpu_handles[i] != NULL) { + std::lock_guard lock(g_gpu_cpu_handle_mutex); + g_gpu_cpu_handle[proc_handles[j]] = cpu_handles[i]; + } (*num_gpu)++; } } diff --git a/sw/nic/gpuagent/api/smi/amdsmi/smi_state.cc b/sw/nic/gpuagent/api/smi/amdsmi/smi_state.cc index d6a51c2f..e614af20 100644 --- a/sw/nic/gpuagent/api/smi/amdsmi/smi_state.cc +++ b/sw/nic/gpuagent/api/smi/amdsmi/smi_state.cc @@ -1555,11 +1555,17 @@ smi_state::init(aga_api_init_params_t *init_params) { amdsmi_status_t status; aga_gpu_profile_t gpu[AGA_MAX_GPU]; - // initialize smi library - status = amdsmi_init(AMDSMI_INIT_AMD_GPUS); + // APUS enables HSMP socket accumulators; fall back to GPUS on non-APU + status = amdsmi_init(AMDSMI_INIT_AMD_APUS); if (unlikely(status != AMDSMI_STATUS_SUCCESS)) { - AGA_TRACE_ERR("Failed to initialize amd smi library, err {}", status); - return amdsmi_ret_to_sdk_ret(status); + AGA_TRACE_WARN("APU smi init failed (err {}), falling back to GPU init", + status); + status = amdsmi_init(AMDSMI_INIT_AMD_GPUS); + if (unlikely(status != AMDSMI_STATUS_SUCCESS)) { + AGA_TRACE_ERR("Failed to initialize amd smi library, err {}", + status); + return amdsmi_ret_to_sdk_ret(status); + } } // discover gpus ret = aga::smi_discover_gpus(&num_gpu_, gpu); diff --git a/sw/nic/gpuagent/cli/cmd/gpu.go b/sw/nic/gpuagent/cli/cmd/gpu.go index 034f9718..be07e91c 100644 --- a/sw/nic/gpuagent/cli/cmd/gpu.go +++ b/sw/nic/gpuagent/cli/cmd/gpu.go @@ -1733,6 +1733,24 @@ func printGPUStats(gpu *aga.GPU, statsOnly bool) { "Memory activity accumulated", stats.GetMemoryActivityAccumulated()) } + if (stats.GetAccumulationCounter() != 0) && + (stats.GetAccumulationCounter() != UINT64_MAX_VAL) { + fmt.Printf(indent+"%-38s : %d\n", + "Accumulation counter", + stats.GetAccumulationCounter()) + } + if (stats.GetSocketGFXBusyAccumulated() != 0) && + (stats.GetSocketGFXBusyAccumulated() != UINT64_MAX_VAL) { + fmt.Printf(indent+"%-38s : %d\n", + "Socket GFX busy accumulated", + stats.GetSocketGFXBusyAccumulated()) + } + if (stats.GetDRAMBandwidthAccumulated() != 0) && + (stats.GetDRAMBandwidthAccumulated() != UINT64_MAX_VAL) { + fmt.Printf(indent+"%-38s : %d\n", + "DRAM bandwidth accumulated", + stats.GetDRAMBandwidthAccumulated()) + } for i, linkStats := range stats.GetXGMILinkStats() { link := "Link " + fmt.Sprintf("%v", i+1) if (linkStats.GetDataRead() != 0) && diff --git a/sw/nic/gpuagent/protos/gpu.proto b/sw/nic/gpuagent/protos/gpu.proto index b0310dc9..dc0912af 100644 --- a/sw/nic/gpuagent/protos/gpu.proto +++ b/sw/nic/gpuagent/protos/gpu.proto @@ -751,6 +751,12 @@ message GPUStats { uint64 GFXActivityAccumulated = 65; // memory activity accumulated in % uint64 MemoryActivityAccumulated = 66; + // accumulated HBM bandwidth for all HBM stacks in the socket + uint64 DRAMBandwidthAccumulated = 89; + // HSMP accumulation counter, incremented when accumulators update + uint64 AccumulationCounter = 90; + // accumulated socket XCC busy + uint64 SocketGFXBusyAccumulated = 91; // GPU XGMI link statistics repeated GPUXGMILinkStats XGMILinkStats = 67; // GPU violation statistics diff --git a/sw/nic/gpuagent/svc/gpu_to_proto.hpp b/sw/nic/gpuagent/svc/gpu_to_proto.hpp index e8b88708..ff242cdf 100644 --- a/sw/nic/gpuagent/svc/gpu_to_proto.hpp +++ b/sw/nic/gpuagent/svc/gpu_to_proto.hpp @@ -781,6 +781,9 @@ aga_gpu_api_stats_to_proto (GPUStats *proto_stats, proto_stats->set_fanspeed(stats->fan_speed); proto_stats->set_gfxactivityaccumulated(stats->gfx_activity_accumulated); proto_stats->set_memoryactivityaccumulated(stats->mem_activity_accumulated); + proto_stats->set_accumulationcounter(stats->accumulation_counter); + proto_stats->set_socketgfxbusyaccumulated(stats->socket_gfx_busy_accumulated); + proto_stats->set_drambandwidthaccumulated(stats->dram_bandwidth_accumulated); for (uint32_t i = 0; i < AGA_GPU_MAX_XGMI_LINKS; i++) { aga_gpu_xgmi_link_stats_to_proto(proto_stats->add_xgmilinkstats(), &stats->xgmi_link_stats[i]); diff --git a/sw/nic/third-party/rocm/amd_smi_lib/x86_64/lib/libamd_smi.so.26.4.0 b/sw/nic/third-party/rocm/amd_smi_lib/x86_64/lib/libamd_smi.so.26.4.0 index d8d57cef..2ab7274e 100755 Binary files a/sw/nic/third-party/rocm/amd_smi_lib/x86_64/lib/libamd_smi.so.26.4.0 and b/sw/nic/third-party/rocm/amd_smi_lib/x86_64/lib/libamd_smi.so.26.4.0 differ