Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion VERSION_NUMBER
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.3
0.1.4
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ option(GGML_WEBGPU "ggml: use WebGPU"
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
option(GGML_BUILD_PROFILE "ggml: enable ggml trace profiler (GGML_TRACE=1)" OFF)
option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_VIRTGPU "ggml: use the VirtGPU/Virglrenderer API Remoting frontend" OFF)
Expand Down
8 changes: 4 additions & 4 deletions ggml/include/ggml-profile.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ void ggml_set_current_token_idx(int idx);
void ggml_trace_log_begin(const char * name, const char * cat, const char * args);
void ggml_trace_log_end(const char * name, const char * cat, const char * args);

void ggml_profile_log_op_begin(struct ggml_tensor * tensor);
void ggml_profile_log_op_end(struct ggml_tensor * tensor);
void ggml_profile_log_op_begin(struct ggml_tensor * tensor, int ith, int nth);
void ggml_profile_log_op_end(struct ggml_tensor * tensor, int ith, int nth);

void ggml_profile_flush_tls(void);
#else
Expand All @@ -31,8 +31,8 @@ void ggml_profile_flush_tls(void);
#define ggml_set_current_token_idx(idx) ;
#define ggml_trace_log_begin(name, cat, args) ;
#define ggml_trace_log_end(name, cat, args) ;
#define ggml_profile_log_op_begin(tensor) ;
#define ggml_profile_log_op_end(tensor) ;
#define ggml_profile_log_op_begin(tensor, ith, nth) ;
#define ggml_profile_log_op_end(tensor, ith, nth) ;
#define ggml_profile_flush_tls() ;
#endif

Expand Down
6 changes: 6 additions & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ add_library(ggml-base
ggml-threading.h
ggml-quants.c
ggml-quants.h
ggml-profile.c
../include/ggml-profile.h
gguf.cpp)

set_target_properties(ggml-base PROPERTIES
Expand All @@ -218,6 +220,10 @@ if (GGML_BACKEND_DL)
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
endif()

if (GGML_BUILD_PROFILE)
target_compile_definitions(ggml-base PUBLIC GGML_BUILD_PROFILE)
endif()

if (GGML_SCHED_NO_REALLOC)
target_compile_definitions(ggml-base PUBLIC GGML_SCHED_NO_REALLOC)
endif()
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

if (GGML_BUILD_PROFILE)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_BUILD_PROFILE)
endif()

if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
Expand Down
8 changes: 8 additions & 0 deletions ggml/src/ggml-cpu/ggml-cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "traits.h"
#include "ggml-cpu-impl.h"
#include "ggml-impl.h"
#include "ggml-profile.h"
#include "quants.h"
#include "ggml-threading.h"
#include "unary-ops.h"
Expand Down Expand Up @@ -3061,13 +3062,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {

// TODO: move fused-op detection into ggml_graph_plan so fusion decisions are made once at planning time
// Try fused ops, fall back to normal compute
ggml_profile_log_op_begin(node, state->ith, params.nth);

const int n_fused = ggml_cpu_try_fuse_ops(cgraph, node_n, &params, cplan);
if (n_fused > 0) {
node_n += n_fused;
} else {
ggml_compute_forward(&params, node);
}

ggml_profile_log_op_end(node, state->ith, params.nth);

if (state->ith == 0 && cplan->abort_callback &&
cplan->abort_callback(cplan->abort_callback_data)) {
atomic_store_explicit(&tp->abort, node_n + 1, memory_order_relaxed);
Expand All @@ -3077,8 +3082,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
if (node_n + 1 < cgraph->n_nodes) {
ggml_barrier(state->threadpool);
}

}

ggml_profile_flush_tls();

#ifdef GGML_USE_OPENMP
GGML_PRINT_DEBUG("thread #%d compute-done cplan %p\n", state->ith, (const void *)cplan);
#else
Expand Down
73 changes: 67 additions & 6 deletions ggml/src/ggml-profile.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,16 @@ void ggml_trace_log_end(const char * name, const char * cat, const char * args)
GGML_UNUSED(args);
}

void ggml_profile_log_op_begin(struct ggml_tensor * t) {
void ggml_profile_log_op_begin(struct ggml_tensor * t, int ith, int nth) {
GGML_UNUSED(t);
GGML_UNUSED(ith);
GGML_UNUSED(nth);
}

void ggml_profile_log_op_end(struct ggml_tensor * t) {
void ggml_profile_log_op_end(struct ggml_tensor * t, int ith, int nth) {
GGML_UNUSED(t);
GGML_UNUSED(ith);
GGML_UNUSED(nth);
}

void ggml_profile_flush_tls(void) {
Expand All @@ -51,10 +55,14 @@ void ggml_profile_flush_trace(void) {
static int g_trace_enabled = 0;
static pthread_once_t g_trace_once = PTHREAD_ONCE_INIT;

void ggml_profile_flush_trace(void);

static void ggml_trace_parse_env(void) {
const char * env = getenv("GGML_TRACE");
if (env && (env[0] == '1' || env[0] == 'y' || env[0] == 'Y')) {
g_trace_enabled = 1;
// ensure the trace file gets its closing "]" and is flushed/closed on exit
atexit(ggml_profile_flush_trace);
} else {
g_trace_enabled = 0;
}
Expand Down Expand Up @@ -83,6 +91,7 @@ static FILE * g_trace_file = NULL;

__thread char g_trace_tls_buffer[TRACE_BUFFER_SIZE];
__thread size_t g_trace_tls_offset = 0;
__thread int g_trace_worker_named = 0;

int g_current_token_idx = -1;

Expand Down Expand Up @@ -223,33 +232,85 @@ void ggml_trace_log_end(const char * name, const char * cat, const char * args)
ggml_trace_write(buf);
}

void ggml_profile_log_op_begin(struct ggml_tensor * t) {
static void ggml_trace_log_begin_on_lane(const char * name, const char * cat, const char * args, int pid, int tid) {
if (!TRACE_ENABLED()) {
return;
}

char buf[1024];

snprintf(buf, sizeof(buf),
"{\"name\":\"%s\",\"cat\":\"%s\",\"ph\":\"B\","
"\"ts\":%ld,\"pid\":%d,\"tid\":%d,\"args\":{%s}},\n",
name, cat, ggml_trace_now_us(), pid, tid, args ? args : "");

ggml_trace_write(buf);
}

static void ggml_trace_log_end_on_lane(const char * name, const char * cat, const char * args, int pid, int tid) {
if (!TRACE_ENABLED()) {
return;
}

char buf[1024];

snprintf(buf, sizeof(buf),
"{\"name\":\"%s\",\"cat\":\"%s\",\"ph\":\"E\","
"\"ts\":%ld,\"pid\":%d,\"tid\":%d,\"args\":{%s}},\n",
name, cat, ggml_trace_now_us(), pid, tid, args ? args : "");

ggml_trace_write(buf);
}

static void ggml_trace_log_worker_name(int ith) {
if (!TRACE_ENABLED() || g_trace_worker_named) {
return;
}

char buf[256];
snprintf(buf, sizeof(buf),
"{\"name\":\"thread_name\",\"ph\":\"M\",\"pid\":2,\"tid\":%d,"
"\"args\":{\"name\":\"ggml-worker-%d\"}},\n",
ith, ith);

ggml_trace_write(buf);
g_trace_worker_named = 1;
}

void ggml_profile_log_op_begin(struct ggml_tensor * t, int ith, int nth) {
if (!TRACE_ENABLED()) {
return;
}

ggml_trace_log_worker_name(ith);

char args[1024];
ggml_format_op_args(t, NULL, args, sizeof(args));

size_t args_len = strlen(args);
snprintf(args + args_len, sizeof(args) - args_len, ",\"ith\":%d,\"nth\":%d", ith, nth);

char name[128], tname[64];
ggml_json_escape(ggml_tensor_name_safe(t), tname, sizeof(tname));

snprintf(name, sizeof(name), "%s (%s)", ggml_op_name(t->op), tname);

ggml_trace_log_begin(name, "Operator", args);
ggml_trace_log_begin_on_lane(name, "Operator", args, 2, ith);
}

void ggml_profile_log_op_end(struct ggml_tensor * t) {
void ggml_profile_log_op_end(struct ggml_tensor * t, int ith, int nth) {
if (!TRACE_ENABLED()) {
return;
}

GGML_UNUSED(nth);

char name[128], tname[64];
ggml_json_escape(ggml_tensor_name_safe(t), tname, sizeof(tname));

snprintf(name, sizeof(name), "%s (%s)", ggml_op_name(t->op), tname);

ggml_trace_log_end(name, "Operator", NULL);
ggml_trace_log_end_on_lane(name, "Operator", NULL, 2, ith);
}

void ggml_profile_flush_tls(void) {
Expand Down
Loading