diff --git a/bindings/binding.cc b/bindings/binding.cc index 67f2b802..acbf99a0 100644 --- a/bindings/binding.cc +++ b/bindings/binding.cc @@ -21,6 +21,7 @@ #include "allocation-profile-node.hh" #include "profilers/heap.hh" #include "profilers/wall.hh" +#include "translate-time-profile.hh" #ifdef __linux__ #include @@ -49,6 +50,7 @@ NODE_MODULE_INIT(/* exports, module, context */) { #endif dd::AllocationProfileNodeView::Init(exports); + dd::TimeProfileNodeView::Init(exports); dd::HeapProfiler::Init(exports); dd::WallProfiler::Init(exports); Nan::SetMethod(exports, "getNativeThreadId", GetNativeThreadId); diff --git a/bindings/per-isolate-data.cc b/bindings/per-isolate-data.cc index 424f9c47..5898fce2 100644 --- a/bindings/per-isolate-data.cc +++ b/bindings/per-isolate-data.cc @@ -56,6 +56,10 @@ Nan::Global& PerIsolateData::AllocationNodeConstructor() { return allocation_node_constructor; } +Nan::Global& PerIsolateData::TimeProfileNodeConstructor() { + return time_profile_node_constructor; +} + std::shared_ptr& PerIsolateData::GetHeapProfilerState() { return heap_profiler_state; } diff --git a/bindings/per-isolate-data.hh b/bindings/per-isolate-data.hh index dba9d52a..618aa8a9 100644 --- a/bindings/per-isolate-data.hh +++ b/bindings/per-isolate-data.hh @@ -29,6 +29,7 @@ class PerIsolateData { private: Nan::Global wall_profiler_constructor; Nan::Global allocation_node_constructor; + Nan::Global time_profile_node_constructor; std::shared_ptr heap_profiler_state; PerIsolateData() {} @@ -38,6 +39,7 @@ class PerIsolateData { Nan::Global& WallProfilerConstructor(); Nan::Global& AllocationNodeConstructor(); + Nan::Global& TimeProfileNodeConstructor(); std::shared_ptr& GetHeapProfilerState(); }; diff --git a/bindings/profilers/wall.cc b/bindings/profilers/wall.cc index f5e6cc7f..0f21b56a 100644 --- a/bindings/profilers/wall.cc +++ b/bindings/profilers/wall.cc @@ -942,6 +942,34 @@ NAN_METHOD(WallProfiler::Stop) { info.GetReturnValue().Set(profile); } +// stopAndCollect(restart, callback): callback result +NAN_METHOD(WallProfiler::StopAndCollect) { + if (info.Length() != 2) { + return Nan::ThrowTypeError("stopAndCollect must have two arguments."); + } + if (!info[0]->IsBoolean()) { + return Nan::ThrowTypeError("Restart must be a boolean."); + } + if (!info[1]->IsFunction()) { + return Nan::ThrowTypeError("stopAndCollect requires a callback."); + } + + bool restart = info[0].As()->Value(); + auto callback = info[1].As(); + + WallProfiler* wallProfiler = + Nan::ObjectWrap::Unwrap(info.This()); + + v8::Local result; + auto err = wallProfiler->StopAndCollectImpl(restart, callback, result); + if (!err.success) { + return Nan::ThrowTypeError(err.msg.c_str()); + } + if (!result.IsEmpty()) { + info.GetReturnValue().Set(result); + } +} + bool WallProfiler::waitForSignal(uint64_t targetCallCount) { auto currentCallCount = noCollectCallCount_.load(std::memory_order_relaxed); std::atomic_signal_fence(std::memory_order_acquire); @@ -968,7 +996,8 @@ bool WallProfiler::waitForSignal(uint64_t targetCallCount) { return res >= targetCallCount; } -Result WallProfiler::StopImpl(bool restart, v8::Local& profile) { +template +Result WallProfiler::StopCore(bool restart, ProfileBuilder&& buildProfile) { if (!started_) { return Result{"Stop called on not started profiler."}; } @@ -1030,9 +1059,12 @@ Result WallProfiler::StopImpl(bool restart, v8::Local& profile) { std::memory_order_relaxed); } - if (withContexts_) { - int64_t nonJSThreadsCpuTime{}; + ContextsByNode contextsByNode; + ContextsByNode* contextsByNodePtr = nullptr; + int64_t nonJSThreadsCpuTime = 0; + bool hasCpuTime = false; + if (withContexts_) { if (isMainThread_ && collectCpuTime_) { // account for non-JS threads CPU only in main thread // CPU time of non-JS threads is the difference between process CPU time @@ -1044,18 +1076,14 @@ Result WallProfiler::StopImpl(bool restart, v8::Local& profile) { std::max(processCpu - totalWorkerCpu, ProcessCpuClock::duration{}) .count(); } - auto contextsByNode = + contextsByNode = GetContextsByNode(v8_profile, contexts, startThreadCpuTime); + contextsByNodePtr = &contextsByNode; + hasCpuTime = collectCpuTime_; + } - profile = TranslateTimeProfile(v8_profile, - includeLines_, - &contextsByNode, - collectCpuTime_, - nonJSThreadsCpuTime); + buildProfile(v8_profile, hasCpuTime, nonJSThreadsCpuTime, contextsByNodePtr); - } else { - profile = TranslateTimeProfile(v8_profile, includeLines_); - } v8_profile->Delete(); if (!restart) { @@ -1072,6 +1100,42 @@ Result WallProfiler::StopImpl(bool restart, v8::Local& profile) { return {}; } +Result WallProfiler::StopImpl(bool restart, v8::Local& profile) { + return StopCore(restart, + [&](const v8::CpuProfile* v8_profile, + bool hasCpuTime, + int64_t nonJSThreadsCpuTime, + ContextsByNode* contextsByNodePtr) { + profile = TranslateTimeProfile(v8_profile, + includeLines_, + contextsByNodePtr, + hasCpuTime, + nonJSThreadsCpuTime); + }); +} + +Result WallProfiler::StopAndCollectImpl(bool restart, + v8::Local callback, + v8::Local& result) { + return StopCore( + restart, + [&](const v8::CpuProfile* v8_profile, + bool hasCpuTime, + int64_t nonJSThreadsCpuTime, + ContextsByNode* contextsByNodePtr) { + auto* isolate = Isolate::GetCurrent(); + TimeProfileViewState state{includeLines_, contextsByNodePtr, {}}; + auto profile_view = BuildTimeProfileView( + v8_profile, hasCpuTime, nonJSThreadsCpuTime, state); + v8::Local argv[] = {profile_view}; + auto cb_result = Nan::Call( + callback, isolate->GetCurrentContext()->Global(), 1, argv); + if (!cb_result.IsEmpty()) { + result = cb_result.ToLocalChecked(); + } + }); +} + NAN_MODULE_INIT(WallProfiler::Init) { Local tpl = Nan::New(New); Local className = Nan::New("TimeProfiler").ToLocalChecked(); @@ -1085,6 +1149,7 @@ NAN_MODULE_INIT(WallProfiler::Init) { Nan::SetPrototypeMethod(tpl, "start", Start); Nan::SetPrototypeMethod(tpl, "stop", Stop); + Nan::SetPrototypeMethod(tpl, "stopAndCollect", StopAndCollect); Nan::SetPrototypeMethod(tpl, "dispose", Dispose); Nan::SetPrototypeMethod(tpl, "v8ProfilerStuckEventLoopDetected", diff --git a/bindings/profilers/wall.hh b/bindings/profilers/wall.hh index 7e01f354..a68af759 100644 --- a/bindings/profilers/wall.hh +++ b/bindings/profilers/wall.hh @@ -155,7 +155,12 @@ class WallProfiler : public Nan::ObjectWrap { Result StartImpl(); v8::ProfilerId StartInternal(); + template + Result StopCore(bool restart, ProfileBuilder&& buildProfile); Result StopImpl(bool restart, v8::Local& profile); + Result StopAndCollectImpl(bool restart, + v8::Local callback, + v8::Local& result); CollectionMode collectionMode() { auto res = collectionMode_.load(std::memory_order_relaxed); @@ -185,6 +190,7 @@ class WallProfiler : public Nan::ObjectWrap { static NAN_METHOD(New); static NAN_METHOD(Start); static NAN_METHOD(Stop); + static NAN_METHOD(StopAndCollect); static NAN_METHOD(V8ProfilerStuckEventLoopDetected); static NAN_METHOD(Dispose); static NAN_MODULE_INIT(Init); diff --git a/bindings/translate-time-profile.cc b/bindings/translate-time-profile.cc index 0d85976a..4a915a6f 100644 --- a/bindings/translate-time-profile.cc +++ b/bindings/translate-time-profile.cc @@ -16,11 +16,267 @@ #include "translate-time-profile.hh" #include +#include "per-isolate-data.hh" #include "profile-translator.hh" namespace dd { namespace { + +TimeProfileNodeInfo* AllocNode(TimeProfileViewState* state, + const v8::CpuProfileNode* node, + const v8::CpuProfileNode* metadata_node, + int line_number, + int column_number, + int hit_count, + bool is_line_root = false) { + auto info = std::make_unique(); + info->node = node; + info->metadata_node = metadata_node; + info->line_number = line_number; + info->column_number = column_number; + info->hit_count = hit_count; + info->is_line_root = is_line_root; + info->state = state; + auto* raw = info.get(); + state->owned_nodes.push_back(std::move(info)); + return raw; +} + +// Line-info mode: for a given V8 node, append its line ticks (leaves with +// hit_count > 0) and child calls (intermediate nodes with hit_count 0). +void AppendLineChildren(TimeProfileViewState* state, + const v8::CpuProfileNode* node, + std::vector& out) { + unsigned int hitLineCount = node->GetHitLineCount(); + unsigned int hitCount = node->GetHitCount(); + + if (hitLineCount > 0) { + std::vector entries(hitLineCount); + node->GetLineTicks(&entries[0], hitLineCount); + for (const auto& entry : entries) { + int column = 0; +#if V8_MAJOR_VERSION >= 14 + column = entry.column; +#endif + out.push_back( + AllocNode(state, node, node, entry.line, column, entry.hit_count)); + } + } else if (hitCount > 0) { + out.push_back(AllocNode(state, + node, + node, + node->GetLineNumber(), + node->GetColumnNumber(), + hitCount)); + } + + int32_t count = node->GetChildrenCount(); + for (int32_t i = 0; i < count; i++) { + auto* child = node->GetChild(i); + out.push_back(AllocNode(state, + child, + node, + child->GetLineNumber(), + child->GetColumnNumber(), + 0)); + } +} + +int ResolveHitCount(const v8::CpuProfileNode* node, ContextsByNode* cbn) { + if (!cbn) return node->GetHitCount(); + auto it = cbn->find(node); + return it != cbn->end() ? it->second.hitcount : 0; +} + +int ComputeTotalHitCount(const v8::CpuProfileNode* node, ContextsByNode* cbn) { + int total = ResolveHitCount(node, cbn); + int32_t count = node->GetChildrenCount(); + for (int32_t i = 0; i < count; i++) { + total += ComputeTotalHitCount(node->GetChild(i), cbn); + } + return total; +} + +// WrapNode: create a JS wrapper for a profile node. +// Normal mode stores CpuProfileNode* directly. +// line-info mode stores owned info. +v8::Local WrapNode(const v8::CpuProfileNode* node, + TimeProfileViewState* state) { + auto* isolate = v8::Isolate::GetCurrent(); + auto ctor = + Nan::New(PerIsolateData::For(isolate)->TimeProfileNodeConstructor()); + auto obj = Nan::NewInstance(ctor).ToLocalChecked(); + Nan::SetInternalFieldPointer(obj, 0, const_cast(node)); + Nan::SetInternalFieldPointer(obj, 1, state); + return obj; +} + +v8::Local WrapNode(TimeProfileNodeInfo* info) { + auto* isolate = v8::Isolate::GetCurrent(); + auto ctor = + Nan::New(PerIsolateData::For(isolate)->TimeProfileNodeConstructor()); + auto obj = Nan::NewInstance(ctor).ToLocalChecked(); + Nan::SetInternalFieldPointer(obj, 0, info); + Nan::SetInternalFieldPointer(obj, 1, info->state); + return obj; +} + +// Extracts the two internal fields from a JS wrapper object. +// field 0 represents the node data (depends on mode, see below). +// field 1 TimeProfileViewState* (shared state, always present). +// +// Line-info: field 0 is a TimeProfileNodeInfo* holding synthetic +// line/column/hitCount values. Normal: field 0 is a CpuProfileNode* pointing +// directly to V8. +struct NodeFields { + void* ptr; + TimeProfileViewState* state; + + bool is_line_info() const { return state->include_line_info; } + + TimeProfileNodeInfo* as_info() const { + return static_cast(ptr); + } + + const v8::CpuProfileNode* as_node() const { + return static_cast(ptr); + } +}; + +NodeFields GetFields(const Nan::PropertyCallbackInfo& info) { + return {Nan::GetInternalFieldPointer(info.Holder(), 0), + static_cast( + Nan::GetInternalFieldPointer(info.Holder(), 1))}; +} + +NAN_GETTER(GetName) { + auto fields = GetFields(info); + if (fields.is_line_info()) { + info.GetReturnValue().Set( + fields.as_info()->metadata_node->GetFunctionName()); + } else { + info.GetReturnValue().Set(fields.as_node()->GetFunctionName()); + } +} + +NAN_GETTER(GetScriptName) { + auto fields = GetFields(info); + if (fields.is_line_info()) { + info.GetReturnValue().Set( + fields.as_info()->metadata_node->GetScriptResourceName()); + } else { + info.GetReturnValue().Set(fields.as_node()->GetScriptResourceName()); + } +} + +NAN_GETTER(GetScriptId) { + auto fields = GetFields(info); + if (fields.is_line_info()) { + info.GetReturnValue().Set( + Nan::New(fields.as_info()->metadata_node->GetScriptId())); + } else { + info.GetReturnValue().Set( + Nan::New(fields.as_node()->GetScriptId())); + } +} + +NAN_GETTER(GetLineNumber) { + auto fields = GetFields(info); + if (fields.is_line_info()) { + info.GetReturnValue().Set( + Nan::New(fields.as_info()->line_number)); + } else { + info.GetReturnValue().Set( + Nan::New(fields.as_node()->GetLineNumber())); + } +} + +NAN_GETTER(GetColumnNumber) { + auto fields = GetFields(info); + if (fields.is_line_info()) { + info.GetReturnValue().Set( + Nan::New(fields.as_info()->column_number)); + } else { + info.GetReturnValue().Set( + Nan::New(fields.as_node()->GetColumnNumber())); + } +} + +NAN_GETTER(GetHitCount) { + auto fields = GetFields(info); + if (fields.is_line_info()) { + info.GetReturnValue().Set( + Nan::New(fields.as_info()->hit_count)); + } else { + info.GetReturnValue().Set(Nan::New( + ResolveHitCount(fields.as_node(), fields.state->contexts_by_node))); + } +} + +NAN_GETTER(GetContexts) { + auto fields = GetFields(info); + auto* isolate = v8::Isolate::GetCurrent(); + // Line-info nodes and nodes without context tracking return empty contexts. + if (fields.is_line_info() || !fields.state->contexts_by_node) { + info.GetReturnValue().Set(v8::Array::New(isolate, 0)); + return; + } + auto it = fields.state->contexts_by_node->find(fields.as_node()); + if (it != fields.state->contexts_by_node->end()) { + info.GetReturnValue().Set(it->second.contexts); + } else { + info.GetReturnValue().Set(v8::Array::New(isolate, 0)); + } +} + +NAN_GETTER(GetChildren) { + auto fields = GetFields(info); + auto* isolate = info.GetIsolate(); + auto ctx = isolate->GetCurrentContext(); + + if (fields.is_line_info()) { + std::vector children; + + // Root in line-info mode is flattened from each direct child to preserve + // eager v1 top-level shape. + if (fields.as_info()->is_line_root) { + int32_t count = fields.as_info()->node->GetChildrenCount(); + for (int32_t i = 0; i < count; i++) { + AppendLineChildren( + fields.state, fields.as_info()->node->GetChild(i), children); + } + auto arr = v8::Array::New(isolate, children.size()); + for (size_t i = 0; i < children.size(); i++) { + arr->Set(ctx, i, WrapNode(children[i])).Check(); + } + info.GetReturnValue().Set(arr); + return; + } + + // In line-info mode, leaf nodes (hitCount > 0) have no children. + if (fields.as_info()->hit_count > 0) { + info.GetReturnValue().Set(v8::Array::New(isolate, 0)); + return; + } + + AppendLineChildren(fields.state, fields.as_info()->node, children); + auto arr = v8::Array::New(isolate, children.size()); + for (size_t i = 0; i < children.size(); i++) { + arr->Set(ctx, i, WrapNode(children[i])).Check(); + } + info.GetReturnValue().Set(arr); + } else { + int32_t count = fields.as_node()->GetChildrenCount(); + auto arr = v8::Array::New(isolate, count); + for (int32_t i = 0; i < count; i++) { + arr->Set(ctx, i, WrapNode(fields.as_node()->GetChild(i), fields.state)) + .Check(); + } + info.GetReturnValue().Set(arr); + } +} + class TimeProfileTranslator : ProfileTranslator { private: ContextsByNode* contextsByNode; @@ -236,6 +492,77 @@ class TimeProfileTranslator : ProfileTranslator { }; } // namespace +NAN_MODULE_INIT(TimeProfileNodeView::Init) { + v8::Local tpl = Nan::New(); + tpl->SetClassName(Nan::New("TimeProfileNode").ToLocalChecked()); + tpl->InstanceTemplate()->SetInternalFieldCount(2); + + auto inst = tpl->InstanceTemplate(); + Nan::SetAccessor(inst, Nan::New("name").ToLocalChecked(), GetName); + Nan::SetAccessor( + inst, Nan::New("scriptName").ToLocalChecked(), GetScriptName); + Nan::SetAccessor(inst, Nan::New("scriptId").ToLocalChecked(), GetScriptId); + Nan::SetAccessor( + inst, Nan::New("lineNumber").ToLocalChecked(), GetLineNumber); + Nan::SetAccessor( + inst, Nan::New("columnNumber").ToLocalChecked(), GetColumnNumber); + Nan::SetAccessor(inst, Nan::New("hitCount").ToLocalChecked(), GetHitCount); + Nan::SetAccessor(inst, Nan::New("children").ToLocalChecked(), GetChildren); + Nan::SetAccessor(inst, Nan::New("contexts").ToLocalChecked(), GetContexts); + + PerIsolateData::For(v8::Isolate::GetCurrent()) + ->TimeProfileNodeConstructor() + .Reset(Nan::GetFunction(tpl).ToLocalChecked()); +} + +// Builds a lazy JS profile view. +// For non-line-info, wrappers store raw CpuProfileNode pointers. +// For line-info, owned TimeProfileNodeInfo structs are used for synthetic +// nodes. +v8::Local BuildTimeProfileView(const v8::CpuProfile* profile, + bool has_cpu_time, + int64_t non_js_threads_cpu_time, + TimeProfileViewState& state) { + auto* isolate = v8::Isolate::GetCurrent(); + v8::Local js_profile = v8::Object::New(isolate); + + auto* root_node = profile->GetTopDownRoot(); + + if (state.include_line_info) { + auto* root_info = AllocNode(&state, + root_node, + root_node, + root_node->GetLineNumber(), + root_node->GetColumnNumber(), + 0, + true); + auto root = WrapNode(root_info); + + Nan::Set(js_profile, Nan::New("topDownRoot").ToLocalChecked(), root); + } else { + Nan::Set(js_profile, + Nan::New("topDownRoot").ToLocalChecked(), + WrapNode(root_node, &state)); + } + Nan::Set(js_profile, + Nan::New("startTime").ToLocalChecked(), + Nan::New(profile->GetStartTime())); + Nan::Set(js_profile, + Nan::New("endTime").ToLocalChecked(), + Nan::New(profile->GetEndTime())); + Nan::Set(js_profile, + Nan::New("hasCpuTime").ToLocalChecked(), + Nan::New(has_cpu_time)); + Nan::Set(js_profile, + Nan::New("nonJSThreadsCpuTime").ToLocalChecked(), + Nan::New(non_js_threads_cpu_time)); + Nan::Set(js_profile, + Nan::New("totalHitCount").ToLocalChecked(), + Nan::New( + ComputeTotalHitCount(root_node, state.contexts_by_node))); + return js_profile; +} + v8::Local TranslateTimeProfile(const v8::CpuProfile* profile, bool includeLineInfo, ContextsByNode* contextsByNode, diff --git a/bindings/translate-time-profile.hh b/bindings/translate-time-profile.hh index c83115d5..0f6494cd 100644 --- a/bindings/translate-time-profile.hh +++ b/bindings/translate-time-profile.hh @@ -16,11 +16,51 @@ #pragma once +#include #include +#include +#include +#include #include "contexts.hh" namespace dd { +struct TimeProfileNodeInfo; + +// Shared state for the lazy profile view. +// In line-info mode, owned_nodes keeps synthetic TimeProfileNodeInfo objects +// alive for as long as JS wrappers may reference them. +// In normal mode, owned_nodes stays empty - wrappers point directly to V8 +// nodes. +struct TimeProfileViewState { + bool include_line_info; + ContextsByNode* contexts_by_node; + std::vector> owned_nodes; +}; + +// Line-info mode only: stored in internal field 0 of JS wrappers. +// Needed because line-info nodes have line/column/hitCount values +// and a metadata_node that differs from the traversal node. +struct TimeProfileNodeInfo { + const v8::CpuProfileNode* node; + const v8::CpuProfileNode* metadata_node; + int line_number; + int column_number; + int hit_count; + bool is_line_root; + TimeProfileViewState* state; +}; + +class TimeProfileNodeView { + public: + static NAN_MODULE_INIT(Init); +}; + +v8::Local BuildTimeProfileView(const v8::CpuProfile* profile, + bool has_cpu_time, + int64_t non_js_threads_cpu_time, + TimeProfileViewState& state); + v8::Local TranslateTimeProfile( const v8::CpuProfile* profile, bool includeLineInfo, diff --git a/ts/src/index.ts b/ts/src/index.ts index 92a7ec5c..81cda61d 100644 --- a/ts/src/index.ts +++ b/ts/src/index.ts @@ -34,6 +34,7 @@ export const time = { profile: timeProfiler.profile, start: timeProfiler.start, stop: timeProfiler.stop, + profileV2: timeProfiler.profileV2, getContext: timeProfiler.getContext, setContext: timeProfiler.setContext, runWithContext: timeProfiler.runWithContext, diff --git a/ts/src/profile-serializer.ts b/ts/src/profile-serializer.ts index 7566beef..7b3f0ec9 100644 --- a/ts/src/profile-serializer.ts +++ b/ts/src/profile-serializer.ts @@ -259,7 +259,7 @@ function createAllocationValueType(table: StringTable): ValueType { }); } -function computeTotalHitCount(root: TimeProfileNode): number { +export function computeTotalHitCount(root: TimeProfileNode): number { return ( root.hitCount + (root.children as TimeProfileNode[]).reduce( @@ -366,7 +366,8 @@ export function serializeTimeProfile( // For very short durations, computation becomes meaningless (eg. if there is only one hit), // therefore keep intervalMicros as a lower bound and 2 * intervalMicros as upper bound. if (recomputeSamplingInterval) { - const totalHitCount = computeTotalHitCount(prof.topDownRoot); + const totalHitCount = + prof.totalHitCount ?? computeTotalHitCount(prof.topDownRoot); if (totalHitCount > 0) { intervalMicros = Math.min( Math.max( diff --git a/ts/src/time-profiler.ts b/ts/src/time-profiler.ts index 42d2c1a8..ed2fb4df 100644 --- a/ts/src/time-profiler.ts +++ b/ts/src/time-profiler.ts @@ -27,7 +27,11 @@ import { getNativeThreadId, constants as profilerConstants, } from './time-profiler-bindings'; -import {GenerateTimeLabelsFunction, TimeProfilerMetrics} from './v8-types'; +import { + GenerateTimeLabelsFunction, + TimeProfile, + TimeProfilerMetrics, +} from './v8-types'; import {isMainThread} from 'worker_threads'; import {AsyncLocalStorage} from 'async_hooks'; const {kSampleCount} = profilerConstants; @@ -38,12 +42,45 @@ const DEFAULT_DURATION_MILLIS: Milliseconds = 60000; type Microseconds = number; type Milliseconds = number; -let gProfiler: InstanceType | undefined; +type NativeTimeProfiler = InstanceType & { + stopAndCollect?: ( + restart: boolean, + callback: (profile: TimeProfile) => T, + ) => T; +}; + +let gProfiler: NativeTimeProfiler | undefined; let gStore: AsyncLocalStorage | undefined; let gSourceMapper: SourceMapper | undefined; let gIntervalMicros: Microseconds; let gV8ProfilerStuckEventLoopDetected = 0; +function handleStopRestart() { + if (!gProfiler) { + return; + } + gV8ProfilerStuckEventLoopDetected = + gProfiler.v8ProfilerStuckEventLoopDetected(); + // Workaround for v8 bug, where profiler event processor thread is stuck in + // a loop eating 100% CPU, leading to empty profiles. + // Fully stop and restart the profiler to reset the profile to a valid state. + if (gV8ProfilerStuckEventLoopDetected > 0) { + gProfiler.stop(false); + gProfiler.start(); + } +} + +function handleStopNoRestart() { + gV8ProfilerStuckEventLoopDetected = 0; + gProfiler?.dispose(); + gProfiler = undefined; + gSourceMapper = undefined; + if (gStore !== undefined) { + gStore.disable(); + gStore = undefined; + } +} + /** Make sure to stop profiler before node shuts down, otherwise profiling * signal might cause a crash if it occurs during shutdown */ process.once('exit', () => { @@ -89,6 +126,13 @@ export async function profile(options: TimeProfilerOptions = {}) { return stop(); } +export async function profileV2(options: TimeProfilerOptions = {}) { + options = {...DEFAULT_OPTIONS, ...options}; + start(options); + await setTimeout(options.durationMillis!); + return stopV2(); +} + // Temporarily retained for backwards compatibility with older tracer export function start(options: TimeProfilerOptions = {}) { options = {...DEFAULT_OPTIONS, ...options}; @@ -122,17 +166,9 @@ export function stop( const profile = gProfiler.stop(restart); if (restart) { - gV8ProfilerStuckEventLoopDetected = - gProfiler.v8ProfilerStuckEventLoopDetected(); - // Workaround for v8 bug, where profiler event processor thread is stuck in - // a loop eating 100% CPU, leading to empty profiles. - // Fully stop and restart the profiler to reset the profile to a valid state. - if (gV8ProfilerStuckEventLoopDetected > 0) { - gProfiler.stop(false); - gProfiler.start(); - } + handleStopRestart(); } else { - gV8ProfilerStuckEventLoopDetected = 0; + handleStopNoRestart(); } const serializedProfile = serializeTimeProfile( @@ -143,14 +179,39 @@ export function stop( generateLabels, lowCardinalityLabels, ); - if (!restart) { - gProfiler.dispose(); - gProfiler = undefined; - gSourceMapper = undefined; - if (gStore !== undefined) { - gStore.disable(); - gStore = undefined; - } + return serializedProfile; +} + +/** + * Same as stop() but uses the lazy callback path: serialization happens inside + * a native callback while the V8 profile is still alive. + * This reduces memory overhead. + */ +export function stopV2( + restart = false, + generateLabels?: GenerateTimeLabelsFunction, + lowCardinalityLabels?: string[], +) { + if (!gProfiler) { + throw new Error('Wall profiler is not started'); + } + + const serializedProfile = gProfiler.stopAndCollect( + restart, + (profile: TimeProfile) => + serializeTimeProfile( + profile, + gIntervalMicros, + gSourceMapper, + true, + generateLabels, + lowCardinalityLabels, + ), + ); + if (restart) { + handleStopRestart(); + } else { + handleStopNoRestart(); } return serializedProfile; } diff --git a/ts/src/v8-types.ts b/ts/src/v8-types.ts index 1becf7d2..87fbc333 100644 --- a/ts/src/v8-types.ts +++ b/ts/src/v8-types.ts @@ -25,6 +25,8 @@ export interface TimeProfile { hasCpuTime?: boolean; /** CPU time of non-JS threads, only reported for the main worker thread */ nonJSThreadsCpuTime?: number; + /** Computed in C++ to avoid a full JS tree traversal. */ + totalHitCount?: number; } export interface ProfileNode { diff --git a/ts/test/test-time-profiler.ts b/ts/test/test-time-profiler.ts index 559247f1..87c8e4a5 100644 --- a/ts/test/test-time-profiler.ts +++ b/ts/test/test-time-profiler.ts @@ -16,6 +16,7 @@ import * as sinon from 'sinon'; import {time, getNativeThreadId} from '../src'; +import {profileV2, stopV2} from '../src/time-profiler'; import * as v8TimeProfiler from '../src/time-profiler-bindings'; import {timeProfile, v8TimeProfile} from './profiles-for-tests'; import {hrtime} from 'process'; @@ -24,6 +25,7 @@ import {AssertionError} from 'assert'; import {GenerateTimeLabelsArgs, LabelSet} from '../src/v8-types'; import {satisfies} from 'semver'; import {setTimeout as setTimeoutPromise} from 'timers/promises'; +import {fork} from 'child_process'; import assert from 'assert'; @@ -495,6 +497,145 @@ describe('Time Profiler', () => { }); }); + describe('profileV2', () => { + it('should exclude program and idle time', async () => { + const profile = await time.profileV2(PROFILE_OPTIONS); + assert.ok(profile.stringTable); + assert.equal(profile.stringTable.strings!.indexOf('(program)'), -1); + }); + + it('should preserve line-number root children metadata in lazy view', function () { + if (unsupportedPlatform) { + this.skip(); + } + + function hotPath() { + const end = hrtime.bigint() + 2_000_000n; + while (hrtime.bigint() < end); + } + + const profiler = new v8TimeProfiler.TimeProfiler({ + intervalMicros: 100, + durationMillis: 200, + lineNumbers: true, + withContexts: false, + workaroundV8Bug: false, + collectCpuTime: false, + collectAsyncId: false, + useCPED: false, + isMainThread: true, + }); + + profiler.start(); + try { + const deadline = Date.now() + 200; + while (Date.now() < deadline) { + hotPath(); + } + + let sawRootChildren = false; + let sawChildWithNonRootMetadata = false; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + profiler.stopAndCollect(false, (profile: any) => { + const root = profile.topDownRoot as { + name: string; + scriptName: string; + scriptId: number; + children: Array<{ + name: string; + scriptName: string; + scriptId: number; + }>; + }; + const children = root.children; + + sawRootChildren = children.length > 0; + sawChildWithNonRootMetadata = children.some( + child => + child.name !== root.name || + child.scriptName !== root.scriptName || + child.scriptId !== root.scriptId, + ); + return undefined; + }); + + assert(sawRootChildren, 'Expected root to have children'); + assert( + sawChildWithNonRootMetadata, + 'Line-number lazy root children should not collapse to root metadata', + ); + } finally { + profiler.dispose(); + } + }); + }); + + describe('profileV2 (w/ stubs)', () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const sinonStubs: Array> = []; + const timeProfilerStub = { + start: sinon.stub(), + // stopAndCollect invokes the callback synchronously with the raw profile, + // mirroring what the native binding does. + stopAndCollect: sinon + .stub() + .callsFake( + (_restart: boolean, cb: (p: typeof v8TimeProfile) => unknown) => + cb(v8TimeProfile), + ), + dispose: sinon.stub(), + v8ProfilerStuckEventLoopDetected: sinon.stub().returns(0), + }; + + before(() => { + sinonStubs.push( + sinon.stub(v8TimeProfiler, 'TimeProfiler').returns(timeProfilerStub), + ); + sinonStubs.push(sinon.stub(Date, 'now').returns(0)); + }); + + after(() => { + sinonStubs.forEach(stub => stub.restore()); + }); + + it('should profile during duration and finish profiling after duration', async () => { + let isProfiling = true; + void profileV2(PROFILE_OPTIONS).then(() => { + isProfiling = false; + }); + await setTimeoutPromise(2 * PROFILE_OPTIONS.durationMillis); + assert.strictEqual(false, isProfiling, 'profiler is still running'); + }); + + it('should return a profile equal to the expected profile', async () => { + const profile = await profileV2(PROFILE_OPTIONS); + assert.deepEqual(timeProfile, profile); + }); + + it('should be able to restart when stopping', async () => { + time.start({intervalMicros: PROFILE_OPTIONS.intervalMicros}); + timeProfilerStub.start.resetHistory(); + timeProfilerStub.stopAndCollect.resetHistory(); + + assert.deepEqual(timeProfile, stopV2(true)); + assert.equal( + time.v8ProfilerStuckEventLoopDetected(), + 0, + 'v8 bug detected', + ); + sinon.assert.notCalled(timeProfilerStub.start); + sinon.assert.calledOnce(timeProfilerStub.stopAndCollect); + + timeProfilerStub.start.resetHistory(); + timeProfilerStub.stopAndCollect.resetHistory(); + + assert.deepEqual(timeProfile, stopV2()); + sinon.assert.notCalled(timeProfilerStub.start); + sinon.assert.calledOnce(timeProfilerStub.stopAndCollect); + }); + }); + describe('v8BugWorkaround (w/ stubs)', () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any const sinonStubs: Array> = []; @@ -723,6 +864,50 @@ describe('Time Profiler', () => { }); }); + describe('Memory comparison', () => { + interface WorkerMemoryResult { + initial: number; + afterTraversal: number; + afterHitCount: number; + } + + function measureMemoryInWorker( + version: 'v1' | 'v2', + ): Promise { + return new Promise((resolve, reject) => { + const child = fork('./out/test/time-memory-worker.js', [], { + execArgv: ['--expose-gc'], + }); + + child.on('message', (result: WorkerMemoryResult) => { + resolve(result); + child.kill(); + }); + + child.on('error', reject); + child.send(version); + }); + } + + it('stopAndCollect should use less memory than stop when profile is large', async function () { + if (unsupportedPlatform) { + this.skip(); + } + + const v1 = await measureMemoryInWorker('v1'); + const v2 = await measureMemoryInWorker('v2'); + + console.log('v1 : ', v1.initial, v1.afterTraversal, v1.afterHitCount); + console.log('v2 : ', v2.initial, v2.afterTraversal, v2.afterHitCount); + + // V2 creates almost nothing upfront — lazy wrappers vs full eager tree. + assert.ok( + v2.initial < v1.initial, + `V2 initial should be less: V1=${v1.initial}, V2=${v2.initial}`, + ); + }).timeout(120_000); + }); + describe('getNativeThreadId', () => { it('should return a number', () => { const threadId = getNativeThreadId(); diff --git a/ts/test/time-memory-worker.ts b/ts/test/time-memory-worker.ts new file mode 100644 index 00000000..06de395f --- /dev/null +++ b/ts/test/time-memory-worker.ts @@ -0,0 +1,153 @@ +import {TimeProfiler} from '../src/time-profiler-bindings'; +import {ProfileNode, TimeProfile, TimeProfileNode} from '../src/v8-types'; +import {computeTotalHitCount} from '../src/profile-serializer'; + +const gc = (global as NodeJS.Global & {gc?: () => void}).gc; +if (!gc) { + throw new Error('Run with --expose-gc flag'); +} + +const SCRIPT_PADDING = 'a'.repeat(250); + +function createUniqueFunctions(count: number): Array<() => void> { + const fns: Array<() => void> = []; + for (let i = 0; i < count; i++) { + const fn = new Function( + `//# sourceURL=wide_fn_${i}_${SCRIPT_PADDING}.js\n` + + `var x${i}=0,e${i}=Date.now()+1;while(Date.now() void; + fns.push(fn); + } + return fns; +} + +function createDeepCallChain(chainId: number, depth: number): () => void { + let innermost: (() => void) | null = null; + for (let i = depth - 1; i >= 0; i--) { + const next = innermost; + innermost = new Function( + 'next', + `//# sourceURL=chain_${chainId}_d${i}_${SCRIPT_PADDING}.js\n` + + 'var c=0,e=Date.now()+1;while(Date.now() void; + } + return innermost!; +} + +const CHAIN_STRIDE = 30; + +function generateCpuWork( + wideFns: Array<() => void>, + deepChains: Array<() => void>, + durationMs: number, +): void { + const deadline = Date.now() + durationMs; + let i = 0; + while (Date.now() < deadline) { + wideFns[i % wideFns.length](); + if (i % CHAIN_STRIDE === 0) { + deepChains[(i / CHAIN_STRIDE) % deepChains.length](); + } + i++; + } +} + +const WIDE_FN_COUNT = 5000; +const CHAIN_COUNT = 100; +const CHAIN_DEPTH = 60; + +const PROFILER_OPTIONS = { + intervalMicros: 50, + durationMillis: 20_000, + lineNumbers: true, + withContexts: false, + workaroundV8Bug: false, + collectCpuTime: false, + collectAsyncId: false, + useCPED: false, + isMainThread: true, +}; + +function buildWorkload() { + const wideFns = createUniqueFunctions(WIDE_FN_COUNT); + const deepChains: Array<() => void> = []; + for (let c = 0; c < CHAIN_COUNT; c++) { + deepChains.push(createDeepCallChain(c, CHAIN_DEPTH)); + } + return {wideFns, deepChains}; +} + +function traverseTree(root: TimeProfileNode): void { + const stack: ProfileNode[] = [root]; + while (stack.length > 0) { + const node = stack.pop()!; + for (const child of node.children) { + stack.push(child); + } + } +} + +interface MemoryResult { + initial: number; + afterTraversal: number; + afterHitCount: number; +} + +function measureV1(): MemoryResult { + const {wideFns, deepChains} = buildWorkload(); + const profiler = new TimeProfiler(PROFILER_OPTIONS); + profiler.start(); + generateCpuWork(wideFns, deepChains, PROFILER_OPTIONS.durationMillis); + + gc!(); + const baseline = process.memoryUsage().heapUsed; + + const profile: TimeProfile = profiler.stop(false); + const initial = process.memoryUsage().heapUsed - baseline; + + traverseTree(profile.topDownRoot); + const afterTraversal = process.memoryUsage().heapUsed - baseline; + + // V1: computeTotalHitCount triggers children getters on every node, + // creating JS wrapper objects for a second full tree traversal. + computeTotalHitCount(profile.topDownRoot); + const afterHitCount = process.memoryUsage().heapUsed - baseline; + + profiler.dispose(); + return {initial, afterTraversal, afterHitCount}; +} + +function measureV2(): MemoryResult { + const {wideFns, deepChains} = buildWorkload(); + const profiler = new TimeProfiler(PROFILER_OPTIONS); + profiler.start(); + generateCpuWork(wideFns, deepChains, PROFILER_OPTIONS.durationMillis); + + gc!(); + const baseline = process.memoryUsage().heapUsed; + + const result = profiler.stopAndCollect( + false, + (profile: TimeProfile): MemoryResult => { + const initial = process.memoryUsage().heapUsed - baseline; + + traverseTree(profile.topDownRoot); + const afterTraversal = process.memoryUsage().heapUsed - baseline; + + // V2: totalHitCount is pre-computed in C++ — just a property read, + // no JS tree traversal, no wrapper objects created. + void profile.totalHitCount; + const afterHitCount = process.memoryUsage().heapUsed - baseline; + + return {initial, afterTraversal, afterHitCount}; + }, + ); + + profiler.dispose(); + return result; +} + +process.on('message', (version: 'v1' | 'v2') => { + const result = version === 'v1' ? measureV1() : measureV2(); + process.send!(result); +});