diff --git a/eng/testing/scenarios/BuildWasmAppsJobsListCoreCLR.txt b/eng/testing/scenarios/BuildWasmAppsJobsListCoreCLR.txt index 8267a0181a2a35..336825869e84db 100644 --- a/eng/testing/scenarios/BuildWasmAppsJobsListCoreCLR.txt +++ b/eng/testing/scenarios/BuildWasmAppsJobsListCoreCLR.txt @@ -23,3 +23,4 @@ Wasm.Build.Tests.MemoryTests Wasm.Build.Tests.AppSettingsTests Wasm.Build.Tests.Blazor.AppsettingsTests Wasm.Build.Tests.DownloadThenInitTests +Wasm.Build.Tests.Blazor.EventPipeDiagnosticsTests diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index a1c4debad37ca7..6920b2150cc1af 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -45,6 +45,13 @@ static const char *g_stackTypeString[] = { "I4", "I8", "R4", "R8", "O ", "VT", " const char* CorInfoHelperToName(CorInfoHelpFunc helper); +#ifdef PERFTRACING_DISABLE_THREADS +bool InterpCompiler::s_samplingProfilerEnabled = false; +#ifdef TARGET_BROWSER +bool InterpCompiler::s_browserProfilerEnabled = false; +#endif +#endif // PERFTRACING_DISABLE_THREADS + #if MEASURE_MEM_ALLOC #include @@ -2203,6 +2210,16 @@ InterpCompiler::InterpCompiler(COMP_HANDLE compHnd, DWORD jitFlagsSize = m_compHnd->getJitFlags(&m_corJitFlags, sizeof(m_corJitFlags)); assert(jitFlagsSize == sizeof(m_corJitFlags)); +#ifdef PERFTRACING_DISABLE_THREADS + m_emitSamplingProfiler = s_samplingProfilerEnabled + && InterpConfig.WasmPerformanceInstrumentation().contains(compHnd, m_methodHnd, m_classHnd, &m_methodInfo->args); + +#ifdef TARGET_BROWSER + m_emitBrowserProfiler = s_browserProfilerEnabled + && InterpConfig.WasmPerformanceInstrumentation().contains(compHnd, m_methodHnd, m_classHnd, &m_methodInfo->args); +#endif +#endif // PERFTRACING_DISABLE_THREADS + #ifdef DEBUG m_methodName = ::PrintMethodName(compHnd, m_classHnd, m_methodHnd, &m_methodInfo->args, /* includeAssembly */ false, @@ -2912,7 +2929,13 @@ void InterpCompiler::EmitBranch(InterpOpcode opcode, int32_t 
ilOffset) // Backwards branch, emit safepoint if (ilOffset < 0) + { AddIns(INTOP_SAFEPOINT); +#ifdef PERFTRACING_DISABLE_THREADS + if (m_emitSamplingProfiler) + AddIns(INTOP_PROF_SAMPLEPOINT); +#endif // PERFTRACING_DISABLE_THREADS + } InterpBasicBlock *pTargetBB = m_ppOffsetToBB[target]; if (pTargetBB == NULL) @@ -5734,6 +5757,13 @@ void InterpCompiler::EmitRet(CORINFO_METHOD_INFO* methodInfo) return; } +#ifdef PERFTRACING_DISABLE_THREADS +#ifdef TARGET_BROWSER + if (m_emitBrowserProfiler) + AddIns(INTOP_PROF_LEAVE); +#endif // TARGET_BROWSER +#endif // PERFTRACING_DISABLE_THREADS + if (m_methodInfo->args.isAsyncCall()) { // We're doing a standard return. Set the continuation return to NULL. @@ -8257,6 +8287,17 @@ void InterpCompiler::GenerateCode(CORINFO_METHOD_INFO* methodInfo) // Safepoint at each method entry. This could be done as part of a call, rather than // adding an opcode. AddIns(INTOP_SAFEPOINT); +#ifdef PERFTRACING_DISABLE_THREADS + if (m_emitSamplingProfiler) + AddIns(INTOP_PROF_SAMPLEPOINT); +#ifdef TARGET_BROWSER + if (m_emitBrowserProfiler) + { + AddIns(INTOP_PROF_ENTER); + m_pLastNewIns->data[0] = GetMethodDataItemIndex(m_methodHnd); + } +#endif // TARGET_BROWSER +#endif // PERFTRACING_DISABLE_THREADS if (m_continuationArgIndex != -1) { diff --git a/src/coreclr/interpreter/compiler.h b/src/coreclr/interpreter/compiler.h index 3b5acb11c47a3a..cdcd1aa2b847c2 100644 --- a/src/coreclr/interpreter/compiler.h +++ b/src/coreclr/interpreter/compiler.h @@ -627,6 +627,12 @@ class InterpCompiler COMP_HANDLE m_compHnd; CORINFO_METHOD_INFO* m_methodInfo; CORJIT_FLAGS m_corJitFlags; +#ifdef PERFTRACING_DISABLE_THREADS + bool m_emitSamplingProfiler; +#ifdef TARGET_BROWSER + bool m_emitBrowserProfiler; +#endif +#endif // PERFTRACING_DISABLE_THREADS void DeclarePointerIsClass(CORINFO_CLASS_HANDLE clsHnd) { @@ -1112,6 +1118,13 @@ class InterpCompiler int32_t* GetCode(int32_t *pCodeSize); +#ifdef PERFTRACING_DISABLE_THREADS + static bool s_samplingProfilerEnabled; 
+#ifdef TARGET_BROWSER + static bool s_browserProfilerEnabled; +#endif +#endif // PERFTRACING_DISABLE_THREADS + #if MEASURE_MEM_ALLOC // Memory statistics for profiling. using InterpMemStats = MemStats; diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp index c7569a9e103174..0f827b52b79a6d 100644 --- a/src/coreclr/interpreter/eeinterp.cpp +++ b/src/coreclr/interpreter/eeinterp.cpp @@ -29,6 +29,18 @@ extern "C" INTERP_API void jitStartup(ICorJitHost* jitHost) InterpCompiler::initMemStats(); #endif + // Enable profiling instrumentation if DOTNET_WasmPerformanceInstrumentation is set. + // This must happen before any managed code is compiled so all methods get samplepoints. + if (!InterpConfig.WasmPerformanceInstrumentation().isEmpty()) + { +#ifdef PERFTRACING_DISABLE_THREADS + InterpCompiler::s_samplingProfilerEnabled = true; +#ifdef TARGET_BROWSER + InterpCompiler::s_browserProfilerEnabled = true; +#endif +#endif // PERFTRACING_DISABLE_THREADS + } + g_interpInitialized = true; } /*****************************************************************************/ diff --git a/src/coreclr/interpreter/inc/intops.def b/src/coreclr/interpreter/inc/intops.def index f2cdcbbc1b4eb4..0c3822cc16b904 100644 --- a/src/coreclr/interpreter/inc/intops.def +++ b/src/coreclr/interpreter/inc/intops.def @@ -75,6 +75,9 @@ OPDEF(INTOP_LDLOCA, "ldloca", 3, 1, 0, InterpOpInt) OPDEF(INTOP_SWITCH, "switch", 0, 0, 1, InterpOpSwitch) OPDEF(INTOP_SAFEPOINT, "safepoint", 1, 0, 0, InterpOpNoArgs) +OPDEF(INTOP_PROF_SAMPLEPOINT, "prof.samplepoint", 1, 0, 0, InterpOpNoArgs) +OPDEF(INTOP_PROF_ENTER, "prof.enter", 2, 0, 0, InterpOpMethodHandle) +OPDEF(INTOP_PROF_LEAVE, "prof.leave", 1, 0, 0, InterpOpNoArgs) OPDEF(INTOP_BR, "br", 2, 0, 0, InterpOpBranch) OPDEF(INTOP_BRFALSE_I4, "brfalse.i4", 3, 0, 1, InterpOpBranch) diff --git a/src/coreclr/interpreter/interpconfigvalues.h b/src/coreclr/interpreter/interpconfigvalues.h index 19549942875f29..74e8bf1a40e52e 100644 --- 
a/src/coreclr/interpreter/interpconfigvalues.h +++ b/src/coreclr/interpreter/interpconfigvalues.h @@ -36,6 +36,7 @@ RELEASE_CONFIG_INTEGER(InterpMode, "InterpMode", 0); // Interpreter mode, one of // 3: use interpreter for everything, the full interpreter-only mode, no fallbacks to R2R or JIT whatsoever. Implies DOTNET_ReadyToRun=0, DOTNET_EnableHWIntrinsic=0 RELEASE_CONFIG_INTEGER(DisplayMemStats, "JitMemStats", 0); // Display interpreter memory usage statistics (0=off, 1=summary, 2=detailed per-method) +RELEASE_CONFIG_METHODSET(WasmPerformanceInstrumentation, "WasmPerformanceInstrumentation") // Method filter for WASM performance instrumentation profiler. Uses standard MethodSet pattern format. #undef CONFIG_STRING #undef RELEASE_CONFIG_STRING diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 0c101444bd7918..42eac9603802f2 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -954,6 +954,11 @@ elseif(CLR_CMAKE_TARGET_ARCH_WASM) exceptionhandling.cpp gcinfodecoder.cpp ) + if (CLR_CMAKE_TARGET_BROWSER) + list(APPEND VM_SOURCES_WKS_ARCH + ${ARCH_SOURCES_DIR}/browserprofiler.cpp + ) + endif(CLR_CMAKE_TARGET_BROWSER) set(VM_SOURCES_WKS_GEN ${ARCH_SOURCES_DIR}/callhelpers-interp-to-managed.cpp ${ARCH_SOURCES_DIR}/callhelpers-reverse.cpp diff --git a/src/coreclr/vm/eventing/eventpipe/CMakeLists.txt b/src/coreclr/vm/eventing/eventpipe/CMakeLists.txt index 5822b478b2d63e..4b5d8cb6106809 100644 --- a/src/coreclr/vm/eventing/eventpipe/CMakeLists.txt +++ b/src/coreclr/vm/eventing/eventpipe/CMakeLists.txt @@ -30,6 +30,7 @@ add_custom_command(OUTPUT ${GEN_EVENTPIPE_SOURCES} list(APPEND CORECLR_EVENTPIPE_SHIM_SOURCES ep-rt-coreclr.cpp + ep-rt-coreclr-wasm-sampling.cpp ) list(APPEND CORECLR_EVENTPIPE_SHIM_HEADERS diff --git a/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr-wasm-sampling.cpp b/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr-wasm-sampling.cpp new file mode 100644 index 00000000000000..2f2c04a0cde250 --- 
/dev/null +++ b/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr-wasm-sampling.cpp @@ -0,0 +1,186 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include + +#ifdef ENABLE_PERFTRACING + +#include +#include +#include +#include +#include +#include "threadsuspend.h" + +#ifdef TARGET_BROWSER +#include +#endif + + +// State for single-threaded EP sampling profiler. +// On single-threaded WASM, sampling is cooperative: the interpreter calls +// SamplingProfiler_OnSamplepoint() at backward branches (loop iterations) +// and method entry. A skip counter provides a fast path, and when the +// counter expires we check if enough wall-clock time has elapsed to +// justify taking a real sample. + +static EventPipeEvent *s_currentSamplingEvent = nullptr; +static Thread *s_currentSamplingThread = nullptr; + +// Adaptive sampling state. +// s_skipsPerPeriod is the number of samplepoints to skip between actual +// samples. It is adaptively adjusted so that samples occur approximately +// once per s_desiredSampleIntervalMs. +static double s_desiredSampleIntervalMs = 10.0; +static double s_lastSampleTimeMs = 0.0; +static int32_t s_prevSkipsPerPeriod = 1; +static int32_t s_skipsPerPeriod = 1; +static int32_t s_sampleSkipCounter = 0; + +// Returns the current time in milliseconds using the same high-resolution +// timer as EventPipe timestamps (performance.now() on browser WASM). +static double GetCurrentTimeMs() +{ +#ifdef TARGET_BROWSER + return emscripten_get_now(); +#else + return (double)minipal_hires_ticks() * 1000.0 / (double)minipal_hires_tick_frequency(); +#endif +} + +// Recalculates s_skipsPerPeriod based on how long the last period actually +// took relative to the desired interval. This is the same exponential +// moving average approach used by Mono's ep-rt-mono-runtime-provider.c. 
+static void UpdateSampleFrequency()
+{
+    // Reads and updates the file-level adaptive sampling state
+    // (s_lastSampleTimeMs, s_prevSkipsPerPeriod, s_skipsPerPeriod).
+    const double now = GetCurrentTimeMs();
+
+    // A last-sample time of 0.0 means no previous period has been timed
+    // yet, so there is nothing to scale against on this call.
+    if (s_lastSampleTimeMs > 0.0)
+    {
+        const double elapsed = now - s_lastSampleTimeMs;
+        if (elapsed > 0.0)
+        {
+            const double ratio = s_desiredSampleIntervalMs / elapsed;
+            const double scaledSkips = (double)s_prevSkipsPerPeriod * ratio;
+
+            // Clamp to [1, 10000] while still in floating point. A very
+            // small 'elapsed' (or a large desired interval) can push the
+            // product past INT32_MAX, and converting an out-of-range double
+            // to int32_t is undefined behavior, so the cast is performed
+            // only on a value known to be in range. The negated comparison
+            // also routes NaN to the lower bound.
+            int32_t newSkips;
+            if (!(scaledSkips >= 1.0))
+                newSkips = 1;
+            else if (scaledSkips > 10000.0)
+                newSkips = 10000;
+            else
+                newSkips = (int32_t)scaledSkips;
+
+            s_prevSkipsPerPeriod = s_skipsPerPeriod;
+            s_skipsPerPeriod = newSkips;
+        }
+    }
+
+    s_lastSampleTimeMs = now;
+}
+
+#ifndef PERFTRACING_DISABLE_THREADS
+
+// On multi-threaded builds the sample profiler runs on a dedicated
+// thread, so these callbacks are no-ops.
+
+void ep_rt_coreclr_sample_profiler_enabled(EventPipeEvent *samplingEvent)
+{
+}
+
+void ep_rt_coreclr_sample_profiler_session_enabled(void)
+{
+}
+
+void ep_rt_coreclr_sample_profiler_disabled(void)
+{
+}
+
+#else // PERFTRACING_DISABLE_THREADS
+
+// The following functions are EP runtime callbacks invoked only on
+// single-threaded builds where the regular threaded sample profiler
+// cannot run.
+
+// Remembers the sampling event and the calling thread, derives the desired
+// sample interval in milliseconds from the configured sampling rate, and
+// resets the adaptive skip state to its starting values.
+void ep_rt_coreclr_sample_profiler_enabled(EventPipeEvent *samplingEvent)
+{
+    s_currentSamplingEvent = samplingEvent;
+    s_currentSamplingThread = GetThread();
+
+    // Dividing by 1e6 yields milliseconds (presumably the rate is expressed
+    // in nanoseconds - confirm against ep_sample_profiler_get_sampling_rate).
+    const double configuredRate = (double)ep_sample_profiler_get_sampling_rate();
+    s_desiredSampleIntervalMs = configuredRate / 1000000.0;
+
+    s_sampleSkipCounter = 0;
+    s_skipsPerPeriod = 1;
+    s_prevSkipsPerPeriod = 1;
+    s_lastSampleTimeMs = 0.0;
+}
+
+// Writes a single sample event for the remembered thread using a freshly
+// initialized stack-contents object (no stack walk is performed here).
+// Does nothing when no sampling event or thread has been recorded.
+void ep_rt_coreclr_sample_profiler_session_enabled(void)
+{
+    const bool haveTarget = (s_currentSamplingEvent != nullptr) && (s_currentSamplingThread != nullptr);
+    if (!haveTarget)
+        return;
+
+    EventPipeStackContents initialStack;
+    EventPipeStackContents *pInitialStack = ep_stack_contents_init(&initialStack);
+
+    uint32_t sampleType = EP_SAMPLE_PROFILER_SAMPLE_TYPE_MANAGED;
+    uint8_t *pPayload = (uint8_t *)&sampleType;
+
+    ep_write_sample_profile_event(
+        s_currentSamplingThread,
+        s_currentSamplingEvent,
+        s_currentSamplingThread,
+        pInitialStack,
+        pPayload,
+        sizeof(sampleType));
+
+    ep_stack_contents_fini(pInitialStack);
+}
+
+// Forgets the sampling event and thread and rewinds the skip counter and
+// skip period so that samplepoints stop producing events.
+void ep_rt_coreclr_sample_profiler_disabled(void)
+{
+    s_skipsPerPeriod = 1;
+    s_sampleSkipCounter = 0;
+    s_currentSamplingThread = nullptr;
+    s_currentSamplingEvent = nullptr;
+}
+
+// Called from the interpreter's INTOP_PROF_SAMPLEPOINT handler.
+// On single-threaded WASM this is the cooperative sampling entry point.
+// On multi-threaded platforms the opcode is never emitted.
+extern "C" void SamplingProfiler_OnSamplepoint() +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + } CONTRACTL_END; + + if (++s_sampleSkipCounter < s_skipsPerPeriod) + return; + + s_sampleSkipCounter = 0; + + if (s_currentSamplingEvent == nullptr || s_currentSamplingThread == nullptr) + return; + + UpdateSampleFrequency(); + + EventPipeStackContents stackContents; + EventPipeStackContents *pStackContents = ep_stack_contents_init(&stackContents); + + if (ep_rt_coreclr_walk_managed_stack_for_thread(s_currentSamplingThread, pStackContents) + && !ep_stack_contents_is_empty(pStackContents)) + { + uint32_t payloadData = EP_SAMPLE_PROFILER_SAMPLE_TYPE_MANAGED; + + ep_write_sample_profile_event( + s_currentSamplingThread, + s_currentSamplingEvent, + s_currentSamplingThread, + pStackContents, + (uint8_t *)&payloadData, + sizeof(payloadData)); + } + + ep_stack_contents_fini(pStackContents); +} + +#endif // PERFTRACING_DISABLE_THREADS + +#endif // ENABLE_PERFTRACING diff --git a/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.h b/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.h index bc331427218e0b..c32ff8f30bfbc1 100644 --- a/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.h +++ b/src/coreclr/vm/eventing/eventpipe/ep-rt-coreclr.h @@ -51,6 +51,10 @@ #undef EP_ALIGN_UP #define EP_ALIGN_UP(val,align) ALIGN_UP(val,align) +extern void ep_rt_coreclr_sample_profiler_enabled (EventPipeEvent *sampling_event); +extern void ep_rt_coreclr_sample_profiler_session_enabled (void); +extern void ep_rt_coreclr_sample_profiler_disabled (void); + static inline ep_rt_lock_handle_t * @@ -610,7 +614,7 @@ void ep_rt_sample_profiler_enabled (EventPipeEvent *sampling_event) { STATIC_CONTRACT_NOTHROW; - // no-op + ep_rt_coreclr_sample_profiler_enabled (sampling_event); } static @@ -619,7 +623,7 @@ void ep_rt_sample_profiler_session_enabled (void) { STATIC_CONTRACT_NOTHROW; - // no-op + ep_rt_coreclr_sample_profiler_session_enabled (); } static @@ -628,7 +632,7 @@ void 
ep_rt_sample_profiler_disabled (void) { STATIC_CONTRACT_NOTHROW; - // no-op + ep_rt_coreclr_sample_profiler_disabled (); } static diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 4ef4ad40c9617f..7ef6d91d3506ed 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -12,6 +12,14 @@ #include "gchelpers.inl" #include "arraynative.inl" +#ifdef TARGET_WASM +extern "C" void SamplingProfiler_OnSamplepoint(); +#endif + +#if defined(TARGET_BROWSER) && defined(PERFTRACING_DISABLE_THREADS) +#include "wasm/browserprofiler.h" +#endif + // for numeric_limits #include #include @@ -1943,6 +1951,25 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr ip++; break; +#ifdef PERFTRACING_DISABLE_THREADS + case INTOP_PROF_SAMPLEPOINT: + SamplingProfiler_OnSamplepoint(); + ip++; + break; +#endif // PERFTRACING_DISABLE_THREADS + +#if defined(TARGET_BROWSER) && defined(PERFTRACING_DISABLE_THREADS) + case INTOP_PROF_ENTER: + BrowserProfiler_OnMethodEnter(pMethod->pDataItems[ip[1]]); + ip += 2; + break; + + case INTOP_PROF_LEAVE: + BrowserProfiler_OnMethodLeave(pMethod->methodHnd); + ip++; + break; +#endif // TARGET_BROWSER && PERFTRACING_DISABLE_THREADS + case INTOP_BR: ip += ip[1]; break; @@ -3426,6 +3453,9 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr if (frameNeedsTailcallUpdate) { +#if defined(TARGET_BROWSER) && defined(PERFTRACING_DISABLE_THREADS) + BrowserProfiler_OnMethodLeave(pMethod->methodHnd); +#endif InterpMethod* pTargetMethod = targetIp->Method; UpdateFrameForTailCall(pFrame, targetIp, callArgsAddress); frameNeedsTailcallUpdate = false; @@ -4654,6 +4684,9 @@ do \ // Thus, we need to rethrow it to let it propagate further. 
throw; } +#if defined(TARGET_BROWSER) && defined(PERFTRACING_DISABLE_THREADS) + BrowserProfiler_OnMethodLeave(pFrame->startIp->Method->methodHnd); +#endif pThreadContext->frameDataAllocator.PopInfo(pFrame); pFrame->ip = 0; pFrame = pFrame->pParent; diff --git a/src/coreclr/vm/wasm/browserprofiler.cpp b/src/coreclr/vm/wasm/browserprofiler.cpp new file mode 100644 index 00000000000000..8c44783322e039 --- /dev/null +++ b/src/coreclr/vm/wasm/browserprofiler.cpp @@ -0,0 +1,183 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "common.h" + +#if defined(TARGET_BROWSER) && defined(PERFTRACING_DISABLE_THREADS) + +#include +#include "method.hpp" +#include "typestring.h" +#include "wasm/browserprofiler.h" + +extern "C" { + void ds_rt_browser_performance_measure(void* pMethodDesc, double start); +} + +static constexpr int MAX_STACK_DEPTH = 600; + +struct ProfilerStackFrame +{ + MethodDesc *pMethod; + double startMs; + bool shouldRecord; +}; + +static ProfilerStackFrame s_profilerStack[MAX_STACK_DEPTH]; +static int s_topStackFrameIndex = -1; + +// Number of method enters that occurred while the shadow stack was already +// full. These frames are not recorded; the counter keeps subsequent leaves +// balanced so the profiler degrades gracefully instead of overflowing. +static int s_overflowDepth = 0; + +// Adaptive recording state — controls how often we actually call +// performance.measure(). The shadow stack always tracks enter/leave for +// correctness, but recording is rate-limited so it occurs approximately +// once per s_desiredRecordIntervalMs. This mirrors the exponential moving +// average approach used by the EventPipe sampling profiler in +// ep-rt-coreclr-wasm-sampling.cpp (kept as a separate copy on purpose). 
+static double s_desiredRecordIntervalMs = 1.0;
+static double s_lastRecordTimeMs = 0.0;
+static int32_t s_prevSkipsPerPeriod = 1;
+static int32_t s_skipsPerPeriod = 10;
+static int32_t s_recordSkipCounter = 0;
+
+// Recalculates s_skipsPerPeriod based on how long the last period actually
+// took relative to the desired interval.
+static void UpdateRecordFrequency()
+{
+    const double now = emscripten_get_now();
+
+    // A last-record time of 0.0 means no previous period has been timed
+    // yet, so there is nothing to scale against on this call.
+    if (s_lastRecordTimeMs > 0.0)
+    {
+        const double elapsed = now - s_lastRecordTimeMs;
+        if (elapsed > 0.0)
+        {
+            const double ratio = s_desiredRecordIntervalMs / elapsed;
+            const double scaledSkips = (double)s_prevSkipsPerPeriod * ratio;
+
+            // Clamp to [1, 10000] while still in floating point. Converting
+            // an out-of-range double to int32_t is undefined behavior, so
+            // the cast is performed only on a value known to be in range.
+            // The negated comparison also routes NaN to the lower bound.
+            int32_t newSkips;
+            if (!(scaledSkips >= 1.0))
+                newSkips = 1;
+            else if (scaledSkips > 10000.0)
+                newSkips = 10000;
+            else
+                newSkips = (int32_t)scaledSkips;
+
+            s_prevSkipsPerPeriod = s_skipsPerPeriod;
+            s_skipsPerPeriod = newSkips;
+        }
+    }
+
+    s_lastRecordTimeMs = now;
+}
+
+// Returns true once every s_skipsPerPeriod calls; on those calls the skip
+// counter is reset and the skip period is re-tuned.
+static bool ShouldRecordFrame()
+{
+    if (++s_recordSkipCounter < s_skipsPerPeriod)
+        return false;
+
+    s_recordSkipCounter = 0;
+    UpdateRecordFrequency();
+
+    return true;
+}
+
+// Pushes a shadow-stack frame for pMethodDesc with the current timestamp and
+// a rate-limited "record" flag. When the shadow stack is full the frame is
+// not stored and only the overflow counter is incremented.
+void BrowserProfiler_OnMethodEnter(void *pMethodDesc)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_COOPERATIVE;
+    } CONTRACTL_END;
+
+    MethodDesc *pMD = (MethodDesc *)pMethodDesc;
+
+    if (s_topStackFrameIndex + 1 >= MAX_STACK_DEPTH)
+    {
+        // Shadow stack is full. Stop recording deeper frames but keep
+        // counting them so the matching leaves stay balanced.
+        s_overflowDepth++;
+        return;
+    }
+
+    s_topStackFrameIndex++;
+    ProfilerStackFrame *frame = &s_profilerStack[s_topStackFrameIndex];
+    frame->pMethod = pMD;
+    frame->startMs = emscripten_get_now();
+    frame->shouldRecord = ShouldRecordFrame();
+}
+
+// Pops the shadow-stack frame matching pMethodDesc (and anything above it)
+// and, if that frame was flagged for recording, reports it through
+// ds_rt_browser_performance_measure(). A leave with no matching frame is
+// ignored.
+void BrowserProfiler_OnMethodLeave(void *pMethodDesc)
+{
+    CONTRACTL {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_COOPERATIVE;
+    } CONTRACTL_END;
+
+    // Unwind frames that were dropped because the shadow stack was full.
+    // NOTE(review): the counter does not remember which method overflowed,
+    // so any leave arriving while it is non-zero is consumed here. If a
+    // caller reports a leave for a method that never reported an enter,
+    // the count would drift - confirm callers only pair leaves with enters.
+    if (s_overflowDepth > 0)
+    {
+        s_overflowDepth--;
+        return;
+    }
+
+    if (s_topStackFrameIndex < 0)
+        return;
+
+    // Find the matching frame from the top down. The common case is that the
+    // top frame matches (O(1)). Scanning downwards makes the shadow stack
+    // self-healing: if some exit path failed to emit a leave (e.g. an unwind
+    // route the profiler doesn't hook), the next ancestor leave discards the
+    // orphaned frames above it instead of leaking them forever.
+    int idx = s_topStackFrameIndex;
+    while (idx >= 0 && s_profilerStack[idx].pMethod != (MethodDesc *)pMethodDesc)
+        idx--;
+
+    // No matching enter was recorded for this method (filtered out, or an
+    // unbalanced leave). Leave the stack untouched.
+    if (idx < 0)
+        return;
+
+    ProfilerStackFrame *frame = &s_profilerStack[idx];
+
+    if (frame->shouldRecord)
+    {
+        // Pass the MethodDesc* to JS, which caches the formatted name by
+        // pointer and only calls back into SystemJS_GetMethodName()
+        // on a cache miss.
+        ds_rt_browser_performance_measure(frame->pMethod, frame->startMs);
+
+        // Mark parent frame for recording so the flame chart nests properly.
+        if (idx > 0)
+            s_profilerStack[idx - 1].shouldRecord = true;
+    }
+
+    // Pop the matched frame along with any orphaned frames above it.
+    s_topStackFrameIndex = idx - 1;
+}
+
+// Formats the name of a MethodDesc* into a freshly malloc'd UTF-8 string.
+// Called from JS only on a cache miss; the JS caller owns the returned
+// buffer and must free() it. Returns NULL on allocation failure.
+extern "C" const char* SystemJS_GetMethodName(void *pMethodDesc) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + } CONTRACTL_END; + + MethodDesc *pMD = (MethodDesc *)pMethodDesc; + + SString methodName; + TypeString::AppendMethodInternal(methodName, pMD, TypeString::FormatBasic); + + const char *utf8 = methodName.GetUTF8(); + size_t size = strlen(utf8) + 1; + char *result = (char *)malloc(size); + if (result != NULL) + memcpy(result, utf8, size); + + return result; +} + +#endif // TARGET_BROWSER && PERFTRACING_DISABLE_THREADS diff --git a/src/coreclr/vm/wasm/browserprofiler.h b/src/coreclr/vm/wasm/browserprofiler.h new file mode 100644 index 00000000000000..c0de29c97b7be6 --- /dev/null +++ b/src/coreclr/vm/wasm/browserprofiler.h @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef BROWSERPROFILER_H +#define BROWSERPROFILER_H + +#ifdef TARGET_BROWSER + +// Browser DevTools profiler for CoreCLR interpreter on WASM. +// Records method enter/leave events to the browser's Performance tab +// via performance.measure(). Uses a shadow stack to track method timing. +// +// The shadow stack is not thread-safe, so the profiler is only available on +// single-threaded (PERFTRACING_DISABLE_THREADS) builds, matching the +// condition under which INTOP_PROF_ENTER/INTOP_PROF_LEAVE are emitted. +#ifdef PERFTRACING_DISABLE_THREADS + +void BrowserProfiler_OnMethodEnter(void *pMethodDesc); +void BrowserProfiler_OnMethodLeave(void *pMethodDesc); + +// Returns a freshly malloc'd UTF-8 method name for a MethodDesc*. +// Called from JS; the caller owns the returned buffer and must free() it. 
+extern "C" const char* SystemJS_GetMethodName(void *pMethodDesc); + +#endif // PERFTRACING_DISABLE_THREADS +#endif // TARGET_BROWSER + +#endif // BROWSERPROFILER_H diff --git a/src/mono/browser/build/BrowserWasmApp.CoreCLR.targets b/src/mono/browser/build/BrowserWasmApp.CoreCLR.targets index 068b1bfa856b98..090743e9da38b2 100644 --- a/src/mono/browser/build/BrowserWasmApp.CoreCLR.targets +++ b/src/mono/browser/build/BrowserWasmApp.CoreCLR.targets @@ -58,6 +58,23 @@ <_ExeExt Condition="$([MSBuild]::IsOSPlatform('windows'))">.exe + + + <_WasmPerfInstFilter>$(WasmPerformanceInstrumentation) + <_WasmPerfInstInterval> + <_WasmPerfInstInterval Condition="$(WasmPerformanceInstrumentation.Contains(',interval='))">$(WasmPerformanceInstrumentation.Substring($([MSBuild]::Add($(WasmPerformanceInstrumentation.IndexOf(',interval=')), 10)))) + <_WasmPerfInstFilter Condition="$(WasmPerformanceInstrumentation.Contains(',interval='))">$(WasmPerformanceInstrumentation.Substring(0, $(WasmPerformanceInstrumentation.IndexOf(',interval=')))) + <_WasmPerfInstInterval Condition="'$(_WasmPerfInstInterval)' != '' and $(_WasmPerfInstInterval.Contains(','))">$(_WasmPerfInstInterval.Substring(0, $(_WasmPerfInstInterval.IndexOf(',')))) + <_WasmPerfInstFilter Condition="'$(_WasmPerfInstFilter)' == 'all'">* + + + + + + @@ -92,6 +109,7 @@ flow (in _CoreCLRPrepareForNativeBuild) so we have to inspect both here. --> true true + true diff --git a/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets b/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets index d462dfad666eb0..ab8d6606aba35e 100644 --- a/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets +++ b/src/mono/nuget/Microsoft.NET.Sdk.WebAssembly.Pack/build/Microsoft.NET.Sdk.WebAssembly.Browser.targets @@ -144,10 +144,12 @@ Copyright (c) .NET Foundation. All rights reserved. 
ComputeWasmVfs; ResolveWasmOutputs; _AddWasmDiagnosticPortsEnvironmentVariable; + _AddWasmPerformanceInstrumentationEnvironmentVariables; $(GeneratePublishWasmBootJsonDependsOn); _AddWasmDiagnosticPortsEnvironmentVariable; + _AddWasmPerformanceInstrumentationEnvironmentVariables; @@ -596,8 +598,25 @@ Copyright (c) .NET Foundation. All rights reserved. <_WasmPerfInstDescriptor Condition="'$(_WasmPerfInstFilter)' != '' and '$(UseMonoRuntime)' == 'false'">$(_WasmPerfInstFilter) - - + + + + + + + + <_WasmPerfInstFilter>$(WasmPerformanceInstrumentation) + <_WasmPerfInstInterval> + <_WasmPerfInstInterval Condition="$(WasmPerformanceInstrumentation.Contains(',interval='))">$(WasmPerformanceInstrumentation.Substring($([MSBuild]::Add($(WasmPerformanceInstrumentation.IndexOf(',interval=')), 10)))) + <_WasmPerfInstFilter Condition="$(WasmPerformanceInstrumentation.Contains(',interval='))">$(WasmPerformanceInstrumentation.Substring(0, $(WasmPerformanceInstrumentation.IndexOf(',interval=')))) + <_WasmPerfInstInterval Condition="'$(_WasmPerfInstInterval)' != '' and $(_WasmPerfInstInterval.Contains(','))">$(_WasmPerfInstInterval.Substring(0, $(_WasmPerfInstInterval.IndexOf(',')))) + <_WasmPerfInstFilter Condition="'$(_WasmPerfInstFilter)' == 'all'">* + + + diff --git a/src/mono/sample/wasm/browser-eventpipe/wwwroot/main.js b/src/mono/sample/wasm/browser-eventpipe/wwwroot/main.js index 4cffa0d8c76d9b..3bfe56b60ce8c8 100644 --- a/src/mono/sample/wasm/browser-eventpipe/wwwroot/main.js +++ b/src/mono/sample/wasm/browser-eventpipe/wwwroot/main.js @@ -15,10 +15,8 @@ try { // dotnet-trace collect --providers Microsoft-Windows-DotNETRuntime:0x1980001:5 -p 41732 // dotnet-gcdump collect -p 41732 // dotnet-counters - //.withEnvironmentVariable("DOTNET_DiagnosticPorts", "download:gcdump") - //.withEnvironmentVariable("DOTNET_DiagnosticPorts", "download:counters") - // .withEnvironmentVariable("DOTNET_DiagnosticPorts", "download:samples") - 
//.withEnvironmentVariable("DOTNET_DiagnosticPorts", "js://cpu-samples") + .withEnvironmentVariable("DOTNET_EventPipeThreadSamplingRate", "0") + .withEnvironmentVariable("DOTNET_DiagnosticPorts", "js://cpu-samples") //.withEnvironmentVariable("MONO_LOG_LEVEL", "debug") //.withEnvironmentVariable("MONO_LOG_MASK", "all") /*.withEnvironmentVariable("MONO_VERBOSE_METHOD", "System.Threading.Monitor:Exit") @@ -30,6 +28,22 @@ try { .withConfig({ appendElementOnExit: true, exitOnUnhandledError: true }) .create(); + /* Call those from dev tools console to test diagnostics features: + globalThis.getDotnetRuntime(0).collectGcDump(); + globalThis.getDotnetRuntime(0).collectCpuSamples({durationSeconds:10}) + globalThis.getDotnetRuntime(0).collectMetrics({ + durationSeconds: 10, + extraProviders: [ + { + providerName: "WasmHello", + keywords: [0xFFFFFFFF, 0xFFFFFFFF], // all keywords + logLevel: 4, // Informational + arguments: null, + } + ] + }); + */ + setModuleImports("main.js", { Sample: { Test: { diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/EventPipeDiagnosticsTests.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/EventPipeDiagnosticsTests.cs index 7f359c9e864a53..00045911bda7a2 100644 --- a/src/mono/wasm/Wasm.Build.Tests/Blazor/EventPipeDiagnosticsTests.cs +++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/EventPipeDiagnosticsTests.cs @@ -17,7 +17,6 @@ namespace Wasm.Build.Tests.Blazor; -[TestCategory("mono")] public class EventPipeDiagnosticsTests : BlazorWasmTestBase { private static readonly string uploadPattern = "^[a-zA-Z0-9_]+\\.nettrace$"; @@ -28,10 +27,14 @@ public EventPipeDiagnosticsTests(ITestOutputHelper output, SharedBuildPerTestCla _enablePerTestCleanup = true; } + + [Fact] + [TestCategory("native-mono")] + public Task BlazorEventPipeTestWithCpuSamplesAOT() => BlazorEventPipeTestWithCpuSamples(Configuration.Release, aot: true); + [Theory] [InlineData(Configuration.Debug, false)] [InlineData(Configuration.Release, false)] - [InlineData(Configuration.Release, true)] 
public async Task BlazorEventPipeTestWithCpuSamples(Configuration config, bool aot) { string extraProperties = @" diff --git a/src/native/libs/Common/JavaScript/cross-module/index.ts b/src/native/libs/Common/JavaScript/cross-module/index.ts index 97cb18ca5da174..b1160b42c8e7e6 100644 --- a/src/native/libs/Common/JavaScript/cross-module/index.ts +++ b/src/native/libs/Common/JavaScript/cross-module/index.ts @@ -179,6 +179,7 @@ export function dotnetUpdateInternalsSubscriber() { getWasmMemory: table[0], getWasmTable: table[1], SystemJS_ScheduleDiagnosticServer: table[2], + SystemJS_GetMethodName: table[3], }; Object.assign(interop, interopLocal); } diff --git a/src/native/libs/Common/JavaScript/types/ems-ambient.ts b/src/native/libs/Common/JavaScript/types/ems-ambient.ts index 0b31a52dce3b04..93be8641f71608 100644 --- a/src/native/libs/Common/JavaScript/types/ems-ambient.ts +++ b/src/native/libs/Common/JavaScript/types/ems-ambient.ts @@ -6,7 +6,8 @@ import type { EmscriptenModuleInternal, InternalExchange, InternalExchangeSubscriber, RuntimeAPI, LoaderExports, BrowserUtilsExports, RuntimeExports, VoidPtr, JSMarshalerArguments, CSFnHandle, TypedArray, - MemOffset, CharPtrPtr + MemOffset, CharPtrPtr, + CharPtr } from "../types"; // we want to use the cross-module symbols defined in closure of dotnet.native.js @@ -30,6 +31,7 @@ export type EmsAmbientSymbolsType = EmscriptenModuleInternal & { _SystemJS_ExecuteFinalizationCallback: () => void; _SystemJS_ExecuteDiagnosticServerCallback: () => void; _SystemJS_ScheduleDiagnosticServer: (delayMs: number) => void; + _SystemJS_GetMethodName: (pMethodDesc: number) => CharPtr; _BrowserHost_CreateHostContract: () => VoidPtr; _BrowserHost_InitializeDotnet: (propertiesCount: number, propertyKeys: CharPtrPtr, propertyValues: CharPtrPtr) => number; _BrowserHost_ExecuteAssembly: (mainAssemblyNamePtr: number, argsLength: number, argsPtr: number) => number; diff --git a/src/native/libs/Common/JavaScript/types/exchange.ts 
b/src/native/libs/Common/JavaScript/types/exchange.ts index e78aea223d4707..3dd1a0b0ea9dbb 100644 --- a/src/native/libs/Common/JavaScript/types/exchange.ts +++ b/src/native/libs/Common/JavaScript/types/exchange.ts @@ -1,7 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -import type { CharPtr, EmsAmbientSymbolsType } from "../types"; +import type { EmsAmbientSymbolsType } from "../types"; import type { check, error, info, warn, debug, fastCheck, normalizeException } from "../loader/logging"; import type { resolveRunMainPromise, rejectRunMainPromise, getRunMainPromise, abortStartup } from "../loader/run"; @@ -26,6 +26,7 @@ import type { abortInteropTimers } from "../../../System.Runtime.InteropServices import type { installNativeSymbols, symbolicateStackTrace } from "../../../System.Native.Browser/diagnostics/symbolicate"; import type { SystemJS_ScheduleDiagnosticServer } from "../../../System.Native.Browser/native"; import type { ds_rt_websocket_close, ds_rt_websocket_create, ds_rt_websocket_poll, ds_rt_websocket_recv, ds_rt_websocket_send } from "../../../System.Native.Browser/diagnostics/diagnostic-server"; +import type { ds_rt_browser_performance_measure } from "../../../System.Native.Browser/diagnostics/browser-profiler"; type getWasmMemoryType = () => WebAssembly.Memory; @@ -149,12 +150,14 @@ export type NativeBrowserExports = { getWasmMemory: getWasmMemoryType, getWasmTable: getWasmTableType, SystemJS_ScheduleDiagnosticServer: typeof SystemJS_ScheduleDiagnosticServer, + SystemJS_GetMethodName: EmsAmbientSymbolsType["_SystemJS_GetMethodName"], } export type NativeBrowserExportsTable = [ getWasmMemoryType, getWasmTableType, typeof SystemJS_ScheduleDiagnosticServer, + EmsAmbientSymbolsType["_SystemJS_GetMethodName"], ] export type BrowserUtilsExports = { @@ -197,7 +200,7 @@ export type DiagnosticsExportsTable = [ typeof ds_rt_websocket_poll, typeof 
ds_rt_websocket_recv, typeof ds_rt_websocket_close, - (namePtr: CharPtr, start: number) => void + typeof ds_rt_browser_performance_measure, ] export type DiagnosticsExports = { @@ -208,5 +211,5 @@ export type DiagnosticsExports = { ds_rt_websocket_poll: typeof ds_rt_websocket_poll, ds_rt_websocket_recv: typeof ds_rt_websocket_recv, ds_rt_websocket_close: typeof ds_rt_websocket_close, - ds_rt_browser_performance_measure: (namePtr: CharPtr, start: number) => void + ds_rt_browser_performance_measure: typeof ds_rt_browser_performance_measure, } diff --git a/src/native/libs/System.Native.Browser/diagnostics/browser-profiler.ts b/src/native/libs/System.Native.Browser/diagnostics/browser-profiler.ts new file mode 100644 index 00000000000000..0ced04e93f8822 --- /dev/null +++ b/src/native/libs/System.Native.Browser/diagnostics/browser-profiler.ts @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +import type { VoidPtr } from "../types"; +import { Module, dotnetNativeBrowserExports } from "./cross-module"; +import { ENVIRONMENT_IS_WEB } from "./per-module"; + + +const hasMeasure = globalThis.performance && typeof globalThis.performance.measure === "function"; + +// Cache of formatted method names keyed by MethodDesc* pointer. We only call +// back into the runtime (SystemJS_GetMethodName) and decode the UTF-8 +// string on a cache miss. 
+const methodNameCache = new Map(); + +export function ds_rt_browser_performance_measure(methodPtr: VoidPtr, start: number): void { + if (!hasMeasure) { + return; + } + + try { + const key = methodPtr as unknown as number; + let fnName = methodNameCache.get(key); + if (fnName === undefined) { + const namePtr = dotnetNativeBrowserExports.SystemJS_GetMethodName(key); + fnName = Module.UTF8ToString(namePtr); + Module._free(namePtr as unknown as VoidPtr); + methodNameCache.set(key, fnName); + } + // Node.js accepts `startTime`, browsers accept `start` + const options = ENVIRONMENT_IS_WEB ? { start: start } : { startTime: start }; + globalThis.performance.measure(fnName, options); + } catch { + // Best-effort: ignore any failure from the name lookup or performance.measure + } +} diff --git a/src/native/libs/System.Native.Browser/diagnostics/index.ts b/src/native/libs/System.Native.Browser/diagnostics/index.ts index d4e0a52338de21..62cfe9b8a58466 100644 --- a/src/native/libs/System.Native.Browser/diagnostics/index.ts +++ b/src/native/libs/System.Native.Browser/diagnostics/index.ts @@ -1,13 +1,12 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-import type { DiagnosticsExportsTable, InternalExchange, DiagnosticsExports, CharPtr } from "./types"; +import type { DiagnosticsExportsTable, InternalExchange, DiagnosticsExports } from "./types"; import { InternalExchangeIndex } from "../types"; import GitHash from "consts:gitHash"; -import { ENVIRONMENT_IS_WEB } from "./per-module"; -import { dotnetApi, dotnetUpdateInternals, dotnetUpdateInternalsSubscriber, Module } from "./cross-module"; +import { dotnetApi, dotnetUpdateInternals, dotnetUpdateInternalsSubscriber } from "./cross-module"; import { registerExit } from "./exit"; import { installNativeSymbols, symbolicateStackTrace } from "./symbolicate"; import { installLoggingProxy } from "./console-proxy"; @@ -15,6 +14,7 @@ import { collectMetrics } from "./dotnet-counters"; import { collectGcDump } from "./dotnet-gcdump"; import { collectCpuSamples } from "./dotnet-cpu-profiler"; import { connectDSRouter, ds_rt_websocket_close, ds_rt_websocket_create, ds_rt_websocket_poll, ds_rt_websocket_recv, ds_rt_websocket_send, initializeDS } from "./diagnostic-server"; +import { ds_rt_browser_performance_measure } from "./browser-profiler"; export function dotnetInitializeModule(internals: InternalExchange): void { if (!Array.isArray(internals)) throw new Error("Expected internals to be an array"); @@ -25,19 +25,6 @@ export function dotnetInitializeModule(internals: InternalExchange): void { if (runtimeApi.runtimeBuildInfo.gitHash && runtimeApi.runtimeBuildInfo.gitHash !== GitHash) { throw new Error(`Mismatched git hashes between loader and runtime. Loader: ${runtimeApi.runtimeBuildInfo.gitHash}, Diagnostics: ${GitHash}`); } - const ds_rt_browser_performance_measure = - globalThis.performance && typeof globalThis.performance.measure === "function" - ? (namePtr: CharPtr, start: number) => { - try { - const fnName = Module.UTF8ToString(namePtr); - // NodeJs accepts startTime, browsers accepts start - const options = ENVIRONMENT_IS_WEB ? 
{ start: start } : { startTime: start }; - globalThis.performance.measure(fnName, options); - } catch { - // Ignore - } - } - : () => { }; internals[InternalExchangeIndex.DiagnosticsExportsTable] = diagnosticsExportsToTable({ symbolicateStackTrace, diff --git a/src/native/libs/System.Native.Browser/libSystem.Native.Browser.footer.js b/src/native/libs/System.Native.Browser/libSystem.Native.Browser.footer.js index 3db32ff3ffbe51..2a19fa50ad77aa 100644 --- a/src/native/libs/System.Native.Browser/libSystem.Native.Browser.footer.js +++ b/src/native/libs/System.Native.Browser/libSystem.Native.Browser.footer.js @@ -25,6 +25,7 @@ function libDotnetFactory() { "SystemJS_ExecuteFinalizationCallback", "SystemJS_ExecuteDiagnosticServerCallback", "SystemJS_ScheduleDiagnosticServer", + "SystemJS_GetMethodName", "__funcs_on_exit", ]; const mergeDotnet = { diff --git a/src/native/libs/System.Native.Browser/native/diagnostics.ts b/src/native/libs/System.Native.Browser/native/diagnostics.ts index 5ce37cb515a75b..cda20a11e5caa9 100644 --- a/src/native/libs/System.Native.Browser/native/diagnostics.ts +++ b/src/native/libs/System.Native.Browser/native/diagnostics.ts @@ -24,6 +24,6 @@ export function ds_rt_websocket_close(clientSocket: number): number { return dotnetDiagnosticsExports.ds_rt_websocket_close(clientSocket); } -export function ds_rt_browser_performance_measure(namePtr: CharPtr, start: number): void { - return dotnetDiagnosticsExports.ds_rt_browser_performance_measure(namePtr, start); +export function ds_rt_browser_performance_measure(methodPtr: VoidPtr, start: number): void { + return dotnetDiagnosticsExports.ds_rt_browser_performance_measure(methodPtr, start); } diff --git a/src/native/libs/System.Native.Browser/native/index.ts b/src/native/libs/System.Native.Browser/native/index.ts index 0e5a8d68619685..d436df5b34a934 100644 --- a/src/native/libs/System.Native.Browser/native/index.ts +++ b/src/native/libs/System.Native.Browser/native/index.ts @@ -29,6 +29,7 @@ export 
function dotnetInitializeModule(internals: InternalExchange): void { getWasmMemory, getWasmTable, SystemJS_ScheduleDiagnosticServer: _ems_._SystemJS_ScheduleDiagnosticServer, + SystemJS_GetMethodName: (pMethodDesc: number) => _ems_._SystemJS_GetMethodName(pMethodDesc), }); _ems_.dotnetUpdateInternals(internals, _ems_.dotnetUpdateInternalsSubscriber); @@ -41,6 +42,7 @@ export function dotnetInitializeModule(internals: InternalExchange): void { map.getWasmMemory, map.getWasmTable, map.SystemJS_ScheduleDiagnosticServer, + map.SystemJS_GetMethodName, ]; }