mudgangster

Tiny, scriptable MUD client
Log | Files | Refs | README

TracyProfiler.cpp (87655B)


      1 #ifdef TRACY_ENABLE
      2 
      3 #ifdef _WIN32
      4 #  ifndef NOMINMAX
      5 #    define NOMINMAX
      6 #  endif
      7 #  include <winsock2.h>
      8 #  include <windows.h>
      9 #  include <tlhelp32.h>
     10 #  include <inttypes.h>
     11 #  include <intrin.h>
     12 #else
     13 #  include <sys/time.h>
     14 #  include <sys/param.h>
     15 #endif
     16 
     17 #ifdef __CYGWIN__
     18 #  include <windows.h>
     19 #  include <unistd.h>
     20 #  include <tlhelp32.h>
     21 #endif
     22 
     23 #ifdef _GNU_SOURCE
     24 #  include <errno.h>
     25 #endif
     26 
     27 #ifdef __linux__
     28 #  include <dirent.h>
     29 #  include <signal.h>
     30 #  include <pthread.h>
     31 #  include <sys/types.h>
     32 #  include <sys/syscall.h>
     33 #endif
     34 
     35 #if defined __APPLE__ || defined BSD
     36 #  include <sys/types.h>
     37 #  include <sys/sysctl.h>
     38 #endif
     39 
     40 #include <algorithm>
     41 #include <assert.h>
     42 #include <atomic>
     43 #include <chrono>
     44 #include <limits>
     45 #include <new>
     46 #include <stdlib.h>
     47 #include <string.h>
     48 #include <thread>
     49 
     50 #include "../common/TracyAlign.hpp"
     51 #include "../common/TracyProtocol.hpp"
     52 #include "../common/TracySocket.hpp"
     53 #include "../common/TracySystem.hpp"
     54 #include "../common/tracy_lz4.hpp"
     55 #include "tracy_rpmalloc.hpp"
     56 #include "TracyCallstack.hpp"
     57 #include "TracyDxt1.hpp"
     58 #include "TracyScoped.hpp"
     59 #include "TracyProfiler.hpp"
     60 #include "TracyThread.hpp"
     61 #include "TracyArmCpuTable.hpp"
     62 #include "TracySysTrace.hpp"
     63 #include "../TracyC.h"
     64 
     65 #ifdef __APPLE__
     66 #  define TRACY_DELAYED_INIT
     67 #else
     68 #  ifdef __GNUC__
     69 #    define init_order( val ) __attribute__ ((init_priority(val)))
     70 #  else
     71 #    define init_order(x)
     72 #  endif
     73 #endif
     74 
     75 #if defined TRACY_HW_TIMER && __ARM_ARCH >= 6 && !defined TARGET_OS_IOS
     76 #  include <signal.h>
     77 #  include <setjmp.h>
     78 #endif
     79 
     80 #if defined _WIN32 || defined __CYGWIN__
     81 #  include <lmcons.h>
     82 extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW );
     83 #else
     84 #  include <unistd.h>
     85 #  include <limits.h>
     86 #endif
     87 #if defined __APPLE__
     88 #  include "TargetConditionals.h"
     89 #endif
     90 #if defined __linux__
     91 #  include <sys/sysinfo.h>
     92 #  include <sys/utsname.h>
     93 #endif
     94 
     95 #if !defined _WIN32 && !defined __CYGWIN__ && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
     96 #  include <cpuid.h>
     97 #endif
     98 
     99 #if !( ( ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA ) || defined __linux__ )
    100 #  include <mutex>
    101 #endif
    102 
    103 namespace tracy
    104 {
    105 
    106 #ifndef TRACY_DELAYED_INIT
    107 namespace
    108 {
    109 #  if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA
    110     BOOL CALLBACK InitOnceCallback( PINIT_ONCE /*initOnce*/, PVOID /*Parameter*/, PVOID* /*Context*/)
    111     {
    112         rpmalloc_initialize();
    113         return TRUE;
    114     }
    115     INIT_ONCE InitOnce = INIT_ONCE_STATIC_INIT;
    116 #  elif defined __linux__
    117     void InitOnceCallback()
    118     {
    119         rpmalloc_initialize();
    120     }
    121     pthread_once_t once_control = PTHREAD_ONCE_INIT;
    122 #  else
    123     void InitOnceCallback()
    124     {
    125         rpmalloc_initialize();
    126     }
    127     std::once_flag once_flag;
    128 #  endif
    129 }
    130 
    131 struct RPMallocInit
    132 {
    133     RPMallocInit()
    134     {
    135 #  if ( defined _WIN32 || defined __CYGWIN__ ) && _WIN32_WINNT >= _WIN32_WINNT_VISTA
    136         InitOnceExecuteOnce( &InitOnce, InitOnceCallback, nullptr, nullptr );
    137 #  elif defined __linux__
    138         pthread_once( &once_control, InitOnceCallback );
    139 #  else
    140         std::call_once( once_flag, InitOnceCallback );
    141 #  endif
    142         rpmalloc_thread_initialize();
    143     }
    144 };
    145 
    146 struct InitTimeWrapper
    147 {
    148     int64_t val;
    149 };
    150 
    151 struct ProducerWrapper
    152 {
    153     tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
    154 };
    155 
    156 struct ThreadHandleWrapper
    157 {
    158     uint64_t val;
    159 };
    160 #endif
    161 
    162 
    163 #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
    164 static inline void CpuId( uint32_t* regs, uint32_t leaf )
    165 {
    166 #if defined _WIN32 || defined __CYGWIN__
    167     __cpuidex( (int*)regs, leaf, 0 );
    168 #else
    169     __get_cpuid( leaf, regs, regs+1, regs+2, regs+3 );
    170 #endif
    171 }
    172 
    173 static void InitFailure( const char* msg )
    174 {
    175 #if defined _WIN32 || defined __CYGWIN__
    176     bool hasConsole = false;
    177     bool reopen = false;
    178     const auto attached = AttachConsole( ATTACH_PARENT_PROCESS );
    179     if( attached )
    180     {
    181         hasConsole = true;
    182         reopen = true;
    183     }
    184     else
    185     {
    186         const auto err = GetLastError();
    187         if( err == ERROR_ACCESS_DENIED )
    188         {
    189             hasConsole = true;
    190         }
    191     }
    192     if( hasConsole )
    193     {
    194         fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
    195         if( reopen )
    196         {
    197             freopen( "CONOUT$", "w", stderr );
    198             fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
    199         }
    200     }
    201     else
    202     {
    203         MessageBoxA( nullptr, msg, "Tracy Profiler initialization failure", MB_ICONSTOP );
    204     }
    205 #else
    206     fprintf( stderr, "Tracy Profiler initialization failure: %s\n", msg );
    207 #endif
    208     exit( 0 );
    209 }
    210 
    211 static int64_t SetupHwTimer()
    212 {
    213     uint32_t regs[4];
    214     CpuId( regs, 0x80000001 );
    215     if( !( regs[3] & ( 1 << 27 ) ) ) InitFailure( "CPU doesn't support RDTSCP instruction." );
    216     CpuId( regs, 0x80000007 );
    217     if( !( regs[3] & ( 1 << 8 ) ) )
    218     {
    219         const char* noCheck = getenv( "TRACY_NO_INVARIANT_CHECK" );
    220         if( !noCheck || noCheck[0] != '1' )
    221         {
    222             InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*." );
    223         }
    224     }
    225 
    226     return Profiler::GetTime();
    227 }
    228 #else
    229 static int64_t SetupHwTimer()
    230 {
    231     return Profiler::GetTime();
    232 }
    233 #endif
    234 
    235 static const char* GetProcessName()
    236 {
    237     const char* processName = "unknown";
    238 #ifdef _WIN32
    239     static char buf[_MAX_PATH];
    240     GetModuleFileNameA( nullptr, buf, _MAX_PATH );
    241     const char* ptr = buf;
    242     while( *ptr != '\0' ) ptr++;
    243     while( ptr > buf && *ptr != '\\' && *ptr != '/' ) ptr--;
    244     if( ptr > buf ) ptr++;
    245     processName = ptr;
    246 #elif defined __ANDROID__
    247 #  if __ANDROID_API__ >= 21
    248     auto buf = getprogname();
    249     if( buf ) processName = buf;
    250 #  endif
    251 #elif defined _GNU_SOURCE || defined __CYGWIN__
    252     processName = program_invocation_short_name;
    253 #elif defined __APPLE__ || defined BSD
    254     auto buf = getprogname();
    255     if( buf ) processName = buf;
    256 #endif
    257     return processName;
    258 }
    259 
    260 static uint32_t GetHex( char*& ptr, int skip )
    261 {
    262     uint32_t ret;
    263     ptr += skip;
    264     char* end;
    265     if( ptr[0] == '0' && ptr[1] == 'x' )
    266     {
    267         ptr += 2;
    268         ret = strtol( ptr, &end, 16 );
    269     }
    270     else
    271     {
    272         ret = strtol( ptr, &end, 10 );
    273     }
    274     ptr = end;
    275     return ret;
    276 }
    277 
    278 static const char* GetHostInfo()
    279 {
    280     static char buf[1024];
    281     auto ptr = buf;
    282 #if defined _WIN32 || defined __CYGWIN__
    283 #  ifdef UNICODE
    284     t_RtlGetVersion RtlGetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandle( L"ntdll.dll" ), "RtlGetVersion" );
    285 #  else
    286     t_RtlGetVersion RtlGetVersion = (t_RtlGetVersion)GetProcAddress( GetModuleHandle( "ntdll.dll" ), "RtlGetVersion" );
    287 #  endif
    288 
    289     if( !RtlGetVersion )
    290     {
    291 #  ifdef __CYGWIN__
    292         ptr += sprintf( ptr, "OS: Windows (Cygwin)\n" );
    293 #  elif defined __MINGW32__
    294         ptr += sprintf( ptr, "OS: Windows (MingW)\n" );
    295 #  else
    296         ptr += sprintf( ptr, "OS: Windows\n" );
    297 #  endif
    298     }
    299     else
    300     {
    301         RTL_OSVERSIONINFOW ver = { sizeof( RTL_OSVERSIONINFOW ) };
    302         RtlGetVersion( &ver );
    303 
    304 #  ifdef __CYGWIN__
    305         ptr += sprintf( ptr, "OS: Windows %i.%i.%i (Cygwin)\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber );
    306 #  elif defined __MINGW32__
    307         ptr += sprintf( ptr, "OS: Windows %i.%i.%i (MingW)\n", (int)ver.dwMajorVersion, (int)ver.dwMinorVersion, (int)ver.dwBuildNumber );
    308 #  else
    309         ptr += sprintf( ptr, "OS: Windows %i.%i.%i\n", ver.dwMajorVersion, ver.dwMinorVersion, ver.dwBuildNumber );
    310 #  endif
    311     }
    312 #elif defined __linux__
    313     struct utsname utsName;
    314     uname( &utsName );
    315 #  if defined __ANDROID__
    316     ptr += sprintf( ptr, "OS: Linux %s (Android)\n", utsName.release );
    317 #  else
    318     ptr += sprintf( ptr, "OS: Linux %s\n", utsName.release );
    319 #  endif
    320 #elif defined __APPLE__
    321 #  if TARGET_OS_IPHONE == 1
    322     ptr += sprintf( ptr, "OS: Darwin (iOS)\n" );
    323 #  elif TARGET_OS_MAC == 1
    324     ptr += sprintf( ptr, "OS: Darwin (OSX)\n" );
    325 #  else
    326     ptr += sprintf( ptr, "OS: Darwin (unknown)\n" );
    327 #  endif
    328 #elif defined __DragonFly__
    329     ptr += sprintf( ptr, "OS: BSD (DragonFly)\n" );
    330 #elif defined __FreeBSD__
    331     ptr += sprintf( ptr, "OS: BSD (FreeBSD)\n" );
    332 #elif defined __NetBSD__
    333     ptr += sprintf( ptr, "OS: BSD (NetBSD)\n" );
    334 #elif defined __OpenBSD__
    335     ptr += sprintf( ptr, "OS: BSD (OpenBSD)\n" );
    336 #else
    337     ptr += sprintf( ptr, "OS: unknown\n" );
    338 #endif
    339 
    340 #if defined _MSC_VER
    341 #  if defined __clang__
    342     ptr += sprintf( ptr, "Compiler: MSVC clang-cl %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ );
    343 #  else
    344     ptr += sprintf( ptr, "Compiler: MSVC %i\n", _MSC_VER );
    345 #  endif
    346 #elif defined __clang__
    347     ptr += sprintf( ptr, "Compiler: clang %i.%i.%i\n", __clang_major__, __clang_minor__, __clang_patchlevel__ );
    348 #elif defined __GNUC__
    349     ptr += sprintf( ptr, "Compiler: gcc %i.%i\n", __GNUC__, __GNUC_MINOR__ );
    350 #else
    351     ptr += sprintf( ptr, "Compiler: unknown\n" );
    352 #endif
    353 
    354 #if defined _WIN32 || defined __CYGWIN__
    355 #  ifndef __CYGWIN__
    356     InitWinSock();
    357 #  endif
    358     char hostname[512];
    359     gethostname( hostname, 512 );
    360 
    361     DWORD userSz = UNLEN+1;
    362     char user[UNLEN+1];
    363     GetUserNameA( user, &userSz );
    364 
    365     ptr += sprintf( ptr, "User: %s@%s\n", user, hostname );
    366 #else
    367     char hostname[_POSIX_HOST_NAME_MAX]{};
    368     char user[_POSIX_LOGIN_NAME_MAX]{};
    369 
    370     gethostname( hostname, _POSIX_HOST_NAME_MAX );
    371 #  if defined __ANDROID__
    372     const auto login = getlogin();
    373     if( login )
    374     {
    375         strcpy( user, login );
    376     }
    377     else
    378     {
    379         memcpy( user, "(?)", 4 );
    380     }
    381 #  else
    382     getlogin_r( user, _POSIX_LOGIN_NAME_MAX );
    383 #  endif
    384 
    385     ptr += sprintf( ptr, "User: %s@%s\n", user, hostname );
    386 #endif
    387 
    388 #if defined __i386 || defined _M_IX86
    389     ptr += sprintf( ptr, "Arch: x86\n" );
    390 #elif defined __x86_64__ || defined _M_X64
    391     ptr += sprintf( ptr, "Arch: x64\n" );
    392 #elif defined __aarch64__
    393     ptr += sprintf( ptr, "Arch: ARM64\n" );
    394 #elif defined __ARM_ARCH
    395     ptr += sprintf( ptr, "Arch: ARM\n" );
    396 #else
    397     ptr += sprintf( ptr, "Arch: unknown\n" );
    398 #endif
    399 
    400 #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
    401     uint32_t regs[4];
    402     char cpuModel[4*4*3];
    403     auto modelPtr = cpuModel;
    404     for( uint32_t i=0x80000002; i<0x80000005; ++i )
    405     {
    406 #  if defined _WIN32 || defined __CYGWIN__
    407         __cpuidex( (int*)regs, i, 0 );
    408 #  else
    409         int zero = 0;
    410         asm volatile ( "cpuid" : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3]) : "a" (i), "c" (zero) );
    411 #  endif
    412         memcpy( modelPtr, regs, sizeof( regs ) ); modelPtr += sizeof( regs );
    413     }
    414 
    415     ptr += sprintf( ptr, "CPU: %s\n", cpuModel );
    416 #elif defined __linux__ && defined __ARM_ARCH
    417     bool cpuFound = false;
    418     FILE* fcpuinfo = fopen( "/proc/cpuinfo", "rb" );
    419     if( fcpuinfo )
    420     {
    421         enum { BufSize = 4*1024 };
    422         char buf[BufSize];
    423         const auto sz = fread( buf, 1, BufSize, fcpuinfo );
    424         fclose( fcpuinfo );
    425         const auto end = buf + sz;
    426         auto cptr = buf;
    427 
    428         uint32_t impl = 0;
    429         uint32_t var = 0;
    430         uint32_t part = 0;
    431         uint32_t rev = 0;
    432 
    433         while( end - cptr > 20 )
    434         {
    435             while( end - cptr > 20 && memcmp( cptr, "CPU ", 4 ) != 0 )
    436             {
    437                 cptr += 4;
    438                 while( end - cptr > 20 && *cptr != '\n' ) cptr++;
    439                 cptr++;
    440             }
    441             if( end - cptr <= 20 ) break;
    442             cptr += 4;
    443             if( memcmp( cptr, "implementer\t: ", 14 ) == 0 )
    444             {
    445                 if( impl != 0 ) break;
    446                 impl = GetHex( cptr, 14 );
    447             }
    448             else if( memcmp( cptr, "variant\t: ", 10 ) == 0 ) var = GetHex( cptr, 10 );
    449             else if( memcmp( cptr, "part\t: ", 7 ) == 0 ) part = GetHex( cptr, 7 );
    450             else if( memcmp( cptr, "revision\t: ", 11 ) == 0 ) rev = GetHex( cptr, 11 );
    451             while( *cptr != '\n' && *cptr != '\0' ) cptr++;
    452             cptr++;
    453         }
    454 
    455         if( impl != 0 || var != 0 || part != 0 || rev != 0 )
    456         {
    457             cpuFound = true;
    458             ptr += sprintf( ptr, "CPU: %s%s r%ip%i\n", DecodeArmImplementer( impl ), DecodeArmPart( impl, part ), var, rev );
    459         }
    460     }
    461     if( !cpuFound )
    462     {
    463         ptr += sprintf( ptr, "CPU: unknown\n" );
    464     }
    465 #elif defined __APPLE__ && TARGET_OS_IPHONE == 1
    466     {
    467         size_t sz;
    468         sysctlbyname( "hw.machine", nullptr, &sz, nullptr, 0 );
    469         auto str = (char*)tracy_malloc( sz );
    470         sysctlbyname( "hw.machine", str, &sz, nullptr, 0 );
    471         ptr += sprintf( ptr, "Device: %s\n", DecodeIosDevice( str ) );
    472         tracy_free( str );
    473     }
    474 #else
    475     ptr += sprintf( ptr, "CPU: unknown\n" );
    476 #endif
    477 
    478     ptr += sprintf( ptr, "CPU cores: %i\n", std::thread::hardware_concurrency() );
    479 
    480 #if defined _WIN32 || defined __CYGWIN__
    481     MEMORYSTATUSEX statex;
    482     statex.dwLength = sizeof( statex );
    483     GlobalMemoryStatusEx( &statex );
    484 #  ifdef _MSC_VER
    485     ptr += sprintf( ptr, "RAM: %I64u MB\n", statex.ullTotalPhys / 1024 / 1024 );
    486 #  else
    487     ptr += sprintf( ptr, "RAM: %llu MB\n", statex.ullTotalPhys / 1024 / 1024 );
    488 #  endif
    489 #elif defined __linux__
    490     struct sysinfo sysInfo;
    491     sysinfo( &sysInfo );
    492     ptr += sprintf( ptr, "RAM: %lu MB\n", sysInfo.totalram / 1024 / 1024 );
    493 #elif defined __APPLE__
    494     size_t memSize;
    495     size_t sz = sizeof( memSize );
    496     sysctlbyname( "hw.memsize", &memSize, &sz, nullptr, 0 );
    497     ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 );
    498 #elif defined BSD
    499     size_t memSize;
    500     size_t sz = sizeof( memSize );
    501     sysctlbyname( "hw.physmem", &memSize, &sz, nullptr, 0 );
    502     ptr += sprintf( ptr, "RAM: %zu MB\n", memSize / 1024 / 1024 );
    503 #else
    504     ptr += sprintf( ptr, "RAM: unknown\n" );
    505 #endif
    506 
    507     return buf;
    508 }
    509 
    510 static uint64_t GetPid()
    511 {
    512 #if defined _WIN32 || defined __CYGWIN__
    513     return uint64_t( GetCurrentProcessId() );
    514 #else
    515     return uint64_t( getpid() );
    516 #endif
    517 }
    518 
    519 static BroadcastMessage& GetBroadcastMessage( const char* procname, size_t pnsz, int& len )
    520 {
    521     static BroadcastMessage msg;
    522 
    523     msg.broadcastVersion = BroadcastVersion;
    524     msg.protocolVersion = ProtocolVersion;
    525 
    526     memcpy( msg.programName, procname, pnsz );
    527     memset( msg.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz );
    528 
    529     len = int( offsetof( BroadcastMessage, programName ) + pnsz + 1 );
    530     return msg;
    531 }
    532 
    533 #if defined _WIN32 || defined __CYGWIN__
    534 static DWORD s_profilerThreadId = 0;
    535 static char s_crashText[1024];
    536 
    537 LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
    538 {
    539     const unsigned ec = pExp->ExceptionRecord->ExceptionCode;
    540     auto msgPtr = s_crashText;
    541     switch( ec )
    542     {
    543     case EXCEPTION_ACCESS_VIOLATION:
    544         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ACCESS_VIOLATION (0x%x). ", ec );
    545         switch( pExp->ExceptionRecord->ExceptionInformation[0] )
    546         {
    547         case 0:
    548             msgPtr += sprintf( msgPtr, "Read violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
    549             break;
    550         case 1:
    551             msgPtr += sprintf( msgPtr, "Write violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
    552             break;
    553         case 8:
    554             msgPtr += sprintf( msgPtr, "DEP violation at address 0x%" PRIxPTR ".", pExp->ExceptionRecord->ExceptionInformation[1] );
    555             break;
    556         default:
    557             break;
    558         }
    559         break;
    560     case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
    561         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ARRAY_BOUNDS_EXCEEDED (0x%x). ", ec );
    562         break;
    563     case EXCEPTION_DATATYPE_MISALIGNMENT:
    564         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_DATATYPE_MISALIGNMENT (0x%x). ", ec );
    565         break;
    566     case EXCEPTION_FLT_DIVIDE_BY_ZERO:
    567         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_FLT_DIVIDE_BY_ZERO (0x%x). ", ec );
    568         break;
    569     case EXCEPTION_ILLEGAL_INSTRUCTION:
    570         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_ILLEGAL_INSTRUCTION (0x%x). ", ec );
    571         break;
    572     case EXCEPTION_IN_PAGE_ERROR:
    573         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_IN_PAGE_ERROR (0x%x). ", ec );
    574         break;
    575     case EXCEPTION_INT_DIVIDE_BY_ZERO:
    576         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_INT_DIVIDE_BY_ZERO (0x%x). ", ec );
    577         break;
    578     case EXCEPTION_PRIV_INSTRUCTION:
    579         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_PRIV_INSTRUCTION (0x%x). ", ec );
    580         break;
    581     case EXCEPTION_STACK_OVERFLOW:
    582         msgPtr += sprintf( msgPtr, "Exception EXCEPTION_STACK_OVERFLOW (0x%x). ", ec );
    583         break;
    584     default:
    585         return EXCEPTION_CONTINUE_SEARCH;
    586     }
    587 
    588     {
    589         Magic magic;
    590         auto token = GetToken();
    591         auto& tail = token->get_tail_index();
    592         auto item = token->enqueue_begin( magic );
    593         MemWrite( &item->hdr.type, QueueType::CrashReport );
    594         item->crashReport.time = Profiler::GetTime();
    595         item->crashReport.text = (uint64_t)s_crashText;
    596         tail.store( magic + 1, std::memory_order_release );
    597 
    598         GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
    599     }
    600 
    601     HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 );
    602     if( h == INVALID_HANDLE_VALUE ) return EXCEPTION_CONTINUE_SEARCH;
    603 
    604     THREADENTRY32 te = { sizeof( te ) };
    605     if( !Thread32First( h, &te ) )
    606     {
    607         CloseHandle( h );
    608         return EXCEPTION_CONTINUE_SEARCH;
    609     }
    610 
    611     const auto pid = GetCurrentProcessId();
    612     const auto tid = GetCurrentThreadId();
    613 
    614     do
    615     {
    616         if( te.th32OwnerProcessID == pid && te.th32ThreadID != tid && te.th32ThreadID != s_profilerThreadId )
    617         {
    618             HANDLE th = OpenThread( THREAD_SUSPEND_RESUME, FALSE, te.th32ThreadID );
    619             if( th != INVALID_HANDLE_VALUE )
    620             {
    621                 SuspendThread( th );
    622                 CloseHandle( th );
    623             }
    624         }
    625     }
    626     while( Thread32Next( h, &te ) );
    627     CloseHandle( h );
    628 
    629     {
    630         Magic magic;
    631         auto token = GetToken();
    632         auto& tail = token->get_tail_index();
    633         auto item = token->enqueue_begin( magic );
    634         MemWrite( &item->hdr.type, QueueType::Crash );
    635         tail.store( magic + 1, std::memory_order_release );
    636     }
    637 
    638     std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
    639     GetProfiler().RequestShutdown();
    640     while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); };
    641 
    642     TerminateProcess( GetCurrentProcess(), 1 );
    643 
    644     return EXCEPTION_CONTINUE_SEARCH;
    645 }
    646 #endif
    647 
    648 #ifdef __linux__
    649 static long s_profilerTid = 0;
    650 static char s_crashText[1024];
    651 static std::atomic<bool> s_alreadyCrashed( false );
    652 
    653 static void ThreadFreezer( int /*signal*/ )
    654 {
    655     for(;;) sleep( 1000 );
    656 }
    657 
    658 static inline void HexPrint( char*& ptr, uint64_t val )
    659 {
    660     if( val == 0 )
    661     {
    662         *ptr++ = '0';
    663         return;
    664     }
    665 
    666     static const char HexTable[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
    667     char buf[16];
    668     auto bptr = buf;
    669 
    670     do
    671     {
    672         *bptr++ = HexTable[val%16];
    673         val /= 16;
    674     }
    675     while( val > 0 );
    676 
    677     do
    678     {
    679         *ptr++ = *--bptr;
    680     }
    681     while( bptr != buf );
    682 }
    683 
    684 static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
    685 {
    686     bool expected = false;
    687     if( !s_alreadyCrashed.compare_exchange_strong( expected, true ) ) ThreadFreezer( signal );
    688 
    689     auto msgPtr = s_crashText;
    690     switch( signal )
    691     {
    692     case SIGILL:
    693         strcpy( msgPtr, "Illegal Instruction.\n" );
    694         while( *msgPtr ) msgPtr++;
    695         switch( info->si_code )
    696         {
    697         case ILL_ILLOPC:
    698             strcpy( msgPtr, "Illegal opcode.\n" );
    699             break;
    700         case ILL_ILLOPN:
    701             strcpy( msgPtr, "Illegal operand.\n" );
    702             break;
    703         case ILL_ILLADR:
    704             strcpy( msgPtr, "Illegal addressing mode.\n" );
    705             break;
    706         case ILL_ILLTRP:
    707             strcpy( msgPtr, "Illegal trap.\n" );
    708             break;
    709         case ILL_PRVOPC:
    710             strcpy( msgPtr, "Privileged opcode.\n" );
    711             break;
    712         case ILL_PRVREG:
    713             strcpy( msgPtr, "Privileged register.\n" );
    714             break;
    715         case ILL_COPROC:
    716             strcpy( msgPtr, "Coprocessor error.\n" );
    717             break;
    718         case ILL_BADSTK:
    719             strcpy( msgPtr, "Internal stack error.\n" );
    720             break;
    721         default:
    722             break;
    723         }
    724         break;
    725     case SIGFPE:
    726         strcpy( msgPtr, "Floating-point exception.\n" );
    727         while( *msgPtr ) msgPtr++;
    728         switch( info->si_code )
    729         {
    730         case FPE_INTDIV:
    731             strcpy( msgPtr, "Integer divide by zero.\n" );
    732             break;
    733         case FPE_INTOVF:
    734             strcpy( msgPtr, "Integer overflow.\n" );
    735             break;
    736         case FPE_FLTDIV:
    737             strcpy( msgPtr, "Floating-point divide by zero.\n" );
    738             break;
    739         case FPE_FLTOVF:
    740             strcpy( msgPtr, "Floating-point overflow.\n" );
    741             break;
    742         case FPE_FLTUND:
    743             strcpy( msgPtr, "Floating-point underflow.\n" );
    744             break;
    745         case FPE_FLTRES:
    746             strcpy( msgPtr, "Floating-point inexact result.\n" );
    747             break;
    748         case FPE_FLTINV:
    749             strcpy( msgPtr, "Floating-point invalid operation.\n" );
    750             break;
    751         case FPE_FLTSUB:
    752             strcpy( msgPtr, "Subscript out of range.\n" );
    753             break;
    754         default:
    755             break;
    756         }
    757         break;
    758     case SIGSEGV:
    759         strcpy( msgPtr, "Invalid memory reference.\n" );
    760         while( *msgPtr ) msgPtr++;
    761         switch( info->si_code )
    762         {
    763         case SEGV_MAPERR:
    764             strcpy( msgPtr, "Address not mapped to object.\n" );
    765             break;
    766         case SEGV_ACCERR:
    767             strcpy( msgPtr, "Invalid permissions for mapped object.\n" );
    768             break;
    769 #  ifdef SEGV_BNDERR
    770         case SEGV_BNDERR:
    771             strcpy( msgPtr, "Failed address bound checks.\n" );
    772             break;
    773 #  endif
    774 #  ifdef SEGV_PKUERR
    775         case SEGV_PKUERR:
    776             strcpy( msgPtr, "Access was denied by memory protection keys.\n" );
    777             break;
    778 #  endif
    779         default:
    780             break;
    781         }
    782         break;
    783     case SIGPIPE:
    784         strcpy( msgPtr, "Broken pipe.\n" );
    785         while( *msgPtr ) msgPtr++;
    786         break;
    787     case SIGBUS:
    788         strcpy( msgPtr, "Bus error.\n" );
    789         while( *msgPtr ) msgPtr++;
    790         switch( info->si_code )
    791         {
    792         case BUS_ADRALN:
    793             strcpy( msgPtr, "Invalid address alignment.\n" );
    794             break;
    795         case BUS_ADRERR:
    796             strcpy( msgPtr, "Nonexistent physical address.\n" );
    797             break;
    798         case BUS_OBJERR:
    799             strcpy( msgPtr, "Object-specific hardware error.\n" );
    800             break;
    801         case BUS_MCEERR_AR:
    802             strcpy( msgPtr, "Hardware memory error consumed on a machine check; action required.\n" );
    803             break;
    804         case BUS_MCEERR_AO:
    805             strcpy( msgPtr, "Hardware memory error detected in process but not consumed; action optional.\n" );
    806             break;
    807         default:
    808             break;
    809         }
    810         break;
    811     default:
    812         abort();
    813     }
    814     while( *msgPtr ) msgPtr++;
    815 
    816     if( signal != SIGPIPE )
    817     {
    818         strcpy( msgPtr, "Fault address: 0x" );
    819         while( *msgPtr ) msgPtr++;
    820         HexPrint( msgPtr, uint64_t( info->si_addr ) );
    821         *msgPtr++ = '\n';
    822     }
    823 
    824     {
    825         Magic magic;
    826         auto token = GetToken();
    827         auto& tail = token->get_tail_index();
    828         auto item = token->enqueue_begin( magic );
    829         MemWrite( &item->hdr.type, QueueType::CrashReport );
    830         item->crashReport.time = Profiler::GetTime();
    831         item->crashReport.text = (uint64_t)s_crashText;
    832         tail.store( magic + 1, std::memory_order_release );
    833 
    834         GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );
    835     }
    836 
    837     DIR* dp = opendir( "/proc/self/task" );
    838     if( !dp ) abort();
    839 
    840     const auto selfTid = syscall( SYS_gettid );
    841 
    842     struct dirent* ep;
    843     while( ( ep = readdir( dp ) ) != nullptr )
    844     {
    845         if( ep->d_name[0] == '.' ) continue;
    846         int tid = atoi( ep->d_name );
    847         if( tid != selfTid && tid != s_profilerTid )
    848         {
    849             syscall( SYS_tkill, tid, SIGPWR );
    850         }
    851     }
    852     closedir( dp );
    853 
    854     {
    855         Magic magic;
    856         auto token = GetToken();
    857         auto& tail = token->get_tail_index();
    858         auto item = token->enqueue_begin( magic );
    859         MemWrite( &item->hdr.type, QueueType::Crash );
    860         tail.store( magic + 1, std::memory_order_release );
    861     }
    862 
    863     std::this_thread::sleep_for( std::chrono::milliseconds( 500 ) );
    864     GetProfiler().RequestShutdown();
    865     while( !GetProfiler().HasShutdownFinished() ) { std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); };
    866 
    867     abort();
    868 }
    869 #endif
    870 
    871 
    872 enum { QueuePrealloc = 256 * 1024 };
    873 
    874 static Profiler* s_instance;
    875 static Thread* s_thread;
    876 static Thread* s_compressThread;
    877 
    878 #ifdef TRACY_HAS_SYSTEM_TRACING
    879 static Thread* s_sysTraceThread = nullptr;
    880 #endif
    881 
    882 #ifdef TRACY_DELAYED_INIT
    883 struct ThreadNameData;
    884 TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
    885 
    886 struct RPMallocInit { RPMallocInit() { rpmalloc_initialize(); } };
    887 
// Delayed-init variant: prepares rpmalloc for use on the calling thread.
// The global initialization is invoked here as well, before the per-thread one.
TRACY_API void InitRPMallocThread()
{
    rpmalloc_initialize();
    rpmalloc_thread_initialize();
}
    893 
// Process-wide profiler state for the TRACY_DELAYED_INIT configuration.
// A single instance is created on first use by GetProfilerData().
// Members are constructed in declaration order, so the timer setup and the
// rpmalloc initialization both happen before the queue and the Profiler itself.
struct ProfilerData
{
    int64_t initTime = SetupHwTimer();
    RPMallocInit rpmalloc_init;
    moodycamel::ConcurrentQueue<QueueItem> queue;
    Profiler profiler;
    std::atomic<uint32_t> lockCounter { 0 };
    std::atomic<uint8_t> gpuCtxCounter { 0 };
    std::atomic<ThreadNameData*> threadNameData { nullptr };
};
    904 
// Pairs a producer token for the shared queue with the explicit-producer pointer
// obtained from that token. `detail` is declared first because `ptr` is
// initialized from it.
struct ProducerWrapper
{
    ProducerWrapper( ProfilerData& data ) : detail( data.queue ), ptr( data.queue.get_explicit_producer( detail ) ) {}
    moodycamel::ProducerToken detail;
    tracy::moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptr;
};
    911 
// Per-thread profiler state for the TRACY_DELAYED_INIT configuration; one
// instance per thread is created by GetProfilerThreadData().
struct ProfilerThreadData
{
    // Builds the thread's producer token against the shared queue in `data`
    // and starts with a null GPU context.
    ProfilerThreadData( ProfilerData& data ) : token( data ), gpuCtx( { nullptr } ) {}
    RPMallocInit rpmalloc_init;
    ProducerWrapper token;
    GpuCtxWrapper gpuCtx;
#  ifdef TRACY_ON_DEMAND
    LuaZoneState luaZoneState;
#  endif
};
    922 
    923 static std::atomic<int> profilerDataLock { 0 };
    924 static std::atomic<ProfilerData*> profilerData { nullptr };
    925 
    926 static ProfilerData& GetProfilerData()
    927 {
    928     auto ptr = profilerData.load( std::memory_order_acquire );
    929     if( !ptr )
    930     {
    931         int expected = 0;
    932         while( !profilerDataLock.compare_exchange_strong( expected, 1, std::memory_order_release, std::memory_order_relaxed ) ) { expected = 0; }
    933         ptr = profilerData.load( std::memory_order_acquire );
    934         if( !ptr )
    935         {
    936             ptr = (ProfilerData*)malloc( sizeof( ProfilerData ) );
    937             new (ptr) ProfilerData();
    938             profilerData.store( ptr, std::memory_order_release );
    939         }
    940         profilerDataLock.store( 0, std::memory_order_release );
    941     }
    942     return *ptr;
    943 }
    944 
// Returns the calling thread's ProfilerThreadData. The thread_local instance is
// constructed on the thread's first call, using the shared ProfilerData.
static ProfilerThreadData& GetProfilerThreadData()
{
    thread_local ProfilerThreadData data( GetProfilerData() );
    return data;
}
    950 
// Accessors for the TRACY_DELAYED_INIT configuration. Each one forwards to the
// lazily created ProfilerData (process-wide) or ProfilerThreadData (per thread).

// Calling thread's explicit producer for the shared queue.
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return GetProfilerThreadData().token.ptr; }
TRACY_API Profiler& GetProfiler() { return GetProfilerData().profiler; }
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return GetProfilerData().queue; }
// Value returned by SetupHwTimer() when ProfilerData was constructed.
TRACY_API int64_t GetInitTime() { return GetProfilerData().initTime; }
TRACY_API std::atomic<uint32_t>& GetLockCounter() { return GetProfilerData().lockCounter; }
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return GetProfilerData().gpuCtxCounter; }
TRACY_API GpuCtxWrapper& GetGpuCtx() { return GetProfilerThreadData().gpuCtx; }
// Not cached in this configuration: queries the handle on every call.
TRACY_API uint64_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
TRACY_API std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().threadNameData; }

#  ifdef TRACY_ON_DEMAND
TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; }
#  endif
    964 
    965 #else
// Static-init variant: only the per-thread rpmalloc setup is performed here;
// the global rpmalloc_initialize() call is made by the RPMallocInit objects
// declared below.
TRACY_API void InitRPMallocThread()
{
    rpmalloc_thread_initialize();
}
    970 
// Global and thread-local profiler state for the static-initialization configuration.
// Declaration order and the init_order() annotations are significant here; read the
// numbered notes in the order 1, 1a, 2 (note 3 lives in Profiler::Profiler()).

// MSVC static initialization order solution. gcc/clang uses init_order() to avoid all this.

// 1a. But s_queue is needed for initialization of variables in point 2.
extern moodycamel::ConcurrentQueue<QueueItem> s_queue;

thread_local RPMallocInit init_order(106) s_rpmalloc_thread_init;

// 2. If these variables would be in the .CRT$XCB section, they would be initialized only in main thread.
thread_local moodycamel::ProducerToken init_order(107) s_token_detail( s_queue );
thread_local ProducerWrapper init_order(108) s_token { s_queue.get_explicit_producer( s_token_detail ) };
thread_local ThreadHandleWrapper init_order(104) s_threadHandle { detail::GetThreadHandleImpl() };

#  ifdef _MSC_VER
// 1. Initialize these static variables before all other variables.
#    pragma warning( disable : 4075 )
#    pragma init_seg( ".CRT$XCB" )
#  endif

// Process-wide state: timer setup, rpmalloc, then the queue and counters.
static InitTimeWrapper init_order(101) s_initTime { SetupHwTimer() };
static RPMallocInit init_order(102) s_rpmalloc_init;
moodycamel::ConcurrentQueue<QueueItem> init_order(103) s_queue( QueuePrealloc );
std::atomic<uint32_t> init_order(104) s_lockCounter( 0 );
std::atomic<uint8_t> init_order(104) s_gpuCtxCounter( 0 );

thread_local GpuCtxWrapper init_order(104) s_gpuCtx { nullptr };

struct ThreadNameData;
static std::atomic<ThreadNameData*> init_order(104) s_threadNameDataInstance( nullptr );
// Externally visible reference bound to the file-local atomic above.
std::atomic<ThreadNameData*>& s_threadNameData = s_threadNameDataInstance;

#  ifdef TRACY_ON_DEMAND
thread_local LuaZoneState init_order(104) s_luaZoneState { 0, false };
#  endif

// The profiler instance itself; its constructor starts the worker threads.
static Profiler init_order(105) s_profiler;
   1006 
// Accessors for the static-initialization configuration. Each one returns the
// corresponding global or thread_local object declared above.

// Calling thread's explicit producer for the shared queue.
TRACY_API moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* GetToken() { return s_token.ptr; }
TRACY_API Profiler& GetProfiler() { return s_profiler; }
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue() { return s_queue; }
// Value returned by SetupHwTimer() during static initialization.
TRACY_API int64_t GetInitTime() { return s_initTime.val; }
TRACY_API std::atomic<uint32_t>& GetLockCounter() { return s_lockCounter; }
TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter() { return s_gpuCtxCounter; }
TRACY_API GpuCtxWrapper& GetGpuCtx() { return s_gpuCtx; }
#  ifdef __CYGWIN__
// Hackfix for cygwin reporting memory frees without matching allocations. WTF?
// Bypasses the cached thread_local handle and queries it on every call.
TRACY_API uint64_t GetThreadHandle() { return detail::GetThreadHandleImpl(); }
#  else
// Returns the handle cached in the thread_local s_threadHandle.
TRACY_API uint64_t GetThreadHandle() { return s_threadHandle.val; }
#  endif

TRACY_API std::atomic<ThreadNameData*>& GetThreadNameData() { return s_threadNameData; }

#  ifdef TRACY_ON_DEMAND
TRACY_API LuaZoneState& GetLuaZoneState() { return s_luaZoneState; }
#  endif
   1026 #endif
   1027 
// Number of QueueItems handled per bulk dequeue; it sizes m_itemBuf and is the
// item limit passed to the queue's bulk dequeue calls.
enum { BulkSize = TargetFrameSize / QueueItemSize };
   1029 
   1030 Profiler::Profiler()
   1031     : m_timeBegin( 0 )
   1032     , m_mainThread( detail::GetThreadHandleImpl() )
   1033     , m_epoch( std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count() )
   1034     , m_shutdown( false )
   1035     , m_shutdownManual( false )
   1036     , m_shutdownFinished( false )
   1037     , m_sock( nullptr )
   1038     , m_broadcast( nullptr )
   1039     , m_noExit( false )
   1040     , m_zoneId( 1 )
   1041     , m_stream( LZ4_createStream() )
   1042     , m_buffer( (char*)tracy_malloc( TargetFrameSize*3 ) )
   1043     , m_bufferOffset( 0 )
   1044     , m_bufferStart( 0 )
   1045     , m_itemBuf( (QueueItem*)tracy_malloc( sizeof( QueueItem ) * BulkSize ) )
   1046     , m_lz4Buf( (char*)tracy_malloc( LZ4Size + sizeof( lz4sz_t ) ) )
   1047     , m_serialQueue( 1024*1024 )
   1048     , m_serialDequeue( 1024*1024 )
   1049     , m_fiQueue( 16 )
   1050     , m_fiDequeue( 16 )
   1051     , m_frameCount( 0 )
   1052 #ifdef TRACY_ON_DEMAND
   1053     , m_isConnected( false )
   1054     , m_connectionId( 0 )
   1055     , m_deferredQueue( 64*1024 )
   1056 #endif
   1057     , m_paramCallback( nullptr )
   1058 {
   1059     assert( !s_instance );
   1060     s_instance = this;
   1061 
   1062 #ifndef TRACY_DELAYED_INIT
   1063 #  ifdef _MSC_VER
   1064     // 3. But these variables need to be initialized in main thread within the .CRT$XCB section. Do it here.
   1065     s_token_detail = moodycamel::ProducerToken( s_queue );
   1066     s_token = ProducerWrapper { s_queue.get_explicit_producer( s_token_detail ) };
   1067     s_threadHandle = ThreadHandleWrapper { m_mainThread };
   1068 #  endif
   1069 #endif
   1070 
   1071     CalibrateTimer();
   1072     CalibrateDelay();
   1073 
   1074 #ifndef TRACY_NO_EXIT
   1075     const char* noExitEnv = getenv( "TRACY_NO_EXIT" );
   1076     if( noExitEnv && noExitEnv[0] == '1' )
   1077     {
   1078         m_noExit = true;
   1079     }
   1080 #endif
   1081 
   1082     s_thread = (Thread*)tracy_malloc( sizeof( Thread ) );
   1083     new(s_thread) Thread( LaunchWorker, this );
   1084 
   1085     s_compressThread = (Thread*)tracy_malloc( sizeof( Thread ) );
   1086     new(s_compressThread) Thread( LaunchCompressWorker, this );
   1087 
   1088 #ifdef TRACY_HAS_SYSTEM_TRACING
   1089     if( SysTraceStart() )
   1090     {
   1091         s_sysTraceThread = (Thread*)tracy_malloc( sizeof( Thread ) );
   1092         new(s_sysTraceThread) Thread( SysTraceWorker, nullptr );
   1093     }
   1094 #endif
   1095 
   1096 #if defined _WIN32 || defined __CYGWIN__
   1097     s_profilerThreadId = GetThreadId( s_thread->Handle() );
   1098     AddVectoredExceptionHandler( 1, CrashFilter );
   1099 #endif
   1100 
   1101 #ifdef __linux__
   1102     struct sigaction threadFreezer = {};
   1103     threadFreezer.sa_handler = ThreadFreezer;
   1104     sigaction( SIGPWR, &threadFreezer, nullptr );
   1105 
   1106     struct sigaction crashHandler = {};
   1107     crashHandler.sa_sigaction = CrashHandler;
   1108     crashHandler.sa_flags = SA_SIGINFO;
   1109     sigaction( SIGILL, &crashHandler, nullptr );
   1110     sigaction( SIGFPE, &crashHandler, nullptr );
   1111     sigaction( SIGSEGV, &crashHandler, nullptr );
   1112     sigaction( SIGPIPE, &crashHandler, nullptr );
   1113     sigaction( SIGBUS, &crashHandler, nullptr );
   1114 #endif
   1115 
   1116 #ifdef TRACY_HAS_CALLSTACK
   1117     InitCallstack();
   1118 #endif
   1119 
   1120     m_timeBegin.store( GetTime(), std::memory_order_relaxed );
   1121 }
   1122 
   1123 Profiler::~Profiler()
   1124 {
   1125     m_shutdown.store( true, std::memory_order_relaxed );
   1126 
   1127 #ifdef TRACY_HAS_SYSTEM_TRACING
   1128     if( s_sysTraceThread )
   1129     {
   1130         SysTraceStop();
   1131         s_sysTraceThread->~Thread();
   1132         tracy_free( s_sysTraceThread );
   1133     }
   1134 #endif
   1135 
   1136     s_compressThread->~Thread();
   1137     tracy_free( s_compressThread );
   1138     s_thread->~Thread();
   1139     tracy_free( s_thread );
   1140 
   1141     tracy_free( m_lz4Buf );
   1142     tracy_free( m_itemBuf );
   1143     tracy_free( m_buffer );
   1144     LZ4_freeStream( (LZ4_stream_t*)m_stream );
   1145 
   1146     if( m_sock )
   1147     {
   1148         m_sock->~Socket();
   1149         tracy_free( m_sock );
   1150     }
   1151 
   1152     if( m_broadcast )
   1153     {
   1154         m_broadcast->~UdpBroadcast();
   1155         tracy_free( m_broadcast );
   1156     }
   1157 
   1158     assert( s_instance );
   1159     s_instance = nullptr;
   1160 }
   1161 
// Reports whether shutdown has been flagged on the current Profiler instance.
// Reads through s_instance, so it must only be called while an instance exists.
bool Profiler::ShouldExit()
{
    return s_instance->m_shutdown.load( std::memory_order_relaxed );
}
   1166 
   1167 void Profiler::Worker()
   1168 {
   1169 #ifdef __linux__
   1170     s_profilerTid = syscall( SYS_gettid );
   1171 #endif
   1172 
   1173     SetThreadName( "Tracy Profiler" );
   1174 
   1175 #ifdef TRACY_PORT
   1176     const auto port = TRACY_PORT;
   1177 #else
   1178     const auto port = 8086;
   1179 #endif
   1180 
   1181     while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
   1182 
   1183     rpmalloc_thread_initialize();
   1184 
   1185     const auto procname = GetProcessName();
   1186     const auto pnsz = std::min<size_t>( strlen( procname ), WelcomeMessageProgramNameSize - 1 );
   1187 
   1188     const auto hostinfo = GetHostInfo();
   1189     const auto hisz = std::min<size_t>( strlen( hostinfo ), WelcomeMessageHostInfoSize - 1 );
   1190 
   1191     const uint64_t pid = GetPid();
   1192 
   1193 #ifdef TRACY_ON_DEMAND
   1194     uint8_t onDemand = 1;
   1195 #else
   1196     uint8_t onDemand = 0;
   1197 #endif
   1198 
   1199 #ifdef __APPLE__
   1200     uint8_t isApple = 1;
   1201 #else
   1202     uint8_t isApple = 0;
   1203 #endif
   1204 
   1205     WelcomeMessage welcome;
   1206     MemWrite( &welcome.timerMul, m_timerMul );
   1207     MemWrite( &welcome.initBegin, GetInitTime() );
   1208     MemWrite( &welcome.initEnd, m_timeBegin.load( std::memory_order_relaxed ) );
   1209     MemWrite( &welcome.delay, m_delay );
   1210     MemWrite( &welcome.resolution, m_resolution );
   1211     MemWrite( &welcome.epoch, m_epoch );
   1212     MemWrite( &welcome.pid, pid );
   1213     MemWrite( &welcome.onDemand, onDemand );
   1214     MemWrite( &welcome.isApple, isApple );
   1215     memcpy( welcome.programName, procname, pnsz );
   1216     memset( welcome.programName + pnsz, 0, WelcomeMessageProgramNameSize - pnsz );
   1217     memcpy( welcome.hostInfo, hostinfo, hisz );
   1218     memset( welcome.hostInfo + hisz, 0, WelcomeMessageHostInfoSize - hisz );
   1219 
   1220     moodycamel::ConsumerToken token( GetQueue() );
   1221 
   1222     ListenSocket listen;
   1223     if( !listen.Listen( port, 8 ) )
   1224     {
   1225         for(;;)
   1226         {
   1227             if( ShouldExit() )
   1228             {
   1229                 m_shutdownFinished.store( true, std::memory_order_relaxed );
   1230                 return;
   1231             }
   1232 
   1233             ClearQueues( token );
   1234         }
   1235     }
   1236 
   1237 #ifndef TRACY_NO_BROADCAST
   1238     m_broadcast = (UdpBroadcast*)tracy_malloc( sizeof( UdpBroadcast ) );
   1239     new(m_broadcast) UdpBroadcast();
   1240     if( !m_broadcast->Open( "255.255.255.255", port ) )
   1241     {
   1242         m_broadcast->~UdpBroadcast();
   1243         tracy_free( m_broadcast );
   1244         m_broadcast = nullptr;
   1245     }
   1246 #endif
   1247 
   1248     int broadcastLen = 0;
   1249     auto& broadcastMsg = GetBroadcastMessage( procname, pnsz, broadcastLen );
   1250     uint64_t lastBroadcast = 0;
   1251 
   1252     // Connections loop.
   1253     // Each iteration of the loop handles whole connection. Multiple iterations will only
   1254     // happen in the on-demand mode or when handshake fails.
   1255     for(;;)
   1256     {
   1257         // Wait for incoming connection
   1258         for(;;)
   1259         {
   1260 #ifndef TRACY_NO_EXIT
   1261             if( !m_noExit && ShouldExit() )
   1262             {
   1263                 m_shutdownFinished.store( true, std::memory_order_relaxed );
   1264                 return;
   1265             }
   1266 #endif
   1267             m_sock = listen.Accept();
   1268             if( m_sock ) break;
   1269 #ifndef TRACY_ON_DEMAND
   1270             ProcessSysTime();
   1271 #endif
   1272 
   1273             if( m_broadcast )
   1274             {
   1275                 const auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count();
   1276                 if( t - lastBroadcast > 3000000000 )  // 3s
   1277                 {
   1278                     lastBroadcast = t;
   1279                     const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count();
   1280                     broadcastMsg.activeTime = uint32_t( ts - m_epoch );
   1281                     m_broadcast->Send( port, &broadcastMsg, broadcastLen );
   1282                 }
   1283             }
   1284         }
   1285 
   1286         // Handshake
   1287         {
   1288             char shibboleth[HandshakeShibbolethSize];
   1289             auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 2000 );
   1290             if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 )
   1291             {
   1292                 m_sock->~Socket();
   1293                 tracy_free( m_sock );
   1294                 m_sock = nullptr;
   1295                 continue;
   1296             }
   1297 
   1298             uint32_t protocolVersion;
   1299             res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 2000 );
   1300             if( !res )
   1301             {
   1302                 m_sock->~Socket();
   1303                 tracy_free( m_sock );
   1304                 m_sock = nullptr;
   1305                 continue;
   1306             }
   1307 
   1308             if( protocolVersion != ProtocolVersion )
   1309             {
   1310                 HandshakeStatus status = HandshakeProtocolMismatch;
   1311                 m_sock->Send( &status, sizeof( status ) );
   1312                 m_sock->~Socket();
   1313                 tracy_free( m_sock );
   1314                 m_sock = nullptr;
   1315                 continue;
   1316             }
   1317         }
   1318 
   1319 #ifdef TRACY_ON_DEMAND
   1320         const auto currentTime = GetTime();
   1321         ClearQueues( token );
   1322         m_connectionId.fetch_add( 1, std::memory_order_release );
   1323         m_isConnected.store( true, std::memory_order_release );
   1324 #endif
   1325 
   1326         HandshakeStatus handshake = HandshakeWelcome;
   1327         m_sock->Send( &handshake, sizeof( handshake ) );
   1328 
   1329         LZ4_resetStream( (LZ4_stream_t*)m_stream );
   1330         m_sock->Send( &welcome, sizeof( welcome ) );
   1331 
   1332         m_threadCtx = 0;
   1333         m_refTimeSerial = 0;
   1334         m_refTimeCtx = 0;
   1335         m_refTimeGpu = 0;
   1336 
   1337 #ifdef TRACY_ON_DEMAND
   1338         OnDemandPayloadMessage onDemand;
   1339         onDemand.frames = m_frameCount.load( std::memory_order_relaxed );
   1340         onDemand.currentTime = currentTime;
   1341 
   1342         m_sock->Send( &onDemand, sizeof( onDemand ) );
   1343 
   1344         m_deferredLock.lock();
   1345         for( auto& item : m_deferredQueue )
   1346         {
   1347             const auto idx = MemRead<uint8_t>( &item.hdr.idx );
   1348             if( (QueueType)idx == QueueType::MessageAppInfo )
   1349             {
   1350                 uint64_t ptr = MemRead<uint64_t>( &item.message.text );
   1351                 SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
   1352             }
   1353             AppendData( &item, QueueDataSize[idx] );
   1354         }
   1355         m_deferredLock.unlock();
   1356 #endif
   1357 
   1358         // Main communications loop
   1359         int keepAlive = 0;
   1360         for(;;)
   1361         {
   1362             ProcessSysTime();
   1363             const auto status = Dequeue( token );
   1364             const auto serialStatus = DequeueSerial();
   1365             if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
   1366             {
   1367                 break;
   1368             }
   1369             else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty )
   1370             {
   1371                 if( ShouldExit() ) break;
   1372                 if( m_bufferOffset != m_bufferStart )
   1373                 {
   1374                     if( !CommitData() ) break;
   1375                 }
   1376                 if( keepAlive == 500 )
   1377                 {
   1378                     QueueItem ka;
   1379                     ka.hdr.type = QueueType::KeepAlive;
   1380                     AppendData( &ka, QueueDataSize[ka.hdr.idx] );
   1381                     if( !CommitData() ) break;
   1382 
   1383                     keepAlive = 0;
   1384                 }
   1385                 else
   1386                 {
   1387                     keepAlive++;
   1388                     std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
   1389                 }
   1390             }
   1391             else
   1392             {
   1393                 keepAlive = 0;
   1394             }
   1395 
   1396             bool connActive = true;
   1397             while( m_sock->HasData() && connActive )
   1398             {
   1399                 connActive = HandleServerQuery();
   1400             }
   1401             if( !connActive ) break;
   1402         }
   1403         if( ShouldExit() ) break;
   1404 
   1405 #ifdef TRACY_ON_DEMAND
   1406         m_isConnected.store( false, std::memory_order_release );
   1407         m_bufferOffset = 0;
   1408         m_bufferStart = 0;
   1409 #endif
   1410 
   1411         m_sock->~Socket();
   1412         tracy_free( m_sock );
   1413         m_sock = nullptr;
   1414 
   1415 #ifndef TRACY_ON_DEMAND
   1416         // Client is no longer available here. Accept incoming connections, but reject handshake.
   1417         for(;;)
   1418         {
   1419             if( ShouldExit() )
   1420             {
   1421                 m_shutdownFinished.store( true, std::memory_order_relaxed );
   1422                 return;
   1423             }
   1424 
   1425             ClearQueues( token );
   1426 
   1427             m_sock = listen.Accept();
   1428             if( m_sock )
   1429             {
   1430                 char shibboleth[HandshakeShibbolethSize];
   1431                 auto res = m_sock->ReadRaw( shibboleth, HandshakeShibbolethSize, 1000 );
   1432                 if( !res || memcmp( shibboleth, HandshakeShibboleth, HandshakeShibbolethSize ) != 0 )
   1433                 {
   1434                     m_sock->~Socket();
   1435                     tracy_free( m_sock );
   1436                     m_sock = nullptr;
   1437                     continue;
   1438                 }
   1439 
   1440                 uint32_t protocolVersion;
   1441                 res = m_sock->ReadRaw( &protocolVersion, sizeof( protocolVersion ), 1000 );
   1442                 if( !res )
   1443                 {
   1444                     m_sock->~Socket();
   1445                     tracy_free( m_sock );
   1446                     m_sock = nullptr;
   1447                     continue;
   1448                 }
   1449 
   1450                 HandshakeStatus status = HandshakeNotAvailable;
   1451                 m_sock->Send( &status, sizeof( status ) );
   1452                 m_sock->~Socket();
   1453                 tracy_free( m_sock );
   1454             }
   1455         }
   1456 #endif
   1457     }
   1458     // End of connections loop
   1459 
   1460     // Client is exiting. Send items remaining in queues.
   1461     for(;;)
   1462     {
   1463         const auto status = Dequeue( token );
   1464         const auto serialStatus = DequeueSerial();
   1465         if( status == DequeueStatus::ConnectionLost || serialStatus == DequeueStatus::ConnectionLost )
   1466         {
   1467             m_shutdownFinished.store( true, std::memory_order_relaxed );
   1468             return;
   1469         }
   1470         else if( status == DequeueStatus::QueueEmpty && serialStatus == DequeueStatus::QueueEmpty )
   1471         {
   1472             if( m_bufferOffset != m_bufferStart ) CommitData();
   1473             break;
   1474         }
   1475 
   1476         while( m_sock->HasData() )
   1477         {
   1478             if( !HandleServerQuery() )
   1479             {
   1480                 m_shutdownFinished.store( true, std::memory_order_relaxed );
   1481                 return;
   1482             }
   1483         }
   1484     }
   1485 
   1486     // Send client termination notice to the server
   1487     QueueItem terminate;
   1488     MemWrite( &terminate.hdr.type, QueueType::Terminate );
   1489     if( !SendData( (const char*)&terminate, 1 ) )
   1490     {
   1491         m_shutdownFinished.store( true, std::memory_order_relaxed );
   1492         return;
   1493     }
   1494     // Handle remaining server queries
   1495     { // XXX diesel changes
   1496         if( m_bufferOffset != m_bufferStart ) CommitData();
   1497         std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
   1498     }
   1499     for(;;)
   1500     {
   1501         if( m_sock->HasData() )
   1502         {
   1503             while( m_sock->HasData() )
   1504             {
   1505                 if( !HandleServerQuery() )
   1506                 {
   1507                     m_shutdownFinished.store( true, std::memory_order_relaxed );
   1508                     return;
   1509                 }
   1510             }
   1511             while( Dequeue( token ) == DequeueStatus::DataDequeued ) {}
   1512             while( DequeueSerial() == DequeueStatus::DataDequeued ) {}
   1513             if( m_bufferOffset != m_bufferStart )
   1514             {
   1515                 if( !CommitData() )
   1516                 {
   1517                     m_shutdownFinished.store( true, std::memory_order_relaxed );
   1518                     return;
   1519                 }
   1520             }
   1521         }
   1522         else
   1523         {
   1524             // XXX diesel changes
   1525             // if( m_bufferOffset != m_bufferStart ) CommitData();
   1526             // std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
   1527             m_shutdownFinished.store( true, std::memory_order_relaxed );
   1528             return;
   1529         }
   1530     }
   1531 }
   1532 
   1533 void Profiler::CompressWorker()
   1534 {
   1535     SetThreadName( "Tracy DXT1" );
   1536     while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
   1537     rpmalloc_thread_initialize();
   1538     for(;;)
   1539     {
   1540         const auto shouldExit = ShouldExit();
   1541 
   1542         {
   1543             bool lockHeld = true;
   1544             while( !m_fiLock.try_lock() )
   1545             {
   1546                 if( m_shutdownManual.load( std::memory_order_relaxed ) )
   1547                 {
   1548                     lockHeld = false;
   1549                     break;
   1550                 }
   1551             }
   1552             if( !m_fiQueue.empty() ) m_fiQueue.swap( m_fiDequeue );
   1553             if( lockHeld )
   1554             {
   1555                 m_fiLock.unlock();
   1556             }
   1557         }
   1558 
   1559         const auto sz = m_fiDequeue.size();
   1560         if( sz > 0 )
   1561         {
   1562             auto fi = m_fiDequeue.data();
   1563             auto end = fi + sz;
   1564             while( fi != end )
   1565             {
   1566                 const auto w = fi->w;
   1567                 const auto h = fi->h;
   1568                 const auto csz = size_t( w * h / 2 );
   1569                 auto etc1buf = (char*)tracy_malloc( csz );
   1570                 CompressImageDxt1( (const char*)fi->image, etc1buf, w, h );
   1571                 tracy_free( fi->image );
   1572 
   1573                 Magic magic;
   1574                 auto token = GetToken();
   1575                 auto& tail = token->get_tail_index();
   1576                 auto item = token->enqueue_begin( magic );
   1577                 MemWrite( &item->hdr.type, QueueType::FrameImage );
   1578                 MemWrite( &item->frameImage.image, (uint64_t)etc1buf );
   1579                 MemWrite( &item->frameImage.frame, fi->frame );
   1580                 MemWrite( &item->frameImage.w, w );
   1581                 MemWrite( &item->frameImage.h, h );
   1582                 uint8_t flip = fi->flip;
   1583                 MemWrite( &item->frameImage.flip, flip );
   1584                 tail.store( magic + 1, std::memory_order_release );
   1585 
   1586                 fi++;
   1587             }
   1588             m_fiDequeue.clear();
   1589         }
   1590         else
   1591         {
   1592             std::this_thread::sleep_for( std::chrono::milliseconds( 20 ) );
   1593         }
   1594 
   1595         if( shouldExit )
   1596         {
   1597             return;
   1598         }
   1599     }
   1600 }
   1601 
   1602 static void FreeAssociatedMemory( const QueueItem& item )
   1603 {
   1604     if( item.hdr.idx >= (int)QueueType::Terminate ) return;
   1605 
   1606     uint64_t ptr;
   1607     switch( item.hdr.type )
   1608     {
   1609     case QueueType::ZoneText:
   1610     case QueueType::ZoneName:
   1611         ptr = MemRead<uint64_t>( &item.zoneText.text );
   1612         tracy_free( (void*)ptr );
   1613         break;
   1614     case QueueType::Message:
   1615     case QueueType::MessageColor:
   1616     case QueueType::MessageCallstack:
   1617     case QueueType::MessageColorCallstack:
   1618 #ifndef TRACY_ON_DEMAND
   1619     case QueueType::MessageAppInfo:
   1620 #endif
   1621         ptr = MemRead<uint64_t>( &item.message.text );
   1622         tracy_free( (void*)ptr );
   1623         break;
   1624     case QueueType::ZoneBeginAllocSrcLoc:
   1625     case QueueType::ZoneBeginAllocSrcLocCallstack:
   1626         ptr = MemRead<uint64_t>( &item.zoneBegin.srcloc );
   1627         tracy_free( (void*)ptr );
   1628         break;
   1629     case QueueType::CallstackMemory:
   1630         ptr = MemRead<uint64_t>( &item.callstackMemory.ptr );
   1631         tracy_free( (void*)ptr );
   1632         break;
   1633     case QueueType::Callstack:
   1634         ptr = MemRead<uint64_t>( &item.callstack.ptr );
   1635         tracy_free( (void*)ptr );
   1636         break;
   1637     case QueueType::CallstackAlloc:
   1638         ptr = MemRead<uint64_t>( &item.callstackAlloc.nativePtr );
   1639         tracy_free( (void*)ptr );
   1640         ptr = MemRead<uint64_t>( &item.callstackAlloc.ptr );
   1641         tracy_free( (void*)ptr );
   1642         break;
   1643     case QueueType::FrameImage:
   1644         ptr = MemRead<uint64_t>( &item.frameImage.image );
   1645         tracy_free( (void*)ptr );
   1646         break;
   1647 #ifdef TRACY_ON_DEMAND
   1648     case QueueType::MessageAppInfo:
   1649         // Don't free memory associated with deferred messages.
   1650         break;
   1651 #endif
   1652     default:
   1653         break;
   1654     }
   1655 }
   1656 
   1657 void Profiler::ClearQueues( moodycamel::ConsumerToken& token )
   1658 {
   1659     for(;;)
   1660     {
   1661         const auto sz = GetQueue().try_dequeue_bulk( token, m_itemBuf, BulkSize );
   1662         if( sz == 0 ) break;
   1663         for( size_t i=0; i<sz; i++ ) FreeAssociatedMemory( m_itemBuf[i] );
   1664     }
   1665 
   1666     ClearSerial();
   1667 }
   1668 
   1669 void Profiler::ClearSerial()
   1670 {
   1671     bool lockHeld = true;
   1672     while( !m_serialLock.try_lock() )
   1673     {
   1674         if( m_shutdownManual.load( std::memory_order_relaxed ) )
   1675         {
   1676             lockHeld = false;
   1677             break;
   1678         }
   1679     }
   1680     for( auto& v : m_serialQueue ) FreeAssociatedMemory( v );
   1681     m_serialQueue.clear();
   1682     if( lockHeld )
   1683     {
   1684         m_serialLock.unlock();
   1685     }
   1686 
   1687     for( auto& v : m_serialDequeue ) FreeAssociatedMemory( v );
   1688     m_serialDequeue.clear();
   1689 }
   1690 
   1691 Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
   1692 {
   1693     uint64_t threadId;
   1694     const auto sz = GetQueue().try_dequeue_bulk_single( token, m_itemBuf, BulkSize, threadId );
   1695     if( sz > 0 )
   1696     {
   1697         if( threadId != m_threadCtx )
   1698         {
   1699             QueueItem item;
   1700             MemWrite( &item.hdr.type, QueueType::ThreadContext );
   1701             MemWrite( &item.threadCtx.thread, threadId );
   1702             if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return DequeueStatus::ConnectionLost;
   1703             m_threadCtx = threadId;
   1704             m_refTimeThread = 0;
   1705         }
   1706 
   1707         auto end = m_itemBuf + sz;
   1708         auto item = m_itemBuf;
   1709         while( item != end )
   1710         {
   1711             uint64_t ptr;
   1712             const auto idx = MemRead<uint8_t>( &item->hdr.idx );
   1713             if( idx < (int)QueueType::Terminate )
   1714             {
   1715                 switch( (QueueType)idx )
   1716                 {
   1717                 case QueueType::ZoneText:
   1718                 case QueueType::ZoneName:
   1719                     ptr = MemRead<uint64_t>( &item->zoneText.text );
   1720                     SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
   1721                     tracy_free( (void*)ptr );
   1722                     break;
   1723                 case QueueType::Message:
   1724                 case QueueType::MessageColor:
   1725                 case QueueType::MessageCallstack:
   1726                 case QueueType::MessageColorCallstack:
   1727                     ptr = MemRead<uint64_t>( &item->message.text );
   1728                     SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
   1729                     tracy_free( (void*)ptr );
   1730                     break;
   1731                 case QueueType::MessageAppInfo:
   1732                     ptr = MemRead<uint64_t>( &item->message.text );
   1733                     SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
   1734 #ifndef TRACY_ON_DEMAND
   1735                     tracy_free( (void*)ptr );
   1736 #endif
   1737                     break;
   1738                 case QueueType::ZoneBeginAllocSrcLoc:
   1739                 case QueueType::ZoneBeginAllocSrcLocCallstack:
   1740                 {
   1741                     int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
   1742                     int64_t dt = t - m_refTimeThread;
   1743                     m_refTimeThread = t;
   1744                     MemWrite( &item->zoneBegin.time, dt );
   1745                     ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
   1746                     SendSourceLocationPayload( ptr );
   1747                     tracy_free( (void*)ptr );
   1748                     break;
   1749                 }
   1750                 case QueueType::Callstack:
   1751                     ptr = MemRead<uint64_t>( &item->callstack.ptr );
   1752                     SendCallstackPayload( ptr );
   1753                     tracy_free( (void*)ptr );
   1754                     break;
   1755                 case QueueType::CallstackAlloc:
   1756                     ptr = MemRead<uint64_t>( &item->callstackAlloc.nativePtr );
   1757                     if( ptr != 0 )
   1758                     {
   1759                         CutCallstack( (void*)ptr, "lua_pcall" );
   1760                         SendCallstackPayload( ptr );
   1761                         tracy_free( (void*)ptr );
   1762                     }
   1763                     ptr = MemRead<uint64_t>( &item->callstackAlloc.ptr );
   1764                     SendCallstackAlloc( ptr );
   1765                     tracy_free( (void*)ptr );
   1766                     break;
   1767                 case QueueType::FrameImage:
   1768                 {
   1769                     ptr = MemRead<uint64_t>( &item->frameImage.image );
   1770                     const auto w = MemRead<uint16_t>( &item->frameImage.w );
   1771                     const auto h = MemRead<uint16_t>( &item->frameImage.h );
   1772                     const auto csz = size_t( w * h / 2 );
   1773                     SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
   1774                     tracy_free( (void*)ptr );
   1775                     break;
   1776                 }
   1777                 case QueueType::ZoneBegin:
   1778                 case QueueType::ZoneBeginCallstack:
   1779                 {
   1780                     int64_t t = MemRead<int64_t>( &item->zoneBegin.time );
   1781                     int64_t dt = t - m_refTimeThread;
   1782                     m_refTimeThread = t;
   1783                     MemWrite( &item->zoneBegin.time, dt );
   1784                     break;
   1785                 }
   1786                 case QueueType::ZoneEnd:
   1787                 {
   1788                     int64_t t = MemRead<int64_t>( &item->zoneEnd.time );
   1789                     int64_t dt = t - m_refTimeThread;
   1790                     m_refTimeThread = t;
   1791                     MemWrite( &item->zoneEnd.time, dt );
   1792                     break;
   1793                 }
   1794                 case QueueType::GpuZoneBegin:
   1795                 case QueueType::GpuZoneBeginCallstack:
   1796                 {
   1797                     int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
   1798                     int64_t dt = t - m_refTimeThread;
   1799                     m_refTimeThread = t;
   1800                     MemWrite( &item->gpuZoneBegin.cpuTime, dt );
   1801                     break;
   1802                 }
   1803                 case QueueType::GpuZoneEnd:
   1804                 {
   1805                     int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
   1806                     int64_t dt = t - m_refTimeThread;
   1807                     m_refTimeThread = t;
   1808                     MemWrite( &item->gpuZoneEnd.cpuTime, dt );
   1809                     break;
   1810                 }
   1811                 case QueueType::PlotData:
   1812                 {
   1813                     int64_t t = MemRead<int64_t>( &item->plotData.time );
   1814                     int64_t dt = t - m_refTimeThread;
   1815                     m_refTimeThread = t;
   1816                     MemWrite( &item->plotData.time, dt );
   1817                     break;
   1818                 }
   1819                 case QueueType::ContextSwitch:
   1820                 {
   1821                     int64_t t = MemRead<int64_t>( &item->contextSwitch.time );
   1822                     int64_t dt = t - m_refTimeCtx;
   1823                     m_refTimeCtx = t;
   1824                     MemWrite( &item->contextSwitch.time, dt );
   1825                     break;
   1826                 }
   1827                 case QueueType::ThreadWakeup:
   1828                 {
   1829                     int64_t t = MemRead<int64_t>( &item->threadWakeup.time );
   1830                     int64_t dt = t - m_refTimeCtx;
   1831                     m_refTimeCtx = t;
   1832                     MemWrite( &item->threadWakeup.time, dt );
   1833                     break;
   1834                 }
   1835                 case QueueType::GpuTime:
   1836                 {
   1837                     int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
   1838                     int64_t dt = t - m_refTimeGpu;
   1839                     m_refTimeGpu = t;
   1840                     MemWrite( &item->gpuTime.gpuTime, dt );
   1841                     break;
   1842                 }
   1843                 default:
   1844                     assert( false );
   1845                     break;
   1846                 }
   1847             }
   1848             if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost;
   1849             item++;
   1850         }
   1851     }
   1852     else
   1853     {
   1854         return DequeueStatus::QueueEmpty;
   1855     }
   1856     return DequeueStatus::DataDequeued;
   1857 }
   1858 
   1859 Profiler::DequeueStatus Profiler::DequeueContextSwitches( tracy::moodycamel::ConsumerToken& token, int64_t& timeStop )
   1860 {
   1861     const auto sz = GetQueue().try_dequeue_bulk( token, m_itemBuf, BulkSize );
   1862     if( sz > 0 )
   1863     {
   1864         auto end = m_itemBuf + sz;
   1865         auto item = m_itemBuf;
   1866         while( item != end )
   1867         {
   1868             FreeAssociatedMemory( *item );
   1869             const auto idx = MemRead<uint8_t>( &item->hdr.idx );
   1870             if( idx == (uint8_t)QueueType::ContextSwitch )
   1871             {
   1872                 const auto csTime = MemRead<int64_t>( &item->contextSwitch.time );
   1873                 if( csTime > timeStop )
   1874                 {
   1875                     timeStop = -1;
   1876                     return DequeueStatus::DataDequeued;
   1877                 }
   1878                 int64_t dt = csTime - m_refTimeCtx;
   1879                 m_refTimeCtx = csTime;
   1880                 MemWrite( &item->contextSwitch.time, dt );
   1881                 if( !AppendData( item, QueueDataSize[(int)QueueType::ContextSwitch] ) ) return DequeueStatus::ConnectionLost;
   1882             }
   1883             else if( idx == (uint8_t)QueueType::ThreadWakeup )
   1884             {
   1885                 const auto csTime = MemRead<int64_t>( &item->threadWakeup.time );
   1886                 if( csTime > timeStop )
   1887                 {
   1888                     timeStop = -1;
   1889                     return DequeueStatus::DataDequeued;
   1890                 }
   1891                 int64_t dt = csTime - m_refTimeCtx;
   1892                 m_refTimeCtx = csTime;
   1893                 MemWrite( &item->threadWakeup.time, dt );
   1894                 if( !AppendData( item, QueueDataSize[(int)QueueType::ThreadWakeup] ) ) return DequeueStatus::ConnectionLost;
   1895             }
   1896             item++;
   1897         }
   1898     }
   1899     else
   1900     {
   1901         return DequeueStatus::QueueEmpty;
   1902     }
   1903     return DequeueStatus::DataDequeued;
   1904 }
   1905 
   1906 Profiler::DequeueStatus Profiler::DequeueSerial()
   1907 {
   1908     {
   1909         bool lockHeld = true;
   1910         while( !m_serialLock.try_lock() )
   1911         {
   1912             if( m_shutdownManual.load( std::memory_order_relaxed ) )
   1913             {
   1914                 lockHeld = false;
   1915                 break;
   1916             }
   1917         }
   1918         if( !m_serialQueue.empty() ) m_serialQueue.swap( m_serialDequeue );
   1919         if( lockHeld )
   1920         {
   1921             m_serialLock.unlock();
   1922         }
   1923     }
   1924 
   1925     const auto sz = m_serialDequeue.size();
   1926     if( sz > 0 )
   1927     {
   1928         auto item = m_serialDequeue.data();
   1929         auto end = item + sz;
   1930         while( item != end )
   1931         {
   1932             uint64_t ptr;
   1933             const auto idx = MemRead<uint8_t>( &item->hdr.idx );
   1934             if( idx < (int)QueueType::Terminate )
   1935             {
   1936                 switch( (QueueType)idx )
   1937                 {
   1938                 case QueueType::CallstackMemory:
   1939                     ptr = MemRead<uint64_t>( &item->callstackMemory.ptr );
   1940                     SendCallstackPayload( ptr );
   1941                     tracy_free( (void*)ptr );
   1942                     break;
   1943                 case QueueType::LockWait:
   1944                 case QueueType::LockSharedWait:
   1945                 {
   1946                     int64_t t = MemRead<int64_t>( &item->lockWait.time );
   1947                     int64_t dt = t - m_refTimeSerial;
   1948                     m_refTimeSerial = t;
   1949                     MemWrite( &item->lockWait.time, dt );
   1950                     break;
   1951                 }
   1952                 case QueueType::LockObtain:
   1953                 case QueueType::LockSharedObtain:
   1954                 {
   1955                     int64_t t = MemRead<int64_t>( &item->lockObtain.time );
   1956                     int64_t dt = t - m_refTimeSerial;
   1957                     m_refTimeSerial = t;
   1958                     MemWrite( &item->lockObtain.time, dt );
   1959                     break;
   1960                 }
   1961                 case QueueType::LockRelease:
   1962                 case QueueType::LockSharedRelease:
   1963                 {
   1964                     int64_t t = MemRead<int64_t>( &item->lockRelease.time );
   1965                     int64_t dt = t - m_refTimeSerial;
   1966                     m_refTimeSerial = t;
   1967                     MemWrite( &item->lockRelease.time, dt );
   1968                     break;
   1969                 }
   1970                 case QueueType::MemAlloc:
   1971                 case QueueType::MemAllocCallstack:
   1972                 {
   1973                     int64_t t = MemRead<int64_t>( &item->memAlloc.time );
   1974                     int64_t dt = t - m_refTimeSerial;
   1975                     m_refTimeSerial = t;
   1976                     MemWrite( &item->memAlloc.time, dt );
   1977                     break;
   1978                 }
   1979                 case QueueType::MemFree:
   1980                 case QueueType::MemFreeCallstack:
   1981                 {
   1982                     int64_t t = MemRead<int64_t>( &item->memFree.time );
   1983                     int64_t dt = t - m_refTimeSerial;
   1984                     m_refTimeSerial = t;
   1985                     MemWrite( &item->memFree.time, dt );
   1986                     break;
   1987                 }
   1988                 case QueueType::GpuZoneBeginSerial:
   1989                 case QueueType::GpuZoneBeginCallstackSerial:
   1990                 {
   1991                     int64_t t = MemRead<int64_t>( &item->gpuZoneBegin.cpuTime );
   1992                     int64_t dt = t - m_refTimeSerial;
   1993                     m_refTimeSerial = t;
   1994                     MemWrite( &item->gpuZoneBegin.cpuTime, dt );
   1995                     break;
   1996                 }
   1997                 case QueueType::GpuZoneEndSerial:
   1998                 {
   1999                     int64_t t = MemRead<int64_t>( &item->gpuZoneEnd.cpuTime );
   2000                     int64_t dt = t - m_refTimeSerial;
   2001                     m_refTimeSerial = t;
   2002                     MemWrite( &item->gpuZoneEnd.cpuTime, dt );
   2003                     break;
   2004                 }
   2005                 case QueueType::GpuTime:
   2006                 {
   2007                     int64_t t = MemRead<int64_t>( &item->gpuTime.gpuTime );
   2008                     int64_t dt = t - m_refTimeGpu;
   2009                     m_refTimeGpu = t;
   2010                     MemWrite( &item->gpuTime.gpuTime, dt );
   2011                     break;
   2012                 }
   2013                 default:
   2014                     assert( false );
   2015                     break;
   2016                 }
   2017             }
   2018             if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost;
   2019             item++;
   2020         }
   2021         m_serialDequeue.clear();
   2022     }
   2023     else
   2024     {
   2025         return DequeueStatus::QueueEmpty;
   2026     }
   2027     return DequeueStatus::DataDequeued;
   2028 }
   2029 
   2030 bool Profiler::AppendData( const void* data, size_t len )
   2031 {
   2032     const auto ret = NeedDataSize( len );
   2033     AppendDataUnsafe( data, len );
   2034     return ret;
   2035 }
   2036 
   2037 bool Profiler::CommitData()
   2038 {
   2039     bool ret = SendData( m_buffer + m_bufferStart, m_bufferOffset - m_bufferStart );
   2040     if( m_bufferOffset > TargetFrameSize * 2 ) m_bufferOffset = 0;
   2041     m_bufferStart = m_bufferOffset;
   2042     return ret;
   2043 }
   2044 
   2045 bool Profiler::NeedDataSize( size_t len )
   2046 {
   2047     assert( len <= TargetFrameSize );
   2048     bool ret = true;
   2049     if( m_bufferOffset - m_bufferStart + len > TargetFrameSize )
   2050     {
   2051         ret = CommitData();
   2052     }
   2053     return ret;
   2054 }
   2055 
   2056 bool Profiler::SendData( const char* data, size_t len )
   2057 {
   2058     const lz4sz_t lz4sz = LZ4_compress_fast_continue( (LZ4_stream_t*)m_stream, data, m_lz4Buf + sizeof( lz4sz_t ), (int)len, LZ4Size, 1 );
   2059     memcpy( m_lz4Buf, &lz4sz, sizeof( lz4sz ) );
   2060     return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1;
   2061 }
   2062 
   2063 void Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
   2064 {
   2065     assert( type == QueueType::StringData ||
   2066             type == QueueType::ThreadName ||
   2067             type == QueueType::CustomStringData ||
   2068             type == QueueType::PlotName ||
   2069             type == QueueType::FrameName ||
   2070             type == QueueType::ExternalName ||
   2071             type == QueueType::ExternalThreadName );
   2072 
   2073     QueueItem item;
   2074     MemWrite( &item.hdr.type, type );
   2075     MemWrite( &item.stringTransfer.ptr, str );
   2076 
   2077     auto len = strlen( ptr );
   2078     assert( len <= std::numeric_limits<uint16_t>::max() );
   2079     auto l16 = uint16_t( len );
   2080 
   2081     NeedDataSize( QueueDataSize[(int)type] + sizeof( l16 ) + l16 );
   2082 
   2083     AppendDataUnsafe( &item, QueueDataSize[(int)type] );
   2084     AppendDataUnsafe( &l16, sizeof( l16 ) );
   2085     AppendDataUnsafe( ptr, l16 );
   2086 }
   2087 
   2088 void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type )
   2089 {
   2090     assert( type == QueueType::FrameImageData );
   2091 
   2092     QueueItem item;
   2093     MemWrite( &item.hdr.type, type );
   2094     MemWrite( &item.stringTransfer.ptr, str );
   2095 
   2096     assert( len <= std::numeric_limits<uint32_t>::max() );
   2097     assert( QueueDataSize[(int)type] + sizeof( uint32_t ) + len <= TargetFrameSize );
   2098     auto l32 = uint32_t( len );
   2099 
   2100     NeedDataSize( QueueDataSize[(int)type] + sizeof( l32 ) + l32 );
   2101 
   2102     AppendDataUnsafe( &item, QueueDataSize[(int)type] );
   2103     AppendDataUnsafe( &l32, sizeof( l32 ) );
   2104     AppendDataUnsafe( ptr, l32 );
   2105 }
   2106 
   2107 void Profiler::SendSourceLocation( uint64_t ptr )
   2108 {
   2109     auto srcloc = (const SourceLocationData*)ptr;
   2110     QueueItem item;
   2111     MemWrite( &item.hdr.type, QueueType::SourceLocation );
   2112     MemWrite( &item.srcloc.name, (uint64_t)srcloc->name );
   2113     MemWrite( &item.srcloc.file, (uint64_t)srcloc->file );
   2114     MemWrite( &item.srcloc.function, (uint64_t)srcloc->function );
   2115     MemWrite( &item.srcloc.line, srcloc->line );
   2116     MemWrite( &item.srcloc.r, uint8_t( ( srcloc->color       ) & 0xFF ) );
   2117     MemWrite( &item.srcloc.g, uint8_t( ( srcloc->color >> 8  ) & 0xFF ) );
   2118     MemWrite( &item.srcloc.b, uint8_t( ( srcloc->color >> 16 ) & 0xFF ) );
   2119     AppendData( &item, QueueDataSize[(int)QueueType::SourceLocation] );
   2120 }
   2121 
   2122 void Profiler::SendSourceLocationPayload( uint64_t _ptr )
   2123 {
   2124     auto ptr = (const char*)_ptr;
   2125 
   2126     QueueItem item;
   2127     MemWrite( &item.hdr.type, QueueType::SourceLocationPayload );
   2128     MemWrite( &item.stringTransfer.ptr, _ptr );
   2129 
   2130     const auto len = *((uint32_t*)ptr);
   2131     assert( len <= std::numeric_limits<uint16_t>::max() );
   2132     assert( len > 4 );
   2133     const auto l16 = uint16_t( len - 4 );
   2134 
   2135     NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( l16 ) + l16 );
   2136 
   2137     AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] );
   2138     AppendDataUnsafe( &l16, sizeof( l16 ) );
   2139     AppendDataUnsafe( ptr + 4, l16 );
   2140 }
   2141 
   2142 void Profiler::SendCallstackPayload( uint64_t _ptr )
   2143 {
   2144     auto ptr = (uintptr_t*)_ptr;
   2145 
   2146     QueueItem item;
   2147     MemWrite( &item.hdr.type, QueueType::CallstackPayload );
   2148     MemWrite( &item.stringTransfer.ptr, _ptr );
   2149 
   2150     const auto sz = *ptr++;
   2151     const auto len = sz * sizeof( uint64_t );
   2152     const auto l16 = uint16_t( len );
   2153 
   2154     NeedDataSize( QueueDataSize[(int)QueueType::CallstackPayload] + sizeof( l16 ) + l16 );
   2155 
   2156     AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackPayload] );
   2157     AppendDataUnsafe( &l16, sizeof( l16 ) );
   2158 
   2159     if( compile_time_condition<sizeof( uintptr_t ) == sizeof( uint64_t )>::value )
   2160     {
   2161         AppendDataUnsafe( ptr, sizeof( uint64_t ) * sz );
   2162     }
   2163     else
   2164     {
   2165         for( uintptr_t i=0; i<sz; i++ )
   2166         {
   2167             const auto val = uint64_t( *ptr++ );
   2168             AppendDataUnsafe( &val, sizeof( uint64_t ) );
   2169         }
   2170     }
   2171 }
   2172 
   2173 void Profiler::SendCallstackAlloc( uint64_t _ptr )
   2174 {
   2175     auto ptr = (const char*)_ptr;
   2176 
   2177     QueueItem item;
   2178     MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload );
   2179     MemWrite( &item.stringTransfer.ptr, _ptr );
   2180 
   2181     const auto len = *((uint32_t*)ptr);
   2182     assert( len <= std::numeric_limits<uint16_t>::max() );
   2183     const auto l16 = uint16_t( len );
   2184 
   2185     NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( l16 ) + l16 );
   2186 
   2187     AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] );
   2188     AppendDataUnsafe( &l16, sizeof( l16 ) );
   2189     AppendDataUnsafe( ptr + 4, l16 );
   2190 }
   2191 
   2192 void Profiler::SendCallstackFrame( uint64_t ptr )
   2193 {
   2194 #ifdef TRACY_HAS_CALLSTACK
   2195     const auto frameData = DecodeCallstackPtr( ptr );
   2196 
   2197     {
   2198         QueueItem item;
   2199         MemWrite( &item.hdr.type, QueueType::CallstackFrameSize );
   2200         MemWrite( &item.callstackFrameSize.ptr, ptr );
   2201         MemWrite( &item.callstackFrameSize.size, frameData.size );
   2202 
   2203         AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrameSize] );
   2204     }
   2205 
   2206     for( uint8_t i=0; i<frameData.size; i++ )
   2207     {
   2208         const auto& frame = frameData.data[i];
   2209 
   2210         SendString( uint64_t( frame.name ), frame.name, QueueType::CustomStringData );
   2211         SendString( uint64_t( frame.file ), frame.file, QueueType::CustomStringData );
   2212 
   2213         QueueItem item;
   2214         MemWrite( &item.hdr.type, QueueType::CallstackFrame );
   2215         MemWrite( &item.callstackFrame.name, (uint64_t)frame.name );
   2216         MemWrite( &item.callstackFrame.file, (uint64_t)frame.file );
   2217         MemWrite( &item.callstackFrame.line, frame.line );
   2218 
   2219         AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] );
   2220 
   2221         tracy_free( (void*)frame.name );
   2222         tracy_free( (void*)frame.file );
   2223     }
   2224 #endif
   2225 }
   2226 
   2227 
   2228 static bool DontExit() { return false; }
   2229 
   2230 bool Profiler::HandleServerQuery()
   2231 {
   2232     uint8_t type;
   2233     if( !m_sock->Read( &type, sizeof( type ), 10, DontExit ) ) return false;
   2234 
   2235     uint64_t ptr;
   2236     if( !m_sock->Read( &ptr, sizeof( ptr ), 10, DontExit ) ) return false;
   2237 
   2238     switch( type )
   2239     {
   2240     case ServerQueryString:
   2241         SendString( ptr, (const char*)ptr, QueueType::StringData );
   2242         break;
   2243     case ServerQueryThreadString:
   2244         if( ptr == m_mainThread )
   2245         {
   2246             SendString( ptr, "Main thread", QueueType::ThreadName );
   2247         }
   2248         else
   2249         {
   2250             SendString( ptr, GetThreadName( ptr ), QueueType::ThreadName );
   2251         }
   2252         break;
   2253     case ServerQuerySourceLocation:
   2254         SendSourceLocation( ptr );
   2255         break;
   2256     case ServerQueryPlotName:
   2257         SendString( ptr, (const char*)ptr, QueueType::PlotName );
   2258         break;
   2259     case ServerQueryTerminate:
   2260         return false;
   2261     case ServerQueryCallstackFrame:
   2262         SendCallstackFrame( ptr );
   2263         break;
   2264     case ServerQueryFrameName:
   2265         SendString( ptr, (const char*)ptr, QueueType::FrameName );
   2266         break;
   2267     case ServerQueryDisconnect:
   2268         HandleDisconnect();
   2269         return false;
   2270 #ifdef TRACY_HAS_SYSTEM_TRACING
   2271     case ServerQueryExternalName:
   2272         SysTraceSendExternalName( ptr );
   2273         break;
   2274 #endif
   2275     case ServerQueryParameter:
   2276         HandleParameter( ptr );
   2277         break;
   2278     default:
   2279         assert( false );
   2280         break;
   2281     }
   2282 
   2283     return true;
   2284 }
   2285 
   2286 void Profiler::HandleDisconnect()
   2287 {
   2288     moodycamel::ConsumerToken token( GetQueue() );
   2289 
   2290 #ifdef TRACY_HAS_SYSTEM_TRACING
   2291     if( s_sysTraceThread )
   2292     {
   2293         auto timestamp = GetTime();
   2294         for(;;)
   2295         {
   2296             const auto status = DequeueContextSwitches( token, timestamp );
   2297             if( status == DequeueStatus::ConnectionLost )
   2298             {
   2299                 return;
   2300             }
   2301             else if( status == DequeueStatus::QueueEmpty )
   2302             {
   2303                 if( m_bufferOffset != m_bufferStart )
   2304                 {
   2305                     if( !CommitData() ) return;
   2306                 }
   2307             }
   2308             if( timestamp < 0 )
   2309             {
   2310                 if( m_bufferOffset != m_bufferStart )
   2311                 {
   2312                     if( !CommitData() ) return;
   2313                 }
   2314                 break;
   2315             }
   2316             ClearSerial();
   2317             if( m_sock->HasData() )
   2318             {
   2319                 while( m_sock->HasData() )
   2320                 {
   2321                     if( !HandleServerQuery() ) return;
   2322                 }
   2323                 if( m_bufferOffset != m_bufferStart )
   2324                 {
   2325                     if( !CommitData() ) return;
   2326                 }
   2327             }
   2328             else
   2329             {
   2330                 if( m_bufferOffset != m_bufferStart )
   2331                 {
   2332                     if( !CommitData() ) return;
   2333                 }
   2334                 std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
   2335             }
   2336         }
   2337     }
   2338 #endif
   2339 
   2340     QueueItem terminate;
   2341     MemWrite( &terminate.hdr.type, QueueType::Terminate );
   2342     if( !SendData( (const char*)&terminate, 1 ) ) return;
   2343     for(;;)
   2344     {
   2345         ClearQueues( token );
   2346         if( m_sock->HasData() )
   2347         {
   2348             while( m_sock->HasData() )
   2349             {
   2350                 if( !HandleServerQuery() ) return;
   2351             }
   2352             if( m_bufferOffset != m_bufferStart )
   2353             {
   2354                 if( !CommitData() ) return;
   2355             }
   2356         }
   2357         else
   2358         {
   2359             if( m_bufferOffset != m_bufferStart )
   2360             {
   2361                 if( !CommitData() ) return;
   2362             }
   2363             std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
   2364         }
   2365     }
   2366 }
   2367 
   2368 void Profiler::CalibrateTimer()
   2369 {
   2370 #ifdef TRACY_HW_TIMER
   2371 #  if !defined TARGET_OS_IOS && __ARM_ARCH >= 6
   2372     m_timerMul = 1.;
   2373 #  else
   2374     std::atomic_signal_fence( std::memory_order_acq_rel );
   2375     const auto t0 = std::chrono::high_resolution_clock::now();
   2376     const auto r0 = GetTime();
   2377     std::atomic_signal_fence( std::memory_order_acq_rel );
   2378     std::this_thread::sleep_for( std::chrono::milliseconds( 200 ) );
   2379     std::atomic_signal_fence( std::memory_order_acq_rel );
   2380     const auto t1 = std::chrono::high_resolution_clock::now();
   2381     const auto r1 = GetTime();
   2382     std::atomic_signal_fence( std::memory_order_acq_rel );
   2383 
   2384     const auto dt = std::chrono::duration_cast<std::chrono::nanoseconds>( t1 - t0 ).count();
   2385     const auto dr = r1 - r0;
   2386 
   2387     m_timerMul = double( dt ) / double( dr );
   2388 #  endif
   2389 #else
   2390     m_timerMul = 1.;
   2391 #endif
   2392 }
   2393 
   2394 void Profiler::CalibrateDelay()
   2395 {
   2396     enum { Iterations = 50000 };
   2397 
   2398     auto mindiff = std::numeric_limits<int64_t>::max();
   2399     for( int i=0; i<Iterations * 10; i++ )
   2400     {
   2401         const auto t0i = GetTime();
   2402         const auto t1i = GetTime();
   2403         const auto dti = t1i - t0i;
   2404         if( dti > 0 && dti < mindiff ) mindiff = dti;
   2405     }
   2406     m_resolution = mindiff;
   2407 
   2408 #ifdef TRACY_DELAYED_INIT
   2409     m_delay = m_resolution;
   2410 #else
   2411     enum { Events = Iterations * 2 };   // start + end
   2412     static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
   2413 
   2414     moodycamel::ProducerToken ptoken_detail( GetQueue() );
   2415     moodycamel::ConcurrentQueue<QueueItem>::ExplicitProducer* ptoken = GetQueue().get_explicit_producer( ptoken_detail );
   2416     static const tracy::SourceLocationData __tracy_source_location { nullptr, __FUNCTION__,  __FILE__, (uint32_t)__LINE__, 0 };
   2417     const auto t0 = GetTime();
   2418     for( int i=0; i<Iterations; i++ )
   2419     {
   2420         {
   2421             Magic magic;
   2422             auto& tail = ptoken->get_tail_index();
   2423             auto item = ptoken->enqueue_begin( magic );
   2424             MemWrite( &item->hdr.type, QueueType::ZoneBegin );
   2425             MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
   2426             MemWrite( &item->zoneBegin.srcloc, (uint64_t)&__tracy_source_location );
   2427             tail.store( magic + 1, std::memory_order_release );
   2428         }
   2429         {
   2430             Magic magic;
   2431             auto& tail = ptoken->get_tail_index();
   2432             auto item = ptoken->enqueue_begin( magic );
   2433             MemWrite( &item->hdr.type, QueueType::ZoneEnd );
   2434             MemWrite( &item->zoneEnd.time, GetTime() );
   2435             tail.store( magic + 1, std::memory_order_release );
   2436         }
   2437     }
   2438     const auto t1 = GetTime();
   2439     const auto dt = t1 - t0;
   2440     m_delay = dt / Events;
   2441 
   2442     enum { Bulk = 1000 };
   2443     moodycamel::ConsumerToken token( GetQueue() );
   2444     int left = Events;
   2445     QueueItem item[Bulk];
   2446     while( left != 0 )
   2447     {
   2448         const auto sz = GetQueue().try_dequeue_bulk( token, item, std::min( left, (int)Bulk ) );
   2449         assert( sz > 0 );
   2450         left -= (int)sz;
   2451     }
   2452     assert( GetQueue().size_approx() == 0 );
   2453 #endif
   2454 }
   2455 
   2456 void Profiler::SendCallstack( int depth, const char* skipBefore )
   2457 {
   2458 #ifdef TRACY_HAS_CALLSTACK
   2459     auto ptr = Callstack( depth );
   2460     CutCallstack( ptr, skipBefore );
   2461 
   2462     Magic magic;
   2463     auto token = GetToken();
   2464     auto& tail = token->get_tail_index();
   2465     auto item = token->enqueue_begin( magic );
   2466     MemWrite( &item->hdr.type, QueueType::Callstack );
   2467     MemWrite( &item->callstack.ptr, ptr );
   2468     tail.store( magic + 1, std::memory_order_release );
   2469 #endif
   2470 }
   2471 
   2472 void Profiler::CutCallstack( void* callstack, const char* skipBefore )
   2473 {
   2474 #ifdef TRACY_HAS_CALLSTACK
   2475     auto data = (uintptr_t*)callstack;
   2476     const auto sz = *data++;
   2477     uintptr_t i;
   2478     for( i=0; i<sz; i++ )
   2479     {
   2480         auto name = DecodeCallstackPtrFast( uint64_t( data[i] ) );
   2481         const bool found = strcmp( name, skipBefore ) == 0;
   2482         if( found )
   2483         {
   2484             i++;
   2485             break;
   2486         }
   2487     }
   2488 
   2489     if( i != sz )
   2490     {
   2491         memmove( data, data + i, ( sz - i ) * sizeof( uintptr_t* ) );
   2492         *--data = sz - i;
   2493     }
   2494 #endif
   2495 }
   2496 
   2497 #ifdef TRACY_HAS_SYSTIME
   2498 void Profiler::ProcessSysTime()
   2499 {
   2500     if( m_shutdown.load( std::memory_order_relaxed ) ) return;
   2501     auto t = std::chrono::high_resolution_clock::now().time_since_epoch().count();
   2502     if( t - m_sysTimeLast > 100000000 )    // 100 ms
   2503     {
   2504         auto sysTime = m_sysTime.Get();
   2505         if( sysTime >= 0 )
   2506         {
   2507             m_sysTimeLast = t;
   2508 
   2509             Magic magic;
   2510             auto token = GetToken();
   2511             auto& tail = token->get_tail_index();
   2512             auto item = token->enqueue_begin( magic );
   2513             MemWrite( &item->hdr.type, QueueType::SysTimeReport );
   2514             MemWrite( &item->sysTime.time, GetTime() );
   2515             MemWrite( &item->sysTime.sysTime, sysTime );
   2516             tail.store( magic + 1, std::memory_order_release );
   2517         }
   2518     }
   2519 }
   2520 #endif
   2521 
   2522 void Profiler::ParameterSetup( uint32_t idx, const char* name, bool isBool, int32_t val )
   2523 {
   2524     tracy::Magic magic;
   2525     auto token = tracy::GetToken();
   2526     auto& tail = token->get_tail_index();
   2527     auto item = token->enqueue_begin( magic );
   2528     tracy::MemWrite( &item->hdr.type, tracy::QueueType::ParamSetup );
   2529     tracy::MemWrite( &item->paramSetup.idx, idx );
   2530     tracy::MemWrite( &item->paramSetup.name, (uint64_t)name );
   2531     tracy::MemWrite( &item->paramSetup.isBool, (uint8_t)isBool );
   2532     tracy::MemWrite( &item->paramSetup.val, val );
   2533 
   2534 #ifdef TRACY_ON_DEMAND
   2535     GetProfiler().DeferItem( *item );
   2536 #endif
   2537 
   2538     tail.store( magic + 1, std::memory_order_release );
   2539 }
   2540 
   2541 void Profiler::HandleParameter( uint64_t payload )
   2542 {
   2543     assert( m_paramCallback );
   2544     const auto idx = uint32_t( payload >> 32 );
   2545     const auto val = int32_t( payload & 0xFFFFFFFF );
   2546     m_paramCallback( idx, val );
   2547 }
   2548 
   2549 }
   2550 
   2551 #ifdef __cplusplus
   2552 extern "C" {
   2553 #endif
   2554 
   2555 TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active )
   2556 {
   2557     ___tracy_c_zone_context ctx;
   2558 #ifdef TRACY_ON_DEMAND
   2559     ctx.active = active && tracy::GetProfiler().IsConnected();
   2560 #else
   2561     ctx.active = active;
   2562 #endif
   2563     if( !ctx.active ) return ctx;
   2564     const auto id = tracy::GetProfiler().GetNextZoneId();
   2565     ctx.id = id;
   2566 
   2567 #ifndef TRACY_NO_VERIFY
   2568     {
   2569         tracy::Magic magic;
   2570         auto token = tracy::GetToken();
   2571         auto& tail = token->get_tail_index();
   2572         auto item = token->enqueue_begin( magic );
   2573         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
   2574         tracy::MemWrite( &item->zoneValidation.id, id );
   2575         tail.store( magic + 1, std::memory_order_release );
   2576     }
   2577 #endif
   2578     {
   2579         tracy::Magic magic;
   2580         auto token = tracy::GetToken();
   2581         auto& tail = token->get_tail_index();
   2582         auto item = token->enqueue_begin( magic );
   2583         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBegin );
   2584         tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
   2585         tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
   2586         tail.store( magic + 1, std::memory_order_release );
   2587     }
   2588     return ctx;
   2589 }
   2590 
   2591 TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active )
   2592 {
   2593     ___tracy_c_zone_context ctx;
   2594 #ifdef TRACY_ON_DEMAND
   2595     ctx.active = active && tracy::GetProfiler().IsConnected();
   2596 #else
   2597     ctx.active = active;
   2598 #endif
   2599     if( !ctx.active ) return ctx;
   2600     const auto id = tracy::GetProfiler().GetNextZoneId();
   2601     ctx.id = id;
   2602 
   2603 #ifndef TRACY_NO_VERIFY
   2604     {
   2605         tracy::Magic magic;
   2606         auto token = tracy::GetToken();
   2607         auto& tail = token->get_tail_index();
   2608         auto item = token->enqueue_begin( magic );
   2609         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
   2610         tracy::MemWrite( &item->zoneValidation.id, id );
   2611         tail.store( magic + 1, std::memory_order_release );
   2612     }
   2613 #endif
   2614     {
   2615         tracy::Magic magic;
   2616         auto token = tracy::GetToken();
   2617         auto& tail = token->get_tail_index();
   2618         auto item = token->enqueue_begin( magic );
   2619         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneBeginCallstack );
   2620         tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
   2621         tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
   2622         tail.store( magic + 1, std::memory_order_release );
   2623     }
   2624 
   2625     tracy::GetProfiler().SendCallstack( depth );
   2626     return ctx;
   2627 }
   2628 
   2629 TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
   2630 {
   2631     if( !ctx.active ) return;
   2632 #ifndef TRACY_NO_VERIFY
   2633     {
   2634         tracy::Magic magic;
   2635         auto token = tracy::GetToken();
   2636         auto& tail = token->get_tail_index();
   2637         auto item = token->enqueue_begin( magic );
   2638         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
   2639         tracy::MemWrite( &item->zoneValidation.id, ctx.id );
   2640         tail.store( magic + 1, std::memory_order_release );
   2641     }
   2642 #endif
   2643     {
   2644         tracy::Magic magic;
   2645         auto token = tracy::GetToken();
   2646         auto& tail = token->get_tail_index();
   2647         auto item = token->enqueue_begin( magic );
   2648         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneEnd );
   2649         tracy::MemWrite( &item->zoneEnd.time, tracy::Profiler::GetTime() );
   2650         tail.store( magic + 1, std::memory_order_release );
   2651     }
   2652 }
   2653 
   2654 TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size )
   2655 {
   2656     if( !ctx.active ) return;
   2657     auto ptr = (char*)tracy::tracy_malloc( size+1 );
   2658     memcpy( ptr, txt, size );
   2659     ptr[size] = '\0';
   2660 #ifndef TRACY_NO_VERIFY
   2661     {
   2662         tracy::Magic magic;
   2663         auto token = tracy::GetToken();
   2664         auto& tail = token->get_tail_index();
   2665         auto item = token->enqueue_begin( magic );
   2666         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
   2667         tracy::MemWrite( &item->zoneValidation.id, ctx.id );
   2668         tail.store( magic + 1, std::memory_order_release );
   2669     }
   2670 #endif
   2671     {
   2672         tracy::Magic magic;
   2673         auto token = tracy::GetToken();
   2674         auto& tail = token->get_tail_index();
   2675         auto item = token->enqueue_begin( magic );
   2676         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneText );
   2677         tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr );
   2678         tail.store( magic + 1, std::memory_order_release );
   2679     }
   2680 }
   2681 
   2682 TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size )
   2683 {
   2684     if( !ctx.active ) return;
   2685     auto ptr = (char*)tracy::tracy_malloc( size+1 );
   2686     memcpy( ptr, txt, size );
   2687     ptr[size] = '\0';
   2688 #ifndef TRACY_NO_VERIFY
   2689     {
   2690         tracy::Magic magic;
   2691         auto token = tracy::GetToken();
   2692         auto& tail = token->get_tail_index();
   2693         auto item = token->enqueue_begin( magic );
   2694         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneValidation );
   2695         tracy::MemWrite( &item->zoneValidation.id, ctx.id );
   2696         tail.store( magic + 1, std::memory_order_release );
   2697     }
   2698 #endif
   2699     {
   2700         tracy::Magic magic;
   2701         auto token = tracy::GetToken();
   2702         auto& tail = token->get_tail_index();
   2703         auto item = token->enqueue_begin( magic );
   2704         tracy::MemWrite( &item->hdr.type, tracy::QueueType::ZoneName );
   2705         tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr );
   2706         tail.store( magic + 1, std::memory_order_release );
   2707     }
   2708 }
   2709 
   2710 TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size ) { tracy::Profiler::MemAlloc( ptr, size ); }
   2711 TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth ); }
   2712 TRACY_API void ___tracy_emit_memory_free( const void* ptr ) { tracy::Profiler::MemFree( ptr ); }
   2713 TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth ) { tracy::Profiler::MemFreeCallstack( ptr, depth ); }
   2714 TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); }
   2715 TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); }
   2716 TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); }
   2717 TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ) { tracy::Profiler::SendFrameImage( image, w, h, offset, flip ); }
   2718 TRACY_API void ___tracy_emit_plot( const char* name, double val ) { tracy::Profiler::PlotData( name, val ); }
   2719 TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ) { tracy::Profiler::Message( txt, size, callstack ); }
   2720 TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy::Profiler::Message( txt, callstack ); }
   2721 TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
   2722 TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); }
   2723 TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); }
   2724 
   2725 #ifdef __cplusplus
   2726 }
   2727 #endif
   2728 
   2729 #endif