TracyOpenGL.hpp (10178B)
#ifndef __TRACYOPENGL_HPP__
#define __TRACYOPENGL_HPP__

// OpenGL GPU zone instrumentation.
//
// Include this file after you include OpenGL 3.2 headers.
//
// Macros provided (all of them expand to nothing when profiling is disabled):
//   TracyGpuContext                          - create the GpuCtx stored in tracy::GetGpuCtx().ptr
//   TracyGpuCollect                          - call Collect() on that context
//   TracyGpuZone( name )                     - GPU zone covering the rest of the enclosing scope
//   TracyGpuZoneC( name, color )             - same, with an explicit color
//   TracyGpuNamedZone( varname, name )       - same, with a caller-chosen scope variable name
//   TracyGpuNamedZoneC( varname, name, color )
//   TracyGpu[Named]Zone[C]S( ..., depth )    - variants that also pass a callstack depth

#if !defined TRACY_ENABLE || defined __APPLE__

// Taken when TRACY_ENABLE is not defined or when __APPLE__ is defined:
// every macro is empty and GpuCtxScope is an empty stand-in type.

#define TracyGpuContext
#define TracyGpuNamedZone(x,y)
#define TracyGpuNamedZoneC(x,y,z)
#define TracyGpuZone(x)
#define TracyGpuZoneC(x,y)
#define TracyGpuCollect

#define TracyGpuNamedZoneS(x,y,z)
#define TracyGpuNamedZoneCS(x,y,z,w)
#define TracyGpuZoneS(x,y)
#define TracyGpuZoneCS(x,y,z)

namespace tracy
{
struct SourceLocationData;

// No-op replacement offering the same two constructors as the real scope class.
class GpuCtxScope
{
public:
    GpuCtxScope( const SourceLocationData* ) {}
    // The depth argument is accepted and ignored; it is left unnamed so that
    // including this header does not trigger unused-parameter warnings.
    GpuCtxScope( const SourceLocationData*, int ) {}
};
}

#else

// memset, placement new and the fixed-width integer types are used below, so
// include their headers directly instead of relying on transitive includes.
#include <atomic>
#include <new>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "Tracy.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyCallstack.hpp"
#include "common/TracyAlign.hpp"
#include "common/TracyAlloc.hpp"

// When only the EXT-suffixed timer query names are defined, alias the
// unsuffixed names used in this file to them.
#if !defined GL_TIMESTAMP && defined GL_TIMESTAMP_EXT
#  define GL_TIMESTAMP GL_TIMESTAMP_EXT
#  define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT
#  define glGetQueryObjectiv glGetQueryObjectivEXT
#  define glGetQueryObjectui64v glGetQueryObjectui64vEXT
#  define glQueryCounter glQueryCounterEXT
#endif

// Allocates storage with tracy_malloc, stores the pointer in GetGpuCtx().ptr and
// constructs a GpuCtx in place. Note: this expands to two statements.
#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;

// Each zone macro defines a static SourceLocationData (name, function, file,
// line, color) and a GpuCtxScope object that refers to it. When both
// TRACY_HAS_CALLSTACK and TRACY_CALLSTACK are defined, the plain zone macros
// pass TRACY_CALLSTACK as the callstack depth.
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
#  define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
#  define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK );
#  define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK )
#  define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK )
#else
#  define TracyGpuNamedZone( varname, name ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
#  define TracyGpuNamedZoneC( varname, name, color ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__) );
#  define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name )
#  define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color )
#endif

// Calls Collect() on the context created by TracyGpuContext.
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();

// Explicit-depth variants. Without TRACY_HAS_CALLSTACK the depth argument is
// dropped and the plain macros are used instead.
#ifdef TRACY_HAS_CALLSTACK
#  define TracyGpuNamedZoneS( varname, name, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
#  define TracyGpuNamedZoneCS( varname, name, color, depth ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth );
#  define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth )
#  define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth )
#else
#  define TracyGpuNamedZoneS( varname, name, depth ) TracyGpuNamedZone( varname, name )
#  define TracyGpuNamedZoneCS( varname, name, color, depth ) TracyGpuNamedZoneC( varname, name, color )
#  define TracyGpuZoneS( name, depth ) TracyGpuZone( name )
#  define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color )
#endif

namespace tracy
{

// Owns QueryCount OpenGL query objects and hands them out as a ring buffer:
// m_head is the next slot to be issued, m_tail is the oldest slot whose result
// has not been read back yet by Collect().
class GpuCtx
{
    friend class GpuCtxScope;

    enum { QueryCount = 64 * 1024 };

public:
    // Takes the next id from GetGpuCtxCounter(), generates the query objects,
    // samples a GPU timestamp followed by a CPU timestamp, reads the timestamp
    // counter bit width and queues a GpuNewContext item carrying all of it.
    GpuCtx()
        : m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
        , m_head( 0 )
        , m_tail( 0 )
    {
        assert( m_context != 255 );

        glGenQueries( QueryCount, m_query );

        // The GPU time is sampled first, immediately followed by the CPU time.
        int64_t tgpu;
        glGetInteger64v( GL_TIMESTAMP, &tgpu );
        int64_t tcpu = Profiler::GetTime();

        GLint bits;
        glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );

        const float period = 1.f;
        Magic magic;
        const auto thread = GetThreadHandle();
        auto token = GetToken();
        auto& tail = token->get_tail_index();
        auto item = token->enqueue_begin( magic );
        MemWrite( &item->hdr.type, QueueType::GpuNewContext );
        MemWrite( &item->gpuNewContext.cpuTime, tcpu );
        MemWrite( &item->gpuNewContext.gpuTime, tgpu );
        MemWrite( &item->gpuNewContext.thread, thread );
        MemWrite( &item->gpuNewContext.period, period );
        MemWrite( &item->gpuNewContext.context, m_context );
        MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );

#ifdef TRACY_ON_DEMAND
        // NOTE(review): presumably keeps the item so it can be delivered to a
        // connection made later - confirm against Profiler::DeferItem.
        GetProfiler().DeferItem( *item );
#endif

        tail.store( magic + 1, std::memory_order_release );
    }

    // Reads back the results of issued timestamp queries, oldest first, and
    // queues one GpuTime item per result. Stops at the first query whose result
    // is not available yet; m_tail is not advanced past it, so that query is
    // checked again on the next call.
    void Collect()
    {
        ZoneScopedC( Color::Red4 );

        if( m_tail == m_head ) return;

#ifdef TRACY_ON_DEMAND
        // Not connected: discard everything that is pending by resetting the ring.
        if( !GetProfiler().IsConnected() )
        {
            m_head = m_tail = 0;
            return;
        }
#endif

        Magic magic;
        auto token = GetToken();
        auto& tail = token->get_tail_index();

        while( m_tail != m_head )
        {
            GLint available;
            glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available );
            if( !available ) return;

            uint64_t time;
            glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );

            auto item = token->enqueue_begin( magic );
            MemWrite( &item->hdr.type, QueueType::GpuTime );
            MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
            // Ring indices are below QueryCount (64 * 1024), so they fit in uint16_t.
            MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
            MemWrite( &item->gpuTime.context, m_context );
            tail.store( magic + 1, std::memory_order_release );

            m_tail = ( m_tail + 1 ) % QueryCount;
        }
    }

private:
    // Returns the current head slot and advances the head by one (wrapping at
    // QueryCount). The assert fires when the advanced head reaches the tail,
    // i.e. when queries are issued faster than Collect() retires them.
    tracy_force_inline unsigned int NextQueryId()
    {
        const auto id = m_head;
        m_head = ( m_head + 1 ) % QueryCount;
        assert( m_head != m_tail );
        return id;
    }

    // Maps a ring slot index to the OpenGL query object name stored in that slot.
    tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id )
    {
        return m_query[id];
    }

    // Returns the context id assigned in the constructor.
    tracy_force_inline uint8_t GetId() const
    {
        return m_context;
    }

    unsigned int m_query[QueryCount];
    uint8_t m_context;

    unsigned int m_head;
    unsigned int m_tail;
};

// Scope object behind the TracyGpu*Zone macros. The constructor issues a
// timestamp query and queues a zone-begin item; the destructor issues another
// timestamp query and queues a GpuZoneEnd item. With TRACY_ON_DEMAND the
// connection state is sampled once at construction (m_active), and the
// destructor emits an end item only when the begin item was emitted.
//
// NOTE(review): the class is implicitly copy-constructible and each copy would
// run the destructor body again - avoid copying scope objects.
class GpuCtxScope
{
public:
    // Begins a zone without a callstack. The thread field of the item is zeroed.
    tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc )
#ifdef TRACY_ON_DEMAND
        : m_active( GetProfiler().IsConnected() )
#endif
    {
#ifdef TRACY_ON_DEMAND
        if( !m_active ) return;
#endif
        const auto queryId = GetGpuCtx().ptr->NextQueryId();
        glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );

        Magic magic;
        auto token = GetToken();
        auto& tail = token->get_tail_index();
        auto item = token->enqueue_begin( magic );
        MemWrite( &item->hdr.type, QueueType::GpuZoneBegin );
        MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
        MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
        memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
        MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
        MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
        tail.store( magic + 1, std::memory_order_release );
    }

    // Begins a zone and then sends a callstack of the requested depth. Unlike
    // the overload above, the item carries the calling thread's handle.
    tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth )
#ifdef TRACY_ON_DEMAND
        : m_active( GetProfiler().IsConnected() )
#endif
    {
#ifdef TRACY_ON_DEMAND
        if( !m_active ) return;
#endif
        const auto queryId = GetGpuCtx().ptr->NextQueryId();
        glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );

        Magic magic;
        const auto thread = GetThreadHandle();
        auto token = GetToken();
        auto& tail = token->get_tail_index();
        auto item = token->enqueue_begin( magic );
        MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstack );
        MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
        MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
        MemWrite( &item->gpuZoneBegin.thread, thread );
        MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
        MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
        tail.store( magic + 1, std::memory_order_release );

        GetProfiler().SendCallstack( depth );
    }

    // Ends the zone: issues the closing timestamp query and queues GpuZoneEnd.
    tracy_force_inline ~GpuCtxScope()
    {
#ifdef TRACY_ON_DEMAND
        if( !m_active ) return;
#endif
        const auto queryId = GetGpuCtx().ptr->NextQueryId();
        glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );

        Magic magic;
        auto token = GetToken();
        auto& tail = token->get_tail_index();
        auto item = token->enqueue_begin( magic );
        MemWrite( &item->hdr.type, QueueType::GpuZoneEnd );
        MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
        memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
        MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
        MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() );
        tail.store( magic + 1, std::memory_order_release );
    }

private:
#ifdef TRACY_ON_DEMAND
    // Connection state captured when the scope was opened.
    const bool m_active;
#endif
};

}

#endif

#endif