commit ae25bb2a453ccfe2e25e8df0125bf425b6046a01 parent b29172a2997ae245ee64b06b2747818509289442 Author: Michael Savage <mikejsavage@gmail.com> Date: Mon Dec 26 00:54:04 +0200 New hierarchical profiler Diffstat:
| Makefile | | | 2 | +- | 
| benchmark.cc | | | 63 | --------------------------------------------------------------- | 
| benchmark.h | | | 23 | ----------------------- | 
| game.h | | | 1 | - | 
| profiler.cc | | | 135 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | 
| profiler.h | | | 18 | ++++++++++++++++++ | 
| stats.cc | | | 8 | ++++---- | 
| stats.h | | | 2 | +- | 
| str.h | | | 4 | ++-- | 
diff --git a/Makefile b/Makefile @@ -30,7 +30,7 @@ xxhash.o: CXXFLAGS += -O3 visitors/linear_algebra.h: linear_algebra.h # Common dependencies -COMMON_OBJS := log.o memory_arena.o work_queue.o immediate.o benchmark.o text_renderer.o stb_truetype.o renderer.o strlcpy.o +COMMON_OBJS := log.o memory_arena.o work_queue.o immediate.o profiler.o text_renderer.o stb_truetype.o renderer.o strlcpy.o stats.o # Compiler flags WARNINGS := -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -Wconversion -Wshadow -Wcast-align -Wstrict-overflow -Werror=switch-enum diff --git a/benchmark.cc b/benchmark.cc @@ -1,63 +0,0 @@ -// TODO -#if !defined( _MSC_VER ) -#include <x86intrin.h> -#endif - -#include "intrinsics.h" -#include "benchmark.h" -#include "platform_atomic.h" - -#define MAX_TIMERS 4096 - -struct Timer { - NONCOPYABLE( Timer ); - Timer() { } - - const char * fn; - const char * file; - int line; - // TODO: use Stats - atomic_u64 total_clocks; - atomic_u64 num_calls; -}; - -static Timer timers[ 4096 ]; -static atomic_u32 num_timers( 0 ); - -ScopedTimer::ScopedTimer( u32 idx ) { - initial_clock = __rdtsc(); - timer_idx = idx; -} - -ScopedTimer::~ScopedTimer() { - u64 dt = __rdtsc() - initial_clock; - fetch_add_relaxed( &timers[ timer_idx ].total_clocks, dt ); - fetch_add_relaxed( &timers[ timer_idx ].num_calls, 1 ); -} - -u32 benchmark_new_timer( const char * fn, const char * file, int line ) { - u32 idx = fetch_add_relaxed( &num_timers, 1 ); - assert( idx < MAX_TIMERS ); - - Timer & info = timers[ idx ]; - info.fn = fn; - info.file = file; - info.line = line; - store_relaxed( &info.total_clocks, 0 ); - store_release( &info.num_calls, 0 ); - - return idx; -} - -void benchmark_print_timers() { - u32 n = load_acquire( &num_timers ); - for( u32 i = 0; i < n; i++ ) { - u64 clocks = load_relaxed( &timers[ i ].total_clocks ); - u64 calls = load_relaxed( &timers[ i ].num_calls ); - - printf( "%s (%s:%d) called %lu times, %lu clocks, %lu avg\n", - timers[ i ].fn, timers[ i ].file, timers[ i ].line, - calls, clocks, clocks / calls - ); - } -} diff --git a/benchmark.h b/benchmark.h @@ -1,23 +0,0 @@ -#ifndef _BENCHMARK_H_ -#define _BENCHMARK_H_ - -#include "intrinsics.h" - -struct ScopedTimer { - u64 initial_clock; - u32 timer_idx; - - ScopedTimer( u32 idx ); - ~ScopedTimer(); -}; - -u32 benchmark_new_timer( const char * fn, const char * file, int line ); -void benchmark_print_timers(); - -// TODO: i think this works but double check to make sure it really does. -#define TIMED_BLOCK( fn, file, line ) \ - static u32 benchmark_timer_idx = benchmark_new_timer( fn, file, line ); \ - ScopedTimer benchmark_timer( benchmark_timer_idx ); -#define TIMED_FUNCTION() TIMED_BLOCK( __FUNCTION__, __FILE__, __LINE__ ) - -#endif // _BENCHMARK_H_ diff --git a/game.h b/game.h @@ -5,7 +5,6 @@ #include "intrinsics.h" #include "linear_algebra.h" #include "renderer.h" -#include "benchmark.h" #include "terrain_manager.h" #include "btt.h" #include "gpubtt.h" diff --git a/profiler.cc b/profiler.cc @@ -0,0 +1,135 @@ +#include <stdio.h> +#include <unistd.h> +#include <sys/time.h> + +#include "intrinsics.h" +#include "profiler.h" +#include "stats.h" +#include "str.h" + +static double get_time() { + struct timeval tv; + gettimeofday( &tv, NULL ); + + return ( double ) tv.tv_sec + tv.tv_usec / 1000000.0; +} + +struct Timer { + const char * name; + const char * file; + int line; + Stats stats; +}; + +struct StackTimer { + u32 timer_idx; + double pushed_at; +}; + +struct LogEntry { + str< 256 > message; + double dt; + u32 first_child; + u32 next_sibling; +}; + +#define MAX_TIMERS 4096 +#define TIMER_STACK_SIZE ( U8_MAX + 1 ) +#define TIMER_LOG_SIZE 1024 + +static Timer all_timers[ MAX_TIMERS ]; +static u32 num_timers = 0; + +static StackTimer timer_stack[ TIMER_STACK_SIZE ]; +static u32 timer_stack_top = 0; + +static LogEntry logs[ TIMER_LOG_SIZE ]; +static u32 last_at_depth[ TIMER_LOG_SIZE ]; +static u32 num_log_entries = 1; + +u32 profiler_new_timer( const char * name, const char * file, int line ) { + ASSERT( num_timers < MAX_TIMERS ); + + all_timers[ num_timers ].name = name; + all_timers[ num_timers ].file = file; + all_timers[ num_timers ].line = line; + + u32 ret = num_timers; + num_timers++; + return ret; +} + +void profiler_push( u32 timer_idx ) { + ASSERT( timer_stack_top < TIMER_STACK_SIZE ); + + timer_stack[ timer_stack_top ].timer_idx = timer_idx; + timer_stack[ timer_stack_top ].pushed_at = get_time(); + timer_stack_top++; +} + +static void print_node( u32 idx, double frame_dt ) { + const LogEntry & entry = logs[ idx ]; + if( entry.next_sibling != 0 ) { + print_node( entry.next_sibling, frame_dt ); + } + + printf( "%s (%.2f%%)\n", entry.message.c_str(), entry.dt * 100.0 / frame_dt ); + + if( entry.first_child != 0 ) { + print_node( entry.first_child, frame_dt ); + } +} + +static void print_frame( double frame_dt ) { + print_node( num_log_entries - 1, frame_dt ); + printf( "\n" ); +} + +void profiler_pop() { + ASSERT( timer_stack_top > 0 ); + + double now = get_time(); + + timer_stack_top--; + u32 idx = timer_stack[ timer_stack_top ].timer_idx; + double dt = now - timer_stack[ timer_stack_top ].pushed_at; + + Timer * timer = &all_timers[ idx ]; + stats_record( &timer->stats, dt ); + + ASSERT( num_log_entries < TIMER_LOG_SIZE ); + LogEntry * entry = &logs[ num_log_entries ]; + entry->message.sprintf( "%*s%s (%s:%d) - %.3fms", timer_stack_top * 2, "", timer->name, timer->file, timer->line, dt * 1000.0 ); + entry->dt = dt; + entry->first_child = last_at_depth[ timer_stack_top + 1 ]; + entry->next_sibling = last_at_depth[ timer_stack_top ]; + + last_at_depth[ timer_stack_top ] = num_log_entries; + last_at_depth[ timer_stack_top + 1 ] = 0; + + num_log_entries++; + + if( timer_stack_top == 0 ) { + print_frame( dt ); + num_log_entries = 1; + memset( last_at_depth, 0, sizeof( last_at_depth ) ); + } +} + +void profiler_print_all() { + for( u32 i = 0; i < num_timers; i++ ) { + Timer * timer = &all_timers[ i ]; + + printf( "%s (%s:%d): ", timer->name, timer->file, timer->line ); + stats_print( &timer->stats ); + printf( "\n" ); + } +} + +ScopedTimer::ScopedTimer( u32 timer_idx ) { + profiler_push( timer_idx ); +} + +ScopedTimer::~ScopedTimer() { + profiler_pop(); +} diff --git a/profiler.h b/profiler.h @@ -0,0 +1,18 @@ +#pragma once + +#include "intrinsics.h" + +u32 profiler_new_timer( const char * name, const char * file, int line ); +void profiler_push( u32 timer_idx ); +void profiler_pop(); +void profiler_print_all(); + +struct ScopedTimer { + ScopedTimer( u32 timer_idx ); + ~ScopedTimer(); +}; + +#define PROFILE_BLOCK( name ) \ + static u32 CONCAT( scoped_timer_idx, __LINE__ ) = profiler_new_timer( name, __FILE__, __LINE__ ); \ + ScopedTimer CONCAT( scoped_timer, __LINE__ )( CONCAT( scoped_timer_idx, __LINE__ ) ); +#define PROFILE_FUNCTION() PROFILE_BLOCK( __FUNCTION__ ) diff --git a/stats.cc b/stats.cc @@ -48,14 +48,14 @@ void stats_record( Stats * stats, double x ) { stats->num_records++; } -void stats_print( Stats * stats, const char * name ) { +void stats_print( Stats * stats ) { u32 num_samples = min( stats->num_records, ( u64 ) STATS_NUM_QUART_SAMPLES ); std::sort( stats->samples, stats->samples + num_samples ); - printf( "[%s]\n\tn: %llu\n\tmean: %f\n\tstddev: %f\n\trange: %f to %f\n\t25%%: %f 75%%: %f", - name, stats->num_records, + printf( "n = %llu, mean = %f, stddev = %f, range = %f to %f, 25%% = %f, 75%% = %f", + stats->num_records, stats_mean( stats ), stats_stddev( stats ), stats->min, stats->max, - local_samples[ num_samples / 4 ], local_samples[ num_samples * 3 / 4 ] + stats->samples[ num_samples / 4 ], stats->samples[ num_samples * 3 / 4 ] ); } diff --git a/stats.h b/stats.h @@ -22,6 +22,6 @@ void stats_init( Stats * stats ); double stats_mean( const Stats * stats ); double stats_stddev( const Stats * stats ); void stats_record( Stats * stats, double x ); -void stats_print( Stats * stats, const char * name = "stats" ); +void stats_print( Stats * stats ); #endif // _STATS_H_ diff --git a/str.h b/str.h @@ -85,11 +85,11 @@ public: va_end( argp ); } - const char * c_str() { + const char * c_str() const { return buf; } - size_t len() { + size_t len() const { return length; }
