commit 556320b2bed32f2e01328c1cfe7e030b1be75e98
parent 238cbe8642a3c4e04dfa3ebb261312b97218f7c8
Author: Michael Savage <mikejsavage@gmail.com>
Date:   Fri Mar 4 23:23:10 +0000

Add atomic_* types

Diffstat:
 benchmark.cc      | 16 ++++++++--------
 platform_atomic.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 unix_atomic.h     | 83 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
 work_queue.cc     | 39 +++++++++++++++++++++------------------
 work_queue.h      |  9 +++++----
diff --git a/benchmark.cc b/benchmark.cc
@@ -12,12 +12,12 @@ struct Timer {
     const char * file;
     int line;
     // TODO: use Stats
-    volatile u64 total_clocks;
-    volatile u64 num_calls;
+    atomic_u64 total_clocks;
+    atomic_u64 num_calls;
 };
 
 static Timer timers[ 4096 ];
-static volatile u32 num_timers = 0;
+static atomic_u32 num_timers = { 0 };
 
 ScopedTimer::ScopedTimer( u32 idx ) {
     initial_clock = __rdtsc();
@@ -38,8 +38,8 @@ u32 benchmark_new_timer( const char * fn, const char * file, int line ) {
     info.fn = fn;
     info.file = file;
     info.line = line;
-    info.total_clocks = 0;
-    info.num_calls = 0;
+    atomic_set_u64( &info.total_clocks, 0 );
+    atomic_set_u64( &info.num_calls, 0 );
 
     write_barrier();
 
@@ -49,9 +49,9 @@ u32 benchmark_new_timer( const char * fn, const char * file, int line ) {
 }
 
 void benchmark_print_timers() {
-    for( u64 i = 0; i < num_timers; i++ ) {
-        u64 clocks = timers[ i ].total_clocks;
-        u64 calls = timers[ i ].num_calls;
+    for( u64 i = 0; i < atomic_get_u32( &num_timers ); i++ ) {
+        u64 clocks = atomic_get( &timers[ i ].total_clocks );
+        u64 calls = atomic_get( &timers[ i ].num_calls );
         printf( "%s (%s:%d) called %lu times, %lu clocks, %lu avg\n",
             timers[ i ].fn, timers[ i ].file, timers[ i ].line,
diff --git a/platform_atomic.h b/platform_atomic.h
@@ -1,8 +1,81 @@
 #ifndef _PLATFORM_ATOMIC_H_
 #define _PLATFORM_ATOMIC_H_
 
+#include "intrinsics.h"
+
+#define ATOMIC_STRUCT_DEF( T ) \
+    struct atomic_##T { \
+        volatile T v; \
+    }
+
+#define ATOMIC_FUNCTION_DEF( T ) \
+    inline T atomic_add( atomic_##T * atom, T x ) { \
+        return atomic_add_##T( atom, x ); \
+    } \
+    inline T atomic_sub( atomic_##T * atom, T x ) { \
+        return atomic_sub_##T( atom, x ); \
+    } \
+    inline T atomic_swap( atomic_##T * atom, T newval ) { \
+        return atomic_swap_##T( atom, newval ); \
+    } \
+    inline bool atomic_cas( atomic_##T * atom, T oldval, T newval ) { \
+        return atomic_cas_##T( atom, oldval, newval ); \
+    } \
+    inline T atomic_get( atomic_##T * atom ) { \
+        return atomic_get_##T( atom ); \
+    } \
+    inline void atomic_set( atomic_##T * atom, T x ) { \
+        atomic_set_##T( atom, x ); \
+    }
+
+ATOMIC_STRUCT_DEF( s8 );
+ATOMIC_STRUCT_DEF( s16 );
+ATOMIC_STRUCT_DEF( s32 );
+ATOMIC_STRUCT_DEF( s64 );
+ATOMIC_STRUCT_DEF( u8 );
+ATOMIC_STRUCT_DEF( u16 );
+ATOMIC_STRUCT_DEF( u32 );
+ATOMIC_STRUCT_DEF( u64 );
+
+template< typename T >
+struct atomic_ptr {
+    T * volatile v;
+};
+
 #if defined( __linux__ ) || defined( __APPLE__ )
 #include "unix_atomic.h"
 #endif
 
+ATOMIC_FUNCTION_DEF( s8 );
+ATOMIC_FUNCTION_DEF( s16 );
+ATOMIC_FUNCTION_DEF( s32 );
+ATOMIC_FUNCTION_DEF( s64 );
+ATOMIC_FUNCTION_DEF( u8 );
+ATOMIC_FUNCTION_DEF( u16 );
+ATOMIC_FUNCTION_DEF( u32 );
+ATOMIC_FUNCTION_DEF( u64 );
+
+template< typename T >
+inline T * atomic_swap( atomic_ptr< T > * atom, T * x ) {
+    return atomic_swap_ptr( atom, x );
+}
+
+template< typename T >
+inline bool atomic_cas( atomic_ptr< T > * atom, T * oldval, T * newval ) {
+    return atomic_cas_ptr( atom, oldval, newval );
+}
+
+template< typename T >
+inline T * atomic_get( atomic_ptr< T > * atom ) {
+    return atomic_get_ptr( atom );
+}
+
+template< typename T >
+inline void atomic_set( atomic_ptr< T > * atom, T * x ) {
+    atomic_set_ptr( atom, x );
+}
+
+#undef ATOMIC_STRUCT_DEF
+#undef ATOMIC_FUNCTION_DEF
+
 #endif // _PLATFORM_ATOMIC_H_
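The point of the ATOMIC_FUNCTION_DEF overload set is that call sites can drop the per-type suffix. A minimal usage sketch (not part of the commit; request_count, on_request and try_claim are invented names for illustration):

#include "platform_atomic.h"

static atomic_u32 request_count = { 0 };   // hypothetical counter

void on_request() {
    atomic_add( &request_count, 1 );       // overload resolution picks atomic_add_u32
}

bool try_claim( atomic_u32 * flag ) {
    return atomic_cas( flag, 0, 1 );       // picks atomic_cas_u32
}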
diff --git a/unix_atomic.h b/unix_atomic.h
@@ -6,40 +6,55 @@
 #define read_barrier() asm volatile ( "" ::: "memory" )
 #define write_barrier() asm volatile ( "" ::: "memory" )
 
-inline u8 atomic_add_u8( volatile u8 * dest, u8 i ) {
-    return __sync_add_and_fetch( dest, i );
-}
-
-inline u16 atomic_add_u16( volatile u16 * dest, u16 i ) {
-    return __sync_add_and_fetch( dest, i );
-}
-
-inline u32 atomic_add_u32( volatile u32 * dest, u32 i ) {
-    return __sync_add_and_fetch( dest, i );
-}
-
-inline u64 atomic_add_u64( volatile u64 * dest, u64 i ) {
-    return __sync_add_and_fetch( dest, i );
-}
-
-inline bool atomic_cas_u8( volatile u8 * dest, u8 oldval, u8 newval ) {
-    return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_u16( volatile u16 * dest, u16 oldval, u16 newval ) {
-    return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_u32( volatile u32 * dest, u32 oldval, u32 newval ) {
-    return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_u64( volatile u64 * dest, u64 oldval, u64 newval ) {
-    return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_pointer( volatile void ** dest, void * oldval, void * newval ) {
-    return __sync_bool_compare_and_swap( dest, oldval, newval );
+#define ATOMIC_DEFS( T ) \
+    inline T atomic_add_##T( atomic_##T * atom, T x ) { \
+        return __sync_add_and_fetch( &atom->v, x ); \
+    } \
+    inline T atomic_sub_##T( atomic_##T * atom, T x ) { \
+        return __sync_sub_and_fetch( &atom->v, x ); \
+    } \
+    inline T atomic_swap_##T( atomic_##T * atom, T x ) { \
+        return __sync_lock_test_and_set( &atom->v, x ); \
+    } \
+    inline bool atomic_cas_##T( atomic_##T * atom, T oldval, T newval ) { \
+        return __sync_bool_compare_and_swap( &atom->v, oldval, newval ); \
+    } \
+    inline T atomic_get_##T( atomic_##T * atom ) { \
+        return atom->v; \
+    } \
+    inline void atomic_set_##T( atomic_##T * atom, T x ) { \
+        atom->v = x; \
+    }
+
+ATOMIC_DEFS( s8 );
+ATOMIC_DEFS( s16 );
+ATOMIC_DEFS( s32 );
+ATOMIC_DEFS( s64 );
+ATOMIC_DEFS( u8 );
+ATOMIC_DEFS( u16 );
+ATOMIC_DEFS( u32 );
+ATOMIC_DEFS( u64 );
+
+#undef ATOMIC_DEFS
+
+template< typename T >
+inline T * atomic_swap_ptr( atomic_ptr< T > * atom, T * x ) {
+    return __sync_lock_test_and_set( &atom->v, x );
+}
+
+template< typename T >
+inline bool atomic_cas_ptr( atomic_ptr< T > * atom, T * oldval, T * newval ) {
+    return __sync_bool_compare_and_swap( &atom->v, oldval, newval );
+}
+
+template< typename T >
+inline T * atomic_get_ptr( atomic_ptr< T > * atom ) {
+    return atom->v;
+}
+
+template< typename T >
+inline void atomic_set_ptr( atomic_ptr< T > * atom, T * x ) {
+    atom->v = x;
 }
 
 #endif // _UNIX_ATOMIC_H_
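For reference, this is roughly what ATOMIC_DEFS( u32 ) expands to, with the ordering semantics of the GCC __sync builtins noted (a hand expansion for illustration, not part of the commit):

// Hand expansion of ATOMIC_DEFS( u32 ), for illustration only.
inline u32 atomic_add_u32( atomic_u32 * atom, u32 x ) {
    return __sync_add_and_fetch( &atom->v, x );    // full barrier, returns the new value
}
inline u32 atomic_sub_u32( atomic_u32 * atom, u32 x ) {
    return __sync_sub_and_fetch( &atom->v, x );    // full barrier
}
inline u32 atomic_swap_u32( atomic_u32 * atom, u32 x ) {
    return __sync_lock_test_and_set( &atom->v, x ); // acquire barrier only, returns the old value
}
inline bool atomic_cas_u32( atomic_u32 * atom, u32 oldval, u32 newval ) {
    return __sync_bool_compare_and_swap( &atom->v, oldval, newval ); // full barrier
}
inline u32 atomic_get_u32( atomic_u32 * atom ) {
    return atom->v;                                 // plain volatile read, no hardware barrier
}
inline void atomic_set_u32( atomic_u32 * atom, u32 x ) {
    atom->v = x;                                    // plain volatile write, no hardware barrier
}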
diff --git a/work_queue.cc b/work_queue.cc
@@ -7,14 +7,14 @@ struct ThreadInfo {
     u32 thread_id;
     WorkQueue * queue;
 
-    volatile u32 * started_threads;
+    atomic_u32 * started_threads;
 };
 
-static bool workqueue_step( const u32 thread_id, WorkQueue * const queue ) {
-    const u16 current_head = queue->head;
-    const u16 new_head = ( current_head + 1 ) % array_count( queue->jobs );
+static bool workqueue_step( u32 thread_id, WorkQueue * queue ) {
+    u16 current_head = atomic_get_u16( &queue->head );
+    u16 new_head = ( current_head + 1 ) % array_count( queue->jobs );
 
-    if( current_head != queue->tail ) {
+    if( current_head != atomic_get_u16( &queue->tail ) ) {
         if( atomic_cas_u16( &queue->head, current_head, new_head ) ) {
             const Job & job = queue->jobs[ current_head ];
             job.callback( job.data, &queue->arenas[ thread_id ] );
@@ -29,10 +29,10 @@ static bool workqueue_step( const u32 thread_id, WorkQueue * const queue ) {
 }
 
 static THREAD( workqueue_worker ) {
-    ThreadInfo * const info = ( ThreadInfo * const ) data;
+    ThreadInfo * info = ( ThreadInfo * ) data;
 
-    WorkQueue * const queue = info->queue;
-    const u32 thread_id = info->thread_id;
+    WorkQueue * queue = info->queue;
+    u32 thread_id = info->thread_id;
 
     write_barrier();
     atomic_add_u32( info->started_threads, 1 );
@@ -46,7 +46,7 @@ static THREAD( workqueue_worker ) {
     THREAD_END;
 }
 
-void workqueue_init( WorkQueue * const queue, MemoryArena * const arena, const u32 num_threads ) {
+void workqueue_init( WorkQueue * queue, MemoryArena * arena, u32 num_threads ) {
     *queue = { };
     semaphore_init( &queue->sem );
 
@@ -60,7 +60,8 @@ void workqueue_init( WorkQueue * const queue, MemoryArena * const arena, const u
     MEMARENA_SCOPED_CHECKPOINT( arena );
     ThreadInfo * infos = memarena_push_many( arena, ThreadInfo, num_threads );
 
-    volatile u32 started_threads = 0;
+    atomic_u32 started_threads;
+    atomic_set_u32( &started_threads, 0 );
 
     for( u32 i = 0; i < num_threads; i++ ) {
         infos[ i ] = { i, queue, &started_threads };
@@ -70,28 +71,30 @@ void workqueue_init( WorkQueue * const queue, MemoryArena * const arena, const u
     }
 
     // wait until all threads have a local copy of ThreadInfo
-    while( started_threads < num_threads );
+    while( atomic_get_u32( &started_threads ) < num_threads );
 }
 
-void workqueue_enqueue( WorkQueue * const queue, WorkQueueCallback * const callback, void * const data ) {
+void workqueue_enqueue( WorkQueue * queue, WorkQueueCallback * callback, void * data ) {
     assert( queue->jobs_queued < array_count( queue->jobs ) );
 
-    const Job job = { callback, data };
+    Job job = { callback, data };
 
-    queue->jobs[ queue->tail ] = job;
+    u16 tail = atomic_get_u16( &queue->tail );
+
+    queue->jobs[ tail ] = job;
     queue->jobs_queued++;
 
     write_barrier();
 
-    queue->tail = ( queue->tail + 1 ) % array_count( queue->jobs );
+    atomic_set( &queue->tail, ( tail + 1 ) % array_count( queue->jobs ) );
 
     semaphore_signal( &queue->sem );
 }
 
-void workqueue_exhaust( WorkQueue * const queue ) {
-    while( queue->jobs_completed < queue->jobs_queued ) {
+void workqueue_exhaust( WorkQueue * queue ) {
+    while( atomic_get( &queue->jobs_completed ) < queue->jobs_queued ) {
         workqueue_step( queue->num_threads, queue );
     }
 
     queue->jobs_queued = 0;
-    queue->jobs_completed = 0;
+    atomic_set_u16( &queue->jobs_completed, 0 );
 }
diff --git a/work_queue.h b/work_queue.h
@@ -2,6 +2,7 @@
 #define _WORK_QUEUE_H_
 
 #include "intrinsics.h"
+#include "platform_atomic.h"
 #include "platform_semaphore.h"
 #include "memory_arena.h"
 
@@ -19,11 +20,11 @@ struct WorkQueue {
     Semaphore sem;
 
     // using head/length would mean we need an atomic pair, which is a pain
-    volatile u16 head;
-    volatile u16 tail;
+    atomic_u16 head;
+    atomic_u16 tail;
 
-    volatile u16 jobs_queued;
-    volatile u16 jobs_completed;
+    u16 jobs_queued;
+    atomic_u16 jobs_completed;
 
     u32 num_threads;
     MemoryArena * arenas;
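A short usage sketch of the queue API after this change (not part of the commit; Tile, render_tile and render_all are invented names, and the callback signature follows job.callback( job.data, &queue->arenas[ thread_id ] ) above):

// Hypothetical usage sketch of workqueue_enqueue / workqueue_exhaust.
struct Tile { int x, y; };

static void render_tile( void * data, MemoryArena * arena ) {
    Tile * tile = ( Tile * ) data;
    ( void ) tile;
    ( void ) arena;
    // ... render the tile, using arena as per-thread scratch memory
}

void render_all( WorkQueue * queue, Tile * tiles, u32 num_tiles ) {
    for( u32 i = 0; i < num_tiles; i++ ) {
        workqueue_enqueue( queue, render_tile, &tiles[ i ] );
    }
    // the calling thread also runs workqueue_step until jobs_completed
    // catches up with jobs_queued, then both counters are reset
    workqueue_exhaust( queue );
}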