Add atomic_* types - medfall - Unnamed repository; edit this file 'description' to name the repository.

commit 556320b2bed32f2e01328c1cfe7e030b1be75e98
parent 238cbe8642a3c4e04dfa3ebb261312b97218f7c8
Author: Michael Savage <mikejsavage@gmail.com>
Date:   Fri Mar  4 23:23:10 +0000

Add atomic_* types

Diffstat:
benchmark.cc  | 16 ++++++++--------
platform_atomic.h  | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
unix_atomic.h  | 83 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
work_queue.cc  | 39 +++++++++++++++++++++------------------
work_queue.h  | 9 +++++----
5 files changed, 156 insertions(+), 64 deletions(-)
diff --git a/benchmark.cc b/benchmark.cc
@@ -12,12 +12,12 @@ struct Timer {
 	const char * file;
 	int line;
 	// TODO: use Stats
-	volatile u64 total_clocks;
-	volatile u64 num_calls;
+	atomic_u64 total_clocks;
+	atomic_u64 num_calls;
 };
 
 static Timer timers[ 4096 ];
-static volatile u32 num_timers = 0;
+static atomic_u32 num_timers = { 0 };
 
 ScopedTimer::ScopedTimer( u32 idx ) {
 	initial_clock = __rdtsc();
@@ -38,8 +38,8 @@ u32 benchmark_new_timer( const char * fn, const char * file, int line ) {
 	info.fn = fn;
 	info.file = file;
 	info.line = line;
-	info.total_clocks = 0;
-	info.num_calls = 0;
+	atomic_set_u64( &info.total_clocks, 0 );
+	atomic_set_u64( &info.num_calls, 0 );
 
 	write_barrier();
 
@@ -49,9 +49,9 @@ u32 benchmark_new_timer( const char * fn, const char * file, int line ) {
 }
 
 void benchmark_print_timers() {
-	for( u64 i = 0; i < num_timers; i++ ) {
-		u64 clocks = timers[ i ].total_clocks;
-		u64 calls = timers[ i ].num_calls;
+	for( u64 i = 0; i < atomic_get_u32( &num_timers ); i++ ) {
+		u64 clocks = atomic_get( &timers[ i ].total_clocks );
+		u64 calls = atomic_get( &timers[ i ].num_calls );
 
 		printf( "%s (%s:%d) called %lu times, %lu clocks, %lu avg\n",
 			timers[ i ].fn, timers[ i ].file, timers[ i ].line,
diff --git a/platform_atomic.h b/platform_atomic.h
@@ -1,8 +1,81 @@
 #ifndef _PLATFORM_ATOMIC_H_
 #define _PLATFORM_ATOMIC_H_
 
+#include "intrinsics.h"
+
+#define ATOMIC_STRUCT_DEF( T ) /* declares struct atomic_T wrapping a single value of type T */ \
+	struct atomic_##T { \
+		volatile T v; /* the wrapped value; the platform atomic_*_T functions operate on this member */ \
+	}
+
+#define ATOMIC_FUNCTION_DEF( T ) /* overloads without a type suffix; each forwards to the matching atomic_<op>_T function */ \
+	inline T atomic_add( atomic_##T * atom, T x ) { \
+		return atomic_add_##T( atom, x ); \
+	} \
+	inline T atomic_sub( atomic_##T * atom, T x ) { \
+		return atomic_sub_##T( atom, x ); \
+	} \
+	inline T atomic_swap( atomic_##T * atom, T newval ) { \
+		return atomic_swap_##T( atom, newval ); \
+	} \
+	inline bool atomic_cas( atomic_##T * atom, T oldval, T newval ) { \
+		return atomic_cas_##T( atom, oldval, newval ); \
+	} \
+	inline T atomic_get( atomic_##T * atom ) { \
+		return atomic_get_##T( atom ); \
+	} \
+	inline void atomic_set( atomic_##T * atom, T x ) { \
+		atomic_set_##T( atom, x ); \
+	}
+
+ATOMIC_STRUCT_DEF( s8 );
+ATOMIC_STRUCT_DEF( s16 );
+ATOMIC_STRUCT_DEF( s32 );
+ATOMIC_STRUCT_DEF( s64 );
+ATOMIC_STRUCT_DEF( u8 );
+ATOMIC_STRUCT_DEF( u16 );
+ATOMIC_STRUCT_DEF( u32 );
+ATOMIC_STRUCT_DEF( u64 );
+
+template< typename T >
+struct atomic_ptr { // pointer counterpart of the atomic_<int> structs
+	T * volatile v; // the pointer itself is volatile, not the pointee
+};
+
 #if defined( __linux__ ) || defined( __APPLE__ )
 #include "unix_atomic.h"
 #endif
 
+ATOMIC_FUNCTION_DEF( s8 );
+ATOMIC_FUNCTION_DEF( s16 );
+ATOMIC_FUNCTION_DEF( s32 );
+ATOMIC_FUNCTION_DEF( s64 );
+ATOMIC_FUNCTION_DEF( u8 );
+ATOMIC_FUNCTION_DEF( u16 );
+ATOMIC_FUNCTION_DEF( u32 );
+ATOMIC_FUNCTION_DEF( u64 );
+
+template< typename T >
+inline T * atomic_swap( atomic_ptr< T > * atom, T * x ) { // pointer overload: the result is a T *, never a T
+	return atomic_swap_pointer( atom, x ); // the platform header names this atomic_swap_pointer; no atomic_swap_ptr is defined
+}
+
+template< typename T >
+inline bool atomic_cas( atomic_ptr< T > * atom, T * oldval, T * newval ) { // pointer overload of compare-and-swap
+	return atomic_cas_pointer( atom, oldval, newval ); // the platform header names this atomic_cas_pointer; no atomic_cas_ptr is defined
+}
+
+template< typename T >
+inline T * atomic_get( atomic_ptr< T > * atom ) { // pointer overload: atomic_ptr< T > holds a T *, so that is what is returned
+	return atomic_get_ptr( atom );
+}
+
+template< typename T >
+inline void atomic_set( atomic_ptr< T > * atom, T * x ) { // pointer overload; forwards to atomic_set_ptr
+	atomic_set_ptr( atom, x );
+}
+
+#undef ATOMIC_STRUCT_DEF
+#undef ATOMIC_FUNCTION_DEF
+
 #endif // _PLATFORM_ATOMIC_H_
diff --git a/unix_atomic.h b/unix_atomic.h
@@ -6,40 +6,55 @@
 #define read_barrier() asm volatile ( "" ::: "memory" )
 #define write_barrier() asm volatile ( "" ::: "memory" )
 
-inline u8 atomic_add_u8( volatile u8 * dest, u8 i ) {
-	return __sync_add_and_fetch( dest, i );
-}
-
-inline u16 atomic_add_u16( volatile u16 * dest, u16 i ) {
-	return __sync_add_and_fetch( dest, i );
-}
-
-inline u32 atomic_add_u32( volatile u32 * dest, u32 i ) {
-	return __sync_add_and_fetch( dest, i );
-}
-
-inline u64 atomic_add_u64( volatile u64 * dest, u64 i ) {
-	return __sync_add_and_fetch( dest, i );
-}
-
-inline bool atomic_cas_u8( volatile u8 * dest, u8 oldval, u8 newval ) {
-	return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_u16( volatile u16 * dest, u16 oldval, u16 newval ) {
-	return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_u32( volatile u32 * dest, u32 oldval, u32 newval ) {
-	return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_u64( volatile u64 * dest, u64 oldval, u64 newval ) {
-	return __sync_bool_compare_and_swap( dest, oldval, newval );
-}
-
-inline bool atomic_cas_pointer( volatile void ** dest, void * oldval, void * newval ) {
-	return __sync_bool_compare_and_swap( dest, oldval, newval );
+#define ATOMIC_DEFS( T ) /* defines the atomic_<op>_T functions for integer type T using GCC __sync builtins */ \
+	inline T atomic_add_##T( atomic_##T * atom, T x ) { /* returns the value after the addition */ \
+		return __sync_add_and_fetch( &atom->v, x ); \
+	} \
+	inline T atomic_sub_##T( atomic_##T * atom, T x ) { /* returns the value after the subtraction */ \
+		return __sync_sub_and_fetch( &atom->v, x ); \
+	} \
+	inline T atomic_swap_##T( atomic_##T * atom, T x ) { /* returns the previous value; acquire barrier only per GCC docs */ \
+		return __sync_lock_test_and_set( &atom->v, x ); \
+	} \
+	inline bool atomic_cas_##T( atomic_##T * atom, T oldval, T newval ) { /* true if newval was written; result is a flag, not a T */ \
+		return __sync_bool_compare_and_swap( &atom->v, oldval, newval ); \
+	} \
+	inline T atomic_get_##T( atomic_##T * atom ) { /* plain volatile load, no barrier */ \
+		return atom->v; \
+	} \
+	inline void atomic_set_##T( atomic_##T * atom, T x ) { /* plain volatile store, no barrier */ \
+		atom->v = x; \
+	}
+
+ATOMIC_DEFS( s8 );
+ATOMIC_DEFS( s16 );
+ATOMIC_DEFS( s32 );
+ATOMIC_DEFS( s64 );
+ATOMIC_DEFS( u8 );
+ATOMIC_DEFS( u16 );
+ATOMIC_DEFS( u32 );
+ATOMIC_DEFS( u64 );
+
+#undef ATOMIC_DEFS
+
+template< typename T >
+inline T * atomic_swap_pointer( atomic_ptr< T > * atom, T * x ) { // returns the previous pointer; a bool result would lose it
+	return __sync_lock_test_and_set( &atom->v, x ); // stores x and yields the old contents (acquire barrier only per GCC docs)
+}
+
+template< typename T >
+inline bool atomic_cas_pointer( atomic_ptr< T > * atom, T * oldval, T * newval ) { // true if the stored pointer equalled oldval and newval was written
+	return __sync_bool_compare_and_swap( &atom->v, oldval, newval ); // GCC provides no __sync_lock_compare_and_swap builtin
+}
+
+template< typename T >
+inline T * atomic_get_ptr( atomic_ptr< T > * atom ) { // v is a T *, so the result type is T * rather than T
+	return atom->v; // plain volatile load, no barrier
+}
+
+template< typename T >
+inline void atomic_set_ptr( atomic_ptr< T > * atom, T * x ) { // plain volatile store of the pointer, no barrier
+	atom->v = x;
 }
 
 #endif // _UNIX_ATOMIC_H_
diff --git a/work_queue.cc b/work_queue.cc
@@ -7,14 +7,14 @@
 struct ThreadInfo { // startup parameters handed to each worker thread
 	u32 thread_id;
 	WorkQueue * queue;
-	volatile u32 * started_threads;
+	atomic_u32 * started_threads; // shared counter each worker increments after copying its ThreadInfo
 };
 
-static bool workqueue_step( const u32 thread_id, WorkQueue * const queue ) {
-	const u16 current_head = queue->head;
-	const u16 new_head = ( current_head + 1 ) % array_count( queue->jobs );
+static bool workqueue_step( u32 thread_id, WorkQueue * queue ) {
+	u16 current_head = atomic_get_u16( &queue->head );
+	u16 new_head = ( current_head + 1 ) % array_count( queue->jobs );
 
-	if( current_head != queue->tail ) {
+	if( current_head != atomic_get_u16( &queue->tail ) ) {
 		if( atomic_cas_u16( &queue->head, current_head, new_head ) ) {
 			const Job & job = queue->jobs[ current_head ];
 			job.callback( job.data, &queue->arenas[ thread_id ] );
@@ -29,10 +29,10 @@ static bool workqueue_step( const u32 thread_id, WorkQueue * const queue ) {
 }
 
 static THREAD( workqueue_worker ) {
-	ThreadInfo * const info = ( ThreadInfo * const ) data;
+	ThreadInfo * info = ( ThreadInfo * ) data;
 
-	WorkQueue * const queue = info->queue;
-	const u32 thread_id = info->thread_id;
+	WorkQueue * queue = info->queue;
+	u32 thread_id = info->thread_id;
 
 	write_barrier();
 	atomic_add_u32( info->started_threads, 1 );
@@ -46,7 +46,7 @@ static THREAD( workqueue_worker ) {
 	THREAD_END;
 }
 
-void workqueue_init( WorkQueue * const queue, MemoryArena * const arena, const u32 num_threads ) {
+void workqueue_init( WorkQueue * queue, MemoryArena * arena, u32 num_threads ) {
 	*queue = { };
 	semaphore_init( &queue->sem );
 
@@ -60,7 +60,8 @@ void workqueue_init( WorkQueue * const queue, MemoryArena * const arena, const u
 	MEMARENA_SCOPED_CHECKPOINT( arena );
 
 	ThreadInfo * infos = memarena_push_many( arena, ThreadInfo, num_threads );
-	volatile u32 started_threads = 0;
+	atomic_u32 started_threads;
+	atomic_set_u32( &started_threads, 0 );
 
 	for( u32 i = 0; i < num_threads; i++ ) {
 		infos[ i ] = { i, queue, &started_threads };
@@ -70,28 +71,30 @@ void workqueue_init( WorkQueue * const queue, MemoryArena * const arena, const u
 	}
 
 	// wait until all threads have a local copy of ThreadInfo
-	while( started_threads < num_threads );
+	while( atomic_get_u32( &started_threads ) < num_threads );
 }
 
-void workqueue_enqueue( WorkQueue * const queue, WorkQueueCallback * const callback, void * const data ) {
+void workqueue_enqueue( WorkQueue * queue, WorkQueueCallback * callback, void * data ) { // stores a job at the tail slot, advances tail, signals the semaphore
 	assert( queue->jobs_queued < array_count( queue->jobs ) );
 
-	const Job job = { callback, data };
+	Job job = { callback, data };
 
-	queue->jobs[ queue->tail ] = job;
+	u16 tail = atomic_get_u16( &queue->tail ); // read tail once so the slot index and the new tail agree
+
+	queue->jobs[ tail ] = job;
 	queue->jobs_queued++;
 
 	write_barrier(); // compiler barrier: keep the job write ahead of the tail update
-	queue->tail = ( queue->tail + 1 ) % array_count( queue->jobs );
+	atomic_set( &queue->tail, ( tail + 1 ) % array_count( queue->jobs ) ); // wraps around the jobs array
 
 	semaphore_signal( &queue->sem );
 }
 
-void workqueue_exhaust( WorkQueue * const queue ) {
-	while( queue->jobs_completed < queue->jobs_queued ) {
+void workqueue_exhaust( WorkQueue * queue ) { // steps the queue on the calling thread until jobs_completed reaches jobs_queued, then resets both counters
+	while( atomic_get( &queue->jobs_completed ) < queue->jobs_queued ) {
 		workqueue_step( queue->num_threads, queue ); // passes num_threads as the thread_id, which workqueue_step uses to index queue->arenas
 	}
 
 	queue->jobs_queued = 0;
-	queue->jobs_completed = 0;
+	atomic_set_u16( &queue->jobs_completed, 0 );
 }
diff --git a/work_queue.h b/work_queue.h
@@ -2,6 +2,7 @@
 #define _WORK_QUEUE_H_
 
 #include "intrinsics.h"
+#include "platform_atomic.h"
 #include "platform_semaphore.h"
 #include "memory_arena.h"
 
@@ -19,11 +20,11 @@ struct WorkQueue {
 	Semaphore sem;
 
 	// using head/length means we need an atomic pair which is a pain
-	volatile u16 head;
-	volatile u16 tail;
+	atomic_u16 head;
+	atomic_u16 tail;
 
-	volatile u16 jobs_queued;
-	volatile u16 jobs_completed;
+	u16 jobs_queued;
+	atomic_u16 jobs_completed;
 
 	u32 num_threads;
 	MemoryArena * arenas;
	medfall Unnamed repository; edit this file 'description' to name the repository.
	Log \| Files \| Refs
benchmark.cc	\|	16	++++++++--------
platform_atomic.h	\|	73	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
unix_atomic.h	\|	83	+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
work_queue.cc	\|	39	+++++++++++++++++++++------------------
work_queue.h	\|	9	+++++----