mudgangster

Tiny, scriptable MUD client

tracy_rpmalloc.cpp (70419B)


      1 #ifdef TRACY_ENABLE
      2 
      3 /* rpmalloc.c  -  Memory allocator  -  Public Domain  -  2016 Mattias Jansson / Rampant Pixels
      4  *
      5  * This library provides a cross-platform lock free thread caching malloc implementation in C11.
      6  * The latest source code is always available at
      7  *
      8  * https://github.com/rampantpixels/rpmalloc
      9  *
     10  * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
     11  *
     12  */
     13 
     14 #include "tracy_rpmalloc.hpp"
     15 
     16 /// Build time configurable limits
     17 #ifndef HEAP_ARRAY_SIZE
     18 //! Size of heap hashmap
     19 #define HEAP_ARRAY_SIZE           79
     20 #endif
     21 #ifndef ENABLE_THREAD_CACHE
     22 //! Enable per-thread cache
     23 #define ENABLE_THREAD_CACHE       1
     24 #endif
     25 #ifndef ENABLE_GLOBAL_CACHE
     26 //! Enable global cache shared between all threads, requires thread cache
     27 #define ENABLE_GLOBAL_CACHE       1
     28 #endif
     29 #ifndef ENABLE_VALIDATE_ARGS
     30 //! Enable validation of args to public entry points
     31 #define ENABLE_VALIDATE_ARGS      0
     32 #endif
     33 #ifndef ENABLE_STATISTICS
     34 //! Enable statistics collection
     35 #define ENABLE_STATISTICS         0
     36 #endif
     37 #ifndef ENABLE_ASSERTS
     38 //! Enable asserts
     39 #define ENABLE_ASSERTS            0
     40 #endif
     41 #ifndef ENABLE_PRELOAD
     42 //! Support preloading
     43 #define ENABLE_PRELOAD            0
     44 #endif
     45 #ifndef ENABLE_GUARDS
     46 //! Enable overwrite/underwrite guards
     47 #define ENABLE_GUARDS             0
     48 #endif
     49 #ifndef ENABLE_UNLIMITED_CACHE
     50 //! Unlimited cache disables any cache limitations
     51 #define ENABLE_UNLIMITED_CACHE    0
     52 #endif
     53 #ifndef DEFAULT_SPAN_MAP_COUNT
     54 //! Default number of spans to map in a call to map more virtual memory
     55 #define DEFAULT_SPAN_MAP_COUNT    16
     56 #endif
     57 //! Minimum cache size to remain after a release to global cache
     58 #define MIN_SPAN_CACHE_SIZE 64
     59 //! Minimum number of spans to transfer between thread and global cache
     60 #define MIN_SPAN_CACHE_RELEASE 16
     61 //! Maximum cache size divisor (max cache size will be max allocation count divided by this divisor)
     62 #define MAX_SPAN_CACHE_DIVISOR 4
     63 //! Minimum cache size to remain after a release to global cache, large spans
     64 #define MIN_LARGE_SPAN_CACHE_SIZE 8
     65 //! Minimum number of spans to transfer between thread and global cache, large spans
     66 #define MIN_LARGE_SPAN_CACHE_RELEASE 4
     67 //! Maximum cache size divisor, large spans (max cache size will be max allocation count divided by this divisor)
     68 #define MAX_LARGE_SPAN_CACHE_DIVISOR 16
     69 //! Multiplier for global span cache limit (max cache size will be calculated like thread cache and multiplied with this)
     70 #define MAX_GLOBAL_CACHE_MULTIPLIER 8
     71 
     72 #if !ENABLE_THREAD_CACHE
     73 #  undef ENABLE_GLOBAL_CACHE
     74 #  define ENABLE_GLOBAL_CACHE 0
     75 #  undef MIN_SPAN_CACHE_SIZE
     76 #  undef MIN_SPAN_CACHE_RELEASE
     77 #  undef MAX_SPAN_CACHE_DIVISOR
     78 #  undef MIN_LARGE_SPAN_CACHE_SIZE
     79 #  undef MIN_LARGE_SPAN_CACHE_RELEASE
     80 #  undef MAX_LARGE_SPAN_CACHE_DIVISOR
     81 #endif
     82 #if !ENABLE_GLOBAL_CACHE
     83 #  undef MAX_GLOBAL_CACHE_MULTIPLIER
     84 #endif
     85 
     86 /// Platform and arch specifics
     87 #ifdef _MSC_VER
     88 #  pragma warning( push )
     89 #  pragma warning( disable : 4324 )
     90 #  define ALIGNED_STRUCT(name, alignment) __declspec(align(alignment)) struct name
     91 #  define FORCEINLINE __forceinline
     92 #  define atomic_thread_fence_acquire() //_ReadWriteBarrier()
     93 #  define atomic_thread_fence_release() //_ReadWriteBarrier()
     94 #  if ENABLE_VALIDATE_ARGS
     95 #    include <Intsafe.h>
     96 #  endif
     97 #  include <intrin.h>
     98 #else
     99 #  include <unistd.h>
    100 #  if defined(__APPLE__) && ENABLE_PRELOAD
    101 #    include <pthread.h>
    102 #  endif
    103 #  define ALIGNED_STRUCT(name, alignment) struct __attribute__((__aligned__(alignment))) name
    104 #  ifdef FORCEINLINE
    105 #    undef FORCEINLINE
    106 #  endif
    107 #  define FORCEINLINE inline __attribute__((__always_inline__))
    108 #  ifdef __arm__
    109 #    define atomic_thread_fence_acquire() __asm volatile("dmb ish" ::: "memory")
    110 #    define atomic_thread_fence_release() __asm volatile("dmb ishst" ::: "memory")
    111 #  else
    112 #    define atomic_thread_fence_acquire() //__asm volatile("" ::: "memory")
    113 #    define atomic_thread_fence_release() //__asm volatile("" ::: "memory")
    114 #  endif
    115 #endif
    116 
    117 #if defined( __x86_64__ ) || defined( _M_AMD64 ) || defined( _M_X64 ) || defined( _AMD64_ ) || defined( __arm64__ ) || defined( __aarch64__ )
    118 #  define ARCH_64BIT 1
    119 #else
    120 #  define ARCH_64BIT 0
    121 #endif
    122 
    123 #if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 )
    124 #  define PLATFORM_WINDOWS 1
    125 #  define PLATFORM_POSIX 0
    126 #else
    127 #  define PLATFORM_WINDOWS 0
    128 #  define PLATFORM_POSIX 1
    129 #endif
    130 
    131 #include <stdint.h>
    132 #include <string.h>
    133 
    134 #include <assert.h>
    135 
    136 #if ENABLE_GUARDS
    137 #  define MAGIC_GUARD 0xDEADBAAD
    138 #endif
    139 
    140 namespace tracy
    141 {
    142 
    143 /// Atomic access abstraction
    144 ALIGNED_STRUCT(atomic32_t, 4) {
    145 	volatile int32_t nonatomic;
    146 };
    147 typedef struct atomic32_t atomic32_t;
    148 
    149 ALIGNED_STRUCT(atomic64_t, 8) {
    150 	volatile int64_t nonatomic;
    151 };
    152 typedef struct atomic64_t atomic64_t;
    153 
    154 ALIGNED_STRUCT(atomicptr_t, 8) {
    155 	volatile void* nonatomic;
    156 };
    157 typedef struct atomicptr_t atomicptr_t;
    158 
    159 static FORCEINLINE int32_t
    160 atomic_load32(atomic32_t* src) {
    161 	return src->nonatomic;
    162 }
    163 
    164 static FORCEINLINE void
    165 atomic_store32(atomic32_t* dst, int32_t val) {
    166 	dst->nonatomic = val;
    167 }
    168 
    169 static FORCEINLINE int32_t
    170 atomic_incr32(atomic32_t* val) {
    171 #ifdef _MSC_VER
    172 	int32_t old = (int32_t)_InterlockedExchangeAdd((volatile long*)&val->nonatomic, 1);
    173 	return (old + 1);
    174 #else
    175 	return __sync_add_and_fetch(&val->nonatomic, 1);
    176 #endif
    177 }
    178 
    179 static FORCEINLINE int32_t
    180 atomic_add32(atomic32_t* val, int32_t add) {
    181 #ifdef _MSC_VER
    182 	int32_t old = (int32_t)_InterlockedExchangeAdd((volatile long*)&val->nonatomic, add);
    183 	return (old + add);
    184 #else
    185 	return __sync_add_and_fetch(&val->nonatomic, add);
    186 #endif
    187 }
    188 
    189 static FORCEINLINE void*
    190 atomic_load_ptr(atomicptr_t* src) {
    191 	return (void*)((uintptr_t)src->nonatomic);
    192 }
    193 
    194 static FORCEINLINE void
    195 atomic_store_ptr(atomicptr_t* dst, void* val) {
    196 	dst->nonatomic = val;
    197 }
    198 
    199 static FORCEINLINE int
    200 atomic_cas_ptr(atomicptr_t* dst, void* val, void* ref) {
    201 #ifdef _MSC_VER
    202 #  if ARCH_64BIT
    203 	return (_InterlockedCompareExchange64((volatile long long*)&dst->nonatomic,
    204 	                                      (long long)val, (long long)ref) == (long long)ref) ? 1 : 0;
    205 #  else
    206 	return (_InterlockedCompareExchange((volatile long*)&dst->nonatomic,
    207 	                                    (long)val, (long)ref) == (long)ref) ? 1 : 0;
    208 #  endif
    209 #else
    210 	return __sync_bool_compare_and_swap(&dst->nonatomic, ref, val);
    211 #endif
    212 }
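        // How these primitives are combined later in this file (illustration only, mirroring the
        // defer_deallocate/defer_unmap lists below): loads and stores are plain volatile accesses,
        // ordering is requested explicitly through the fence macros, and lock-free list pushes use
        // a load + CAS retry loop:
        //
        //   void* last;
        //   do {
        //       last = atomic_load_ptr(&head);
        //       node->next_span = (span_t*)last;
        //   } while (!atomic_cas_ptr(&head, node, last));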
    213 
    214 /// Preconfigured limits and sizes
    215 //! Granularity of a small allocation block
    216 #define SMALL_GRANULARITY         32
    217 //! Small granularity shift count
    218 #define SMALL_GRANULARITY_SHIFT   5
    219 //! Number of small block size classes
    220 #define SMALL_CLASS_COUNT         63
    221 //! Maximum size of a small block
    222 #define SMALL_SIZE_LIMIT          2016
    223 //! Granularity of a medium allocation block
    224 #define MEDIUM_GRANULARITY        512
    225 //! Medium granularity shift count
    226 #define MEDIUM_GRANULARITY_SHIFT  9
    227 //! Number of medium block size classes
    228 #define MEDIUM_CLASS_COUNT        60
    229 //! Total number of small + medium size classes
    230 #define SIZE_CLASS_COUNT          (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
    231 //! Number of large block size classes
    232 #define LARGE_CLASS_COUNT         32
    233 //! Maximum size of a medium block
    234 #define MEDIUM_SIZE_LIMIT         (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT) - SPAN_HEADER_SIZE)
    235 //! Maximum size of a large block
    236 #define LARGE_SIZE_LIMIT          ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
    237 //! Size of a span header
    238 #define SPAN_HEADER_SIZE          32
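        // Worked example of the limits above: small classes cover requests up to
        // SMALL_CLASS_COUNT * SMALL_GRANULARITY = 63 * 32 = 2016 bytes, so e.g. a 100 byte request
        // is rounded up to the 128 byte class (or whichever class it has been merged with).
        // Medium classes then cover up to 2016 + (60 * 512) - 32 = 32704 bytes, and anything
        // larger, up to LARGE_SIZE_LIMIT (which depends on the configured span size), is served
        // from whole spans.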
    239 
    240 #define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
    241 #define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
    242 
    243 #if ARCH_64BIT
    244 typedef int64_t offset_t;
    245 #else
    246 typedef int32_t offset_t;
    247 #endif
    248 typedef uint32_t count_t;
    249 
    250 #if ENABLE_VALIDATE_ARGS
    251 //! Maximum allocation size to avoid integer overflow
    252 #undef  MAX_ALLOC_SIZE
    253 #define MAX_ALLOC_SIZE            (((size_t)-1) - _memory_span_size)
    254 #endif
    255 
    256 /// Data types
    257 //! A memory heap, per thread
    258 typedef struct heap_t heap_t;
    259 //! Span of memory pages
    260 typedef struct span_t span_t;
    261 //! Size class definition
    262 typedef struct size_class_t size_class_t;
    263 //! Span block bookkeeping
    264 typedef struct span_block_t span_block_t;
    265 //! Span list bookkeeping
    266 typedef struct span_list_t span_list_t;
    267 //! Span data union, usage depending on span state
    268 typedef union span_data_t span_data_t;
    269 //! Cache data
    270 typedef struct span_counter_t span_counter_t;
    271 //! Global cache
    272 typedef struct global_cache_t global_cache_t;
    273 
    274 //! Flag indicating span is the first (master) span of a split superspan
    275 #define SPAN_FLAG_MASTER 1
    276 //! Flag indicating span is a secondary (sub) span of a split superspan
    277 #define SPAN_FLAG_SUBSPAN 2
    278 
    279 //Alignment offset must match in both structures to keep the data when
    280 //transitioning between being used for blocks and being part of a list
    281 struct span_block_t {
    282 	//! Free list
    283 	uint16_t    free_list;
    284 	//! First autolinked block
    285 	uint16_t    first_autolink;
    286 	//! Free count
    287 	uint16_t    free_count;
    288 	//! Alignment offset
    289 	uint16_t    align_offset;
    290 };
    291 
    292 struct span_list_t {
    293 	//! List size
    294 	uint32_t    size;
    295 	//! Unused in lists
    296 	uint16_t    unused;
    297 	//! Alignment offset
    298 	uint16_t    align_offset;
    299 };
    300 
    301 union span_data_t {
    302 	//! Span data when used as blocks
    303 	span_block_t block;
    304 	//! Span data when used in lists
    305 	span_list_t list;
    306 };
    307 
    308 //A span can either represent a single span of memory pages, with a size given by the span size configuration
    309 //(_memory_span_size), or a set of spans in a contiguous region, a super span. Any reference to the term "span"
    310 //usually covers both a single span and a super span. A super span can further be divided into multiple spans
    311 //(or, recursively, super spans), where the first (super)span is the master and subsequent (super)spans are
    312 //subspans. The master span keeps track of how many subspans are still alive and mapped in virtual memory;
    313 //once all subspans and the master have been unmapped, the entire superspan region is released and unmapped
    314 //(on Windows, for example, the entire superspan range has to be released in the same call to release the
    315 //virtual memory range, but individual subranges can be decommitted individually to reduce physical memory use).
    316 struct span_t {
    317 	//! Heap ID
    318 	atomic32_t  heap_id;
    319 	//! Size class
    320 	uint16_t    size_class;
    321 	// TODO: If we could store remainder part of flags as an atomic counter, the entire check
    322 	//       if master is owned by calling heap could be simplified to an atomic dec from any thread
    323 	//       since remainder of a split super span only ever decreases, never increases
    324 	//! Flags and counters
    325 	uint16_t    flags;
    326 	//! Span data
    327 	span_data_t data;
    328 	//! Next span
    329 	span_t*     next_span;
    330 	//! Previous span
    331 	span_t*     prev_span;
    332 };
    333 static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
    334 
    335 //Adaptive cache counter of a single superspan span count
    336 struct span_counter_t {
    337 	//! Allocation high water mark
    338 	uint32_t  max_allocations;
    339 	//! Current number of allocations
    340 	uint32_t  current_allocations;
    341 	//! Cache limit
    342 	uint32_t  cache_limit;
    343 };
    344 
    345 struct heap_t {
    346 	//! Heap ID
    347 	int32_t      id;
    348 	//! Free count for each size class active span
    349 	span_block_t active_block[SIZE_CLASS_COUNT];
    350 	//! Active span for each size class
    351 	span_t*      active_span[SIZE_CLASS_COUNT];
    352 	//! List of semi-used spans with free blocks for each size class (double linked list)
    353 	span_t*      size_cache[SIZE_CLASS_COUNT];
    354 #if ENABLE_THREAD_CACHE
    355 	//! List of free spans (single linked list)
    356 	span_t*      span_cache[LARGE_CLASS_COUNT];
    357 	//! Allocation counters
    358 	span_counter_t span_counter[LARGE_CLASS_COUNT];
    359 #endif
    360 	//! Mapped but unused spans
    361 	span_t*      span_reserve;
    362 	//! Master span for mapped but unused spans
    363 	span_t*      span_reserve_master;
    364 	//! Number of mapped but unused spans
    365 	size_t       spans_reserved;
    366 	//! Deferred deallocation
    367 	atomicptr_t  defer_deallocate;
    368 	//! Deferred unmaps
    369 	atomicptr_t  defer_unmap;
    370 	//! Next heap in id list
    371 	heap_t*      next_heap;
    372 	//! Next heap in orphan list
    373 	heap_t*      next_orphan;
    374 	//! Memory pages alignment offset
    375 	size_t       align_offset;
    376 #if ENABLE_STATISTICS
    377 	//! Number of bytes transitioned thread -> global
    378 	size_t       thread_to_global;
    379 	//! Number of bytes transitioned global -> thread
    380 	size_t       global_to_thread;
    381 #endif
    382 };
    383 
    384 struct size_class_t {
    385 	//! Size of blocks in this class
    386 	uint32_t size;
    387 	//! Number of blocks in each chunk
    388 	uint16_t block_count;
    389 	//! Class index this class is merged with
    390 	uint16_t class_idx;
    391 };
    392 static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
    393 
    394 struct global_cache_t {
    395 	//! Cache list pointer
    396 	atomicptr_t cache;
    397 	//! Cache size
    398 	atomic32_t size;
    399 	//! ABA counter
    400 	atomic32_t counter;
    401 };
    402 
    403 /// Global data
    404 //! Configuration
    405 static rpmalloc_config_t _memory_config;
    406 //! Memory page size
    407 static size_t _memory_page_size;
    408 //! Shift to divide by page size
    409 static size_t _memory_page_size_shift;
    410 //! Mask to get to start of a memory page
    411 static size_t _memory_page_mask;
    412 //! Granularity at which memory pages are mapped by OS
    413 static size_t _memory_map_granularity;
    414 //! Size of a span of memory pages
    415 static size_t _memory_span_size;
    416 //! Shift to divide by span size
    417 static size_t _memory_span_size_shift;
    418 //! Mask to get to start of a memory span
    419 static uintptr_t _memory_span_mask;
    420 //! Global size classes
    421 static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
    422 //! Run-time size limit of medium blocks
    423 static size_t _memory_medium_size_limit;
    424 //! Heap ID counter
    425 static atomic32_t _memory_heap_id;
    426 #if ENABLE_THREAD_CACHE
    427 //! Adaptive cache max allocation count
    428 static uint32_t _memory_max_allocation[LARGE_CLASS_COUNT];
    429 #endif
    430 #if ENABLE_GLOBAL_CACHE
    431 //! Global span cache
    432 static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
    433 #endif
    434 //! All heaps
    435 static atomicptr_t _memory_heaps[HEAP_ARRAY_SIZE];
    436 //! Orphaned heaps
    437 static atomicptr_t _memory_orphan_heaps;
    438 //! Running orphan counter to avoid ABA issues in linked list
    439 static atomic32_t _memory_orphan_counter;
    440 //! Active heap count
    441 static atomic32_t _memory_active_heaps;
    442 #if ENABLE_STATISTICS
    443 //! Total number of currently mapped memory pages
    444 static atomic32_t _mapped_pages;
    445 //! Total number of currently reserved spans
    446 static atomic32_t _reserved_spans;
    447 //! Running counter of total number of mapped memory pages since start
    448 static atomic32_t _mapped_total;
    449 //! Running counter of total number of unmapped memory pages since start
    450 static atomic32_t _unmapped_total;
    451 #endif
    452 
    453 #define MEMORY_UNUSED(x) (void)sizeof((x))
    454 
    455 //! Current thread heap
    456 #if defined(__APPLE__) && ENABLE_PRELOAD
    457 static pthread_key_t _memory_thread_heap;
    458 #else
    459 #  ifdef _MSC_VER
    460 #    define _Thread_local __declspec(thread)
    461 #    define TLS_MODEL
    462 #  else
    463 #    define TLS_MODEL __attribute__((tls_model("initial-exec")))
    464 #    if !defined(__clang__) && defined(__GNUC__)
    465 #      define _Thread_local __thread
    466 #    endif
    467 #  endif
    468 static _Thread_local heap_t* _memory_thread_heap TLS_MODEL;
    469 #endif
    470 
    471 //! Get the current thread heap
    472 static FORCEINLINE heap_t*
    473 get_thread_heap(void) {
    474 #if defined(__APPLE__) && ENABLE_PRELOAD
    475 	return pthread_getspecific(_memory_thread_heap);
    476 #else
    477 	return _memory_thread_heap;
    478 #endif
    479 }
    480 
    481 //! Set the current thread heap
    482 static void
    483 set_thread_heap(heap_t* heap) {
    484 #if defined(__APPLE__) && ENABLE_PRELOAD
    485 	pthread_setspecific(_memory_thread_heap, heap);
    486 #else
    487 	_memory_thread_heap = heap;
    488 #endif
    489 }
    490 
    491 //! Default implementation to map more virtual memory
    492 static void*
    493 _memory_map_os(size_t size, size_t* offset);
    494 
    495 //! Default implementation to unmap virtual memory
    496 static void
    497 _memory_unmap_os(void* address, size_t size, size_t offset, int release);
    498 
    499 //! Deallocate any deferred blocks and check for the given size class
    500 static int
    501 _memory_deallocate_deferred(heap_t* heap, size_t size_class);
    502 
    503 //! Lookup a memory heap from heap ID
    504 static heap_t*
    505 _memory_heap_lookup(int32_t id) {
    506 	uint32_t list_idx = id % HEAP_ARRAY_SIZE;
    507 	heap_t* heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
    508 	while (heap && (heap->id != id))
    509 		heap = heap->next_heap;
    510 	return heap;
    511 }
    512 
    513 #if ENABLE_THREAD_CACHE
    514 
    515 //! Increase an allocation counter
    516 static void
    517 _memory_counter_increase(span_counter_t* counter, uint32_t* global_counter, size_t span_count) {
    518 	if (++counter->current_allocations > counter->max_allocations) {
    519 		counter->max_allocations = counter->current_allocations;
    520 		const uint32_t cache_limit_max = (uint32_t)_memory_span_size - 2;
    521 #if !ENABLE_UNLIMITED_CACHE
    522 		counter->cache_limit = counter->max_allocations / ((span_count == 1) ? MAX_SPAN_CACHE_DIVISOR : MAX_LARGE_SPAN_CACHE_DIVISOR);
    523 		const uint32_t cache_limit_min = (span_count == 1) ? (MIN_SPAN_CACHE_RELEASE + MIN_SPAN_CACHE_SIZE) : (MIN_LARGE_SPAN_CACHE_RELEASE + MIN_LARGE_SPAN_CACHE_SIZE);
    524 		if (counter->cache_limit < cache_limit_min)
    525 			counter->cache_limit = cache_limit_min;
    526 		if (counter->cache_limit > cache_limit_max)
    527 			counter->cache_limit = cache_limit_max;
    528 #else
    529 		counter->cache_limit = cache_limit_max;
    530 #endif
    531 		if (counter->max_allocations > *global_counter)
    532 			*global_counter = counter->max_allocations;
    533 	}
    534 }
    535 
    536 #else
    537 #  define _memory_counter_increase(counter, global_counter, span_count) do {} while (0)
    538 #endif
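        // Example of the adaptive limit above: if a thread's high water mark for single-span
        // allocations reaches 1000, its cache limit becomes 1000 / MAX_SPAN_CACHE_DIVISOR = 250
        // spans, clamped to at least MIN_SPAN_CACHE_RELEASE + MIN_SPAN_CACHE_SIZE = 80 spans and
        // capped at _memory_span_size - 2.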
    539 
    540 #if ENABLE_STATISTICS
    541 #  define _memory_statistics_add(atomic_counter, value) atomic_add32(atomic_counter, (int32_t)(value))
    542 #  define _memory_statistics_sub(atomic_counter, value) atomic_add32(atomic_counter, -(int32_t)(value))
    543 #else
    544 #  define _memory_statistics_add(atomic_counter, value) do {} while(0)
    545 #  define _memory_statistics_sub(atomic_counter, value) do {} while(0)
    546 #endif
    547 
    548 //! Map more virtual memory
    549 static void*
    550 _memory_map(size_t size, size_t* offset) {
    551 	assert(!(size % _memory_page_size));
    552 	_memory_statistics_add(&_mapped_pages, (size >> _memory_page_size_shift));
    553 	_memory_statistics_add(&_mapped_total, (size >> _memory_page_size_shift));
    554 	return _memory_config.memory_map(size, offset);
    555 }
    556 
    557 //! Unmap virtual memory
    558 static void
    559 _memory_unmap(void* address, size_t size, size_t offset, int release) {
    560 	assert((size < _memory_span_size) || !((uintptr_t)address & ~_memory_span_mask));
    561 	assert(!(size % _memory_page_size));
    562 	_memory_statistics_sub(&_mapped_pages, (size >> _memory_page_size_shift));
    563 	_memory_statistics_add(&_unmapped_total, (size >> _memory_page_size_shift));
    564 	_memory_config.memory_unmap(address, size, offset, release);
    565 }
    566 
    567 //! Make flags field in a span from flags, remainder/distance and count
    568 #define SPAN_MAKE_FLAGS(flags, remdist, count) ((uint16_t)((flags) | ((uint16_t)((remdist) - 1) << 2) | ((uint16_t)((count) - 1) << 9))); assert((flags) < 4); assert((remdist) && (remdist) < 128); assert((count) && (count) < 128)
    569 //! Check if span has any of the given flags
    570 #define SPAN_HAS_FLAG(flags, flag) ((flags) & (flag))
    571 //! Get the distance from flags field
    572 #define SPAN_DISTANCE(flags) (1 + (((flags) >> 2) & 0x7f))
    573 //! Get the remainder from flags field
    574 #define SPAN_REMAINS(flags) (1 + (((flags) >> 2) & 0x7f))
    575 //! Get the count from flags field
    576 #define SPAN_COUNT(flags) (1 + (((flags) >> 9) & 0x7f))
    577 //! Set the remainder in the flags field (MUST be done from the owner heap thread)
    578 #define SPAN_SET_REMAINS(flags, remains) flags = ((uint16_t)(((flags) & 0xfe03) | ((uint16_t)((remains) - 1) << 2))); assert((remains) < 128)
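        // Layout of the 16-bit flags field produced by SPAN_MAKE_FLAGS: bits 0-1 hold the
        // MASTER/SUBSPAN flags, bits 2-8 hold (remainder or distance) - 1 and bits 9-15 hold
        // (span count) - 1. For example, a 4-span subspan located 2 spans after its master is
        // encoded as SPAN_FLAG_SUBSPAN | ((2 - 1) << 2) | ((4 - 1) << 9) = 0x0606.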
    579 
    580 //! Resize the given super span to the given count of spans, store the remainder in the heap reserved spans fields
    581 static void
    582 _memory_set_span_remainder_as_reserved(heap_t* heap, span_t* span, size_t use_count) {
    583 	size_t current_count = SPAN_COUNT(span->flags);
    584 
    585 	assert(!SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER) || !SPAN_HAS_FLAG(span->flags, SPAN_FLAG_SUBSPAN));
    586 	assert((current_count > 1) && (current_count < 127));
    587 	assert(!heap->spans_reserved);
    588 	assert((size_t)SPAN_COUNT(span->flags) == current_count);
    589 	assert(current_count > use_count);
    590 
    591 	heap->span_reserve = (span_t*)pointer_offset(span, use_count * _memory_span_size);
    592 	heap->spans_reserved = current_count - use_count;
    593 	if (!SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN)) {
    594 		//We must store the heap id before setting as master, to force unmaps to defer to this heap thread
    595 		atomic_store32(&span->heap_id, heap->id);
    596 		atomic_thread_fence_release();
    597 		heap->span_reserve_master = span;
    598 		span->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_MASTER, current_count, use_count);
    599 		_memory_statistics_add(&_reserved_spans, current_count);
    600 	}
    601 	else if (SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER)) {
    602 		//Only owner heap thread can modify a master span
    603 		assert(atomic_load32(&span->heap_id) == heap->id);
    604 		uint16_t remains = SPAN_REMAINS(span->flags);
    605 		assert(remains >= current_count);
    606 		heap->span_reserve_master = span;
    607 		span->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_MASTER, remains, use_count);
    608 	}
    609 	else { //SPAN_FLAG_SUBSPAN
    610 		//Resizing a subspan is a safe operation in any thread
    611 		uint16_t distance = SPAN_DISTANCE(span->flags);
    612 		span_t* master = (span_t*)pointer_offset(span, -(int)distance * (int)_memory_span_size);
    613 		heap->span_reserve_master = master;
    614 		assert(SPAN_HAS_FLAG(master->flags, SPAN_FLAG_MASTER));
    615 		assert((size_t)SPAN_REMAINS(master->flags) >= current_count);
    616 		span->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_SUBSPAN, distance, use_count);
    617 	}
    618 	assert((SPAN_COUNT(span->flags) + heap->spans_reserved) == current_count);
    619 }
    620 
    621 //! Map in memory pages for the given number of spans (or use previously reserved pages)
    622 static span_t*
    623 _memory_map_spans(heap_t* heap, size_t span_count) {
    624 	if (span_count <= heap->spans_reserved) {
    625 		span_t* span = heap->span_reserve;
    626 		heap->span_reserve = (span_t*)pointer_offset(span, span_count * _memory_span_size);
    627 		heap->spans_reserved -= span_count;
    628 		//Declare the span to be a subspan with given distance from master span
    629 		uint16_t distance = (uint16_t)((uintptr_t)pointer_diff(span, heap->span_reserve_master) >> _memory_span_size_shift);
    630 		span->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_SUBSPAN, distance, span_count);
    631 		span->data.block.align_offset = 0;
    632 		return span;
    633 	}
    634 
    635 	//We cannot request extra spans if we already have some (but not enough) pending reserved spans
    636 	size_t request_spans = (heap->spans_reserved || (span_count > _memory_config.span_map_count)) ? span_count : _memory_config.span_map_count;
    637 	size_t align_offset = 0;
    638 	span_t* span = (span_t*)_memory_map(request_spans * _memory_span_size, &align_offset);
    639 	span->flags = SPAN_MAKE_FLAGS(0, request_spans, request_spans);
    640 	span->data.block.align_offset = (uint16_t)align_offset;
    641 	if (request_spans > span_count) {
    642 		//We have extra spans, store them as reserved spans in heap
    643 		_memory_set_span_remainder_as_reserved(heap, span, span_count);
    644 	}
    645 	return span;
    646 }
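        // Example: with the default span_map_count of 16 and no pending reserve, a request for a
        // single span maps 16 spans worth of memory in one call; the first span is returned as the
        // master and the remaining 15 are kept in heap->span_reserve for later requests.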
    647 
    648 //! Defer unmapping of the given span to the owner heap
    649 static int
    650 _memory_unmap_defer(int32_t heap_id, span_t* span) {
    651 	//Get the heap and link in pointer in list of deferred operations
    652 	heap_t* heap = _memory_heap_lookup(heap_id);
    653 	if (!heap)
    654 		return 0;
    655 	atomic_store32(&span->heap_id, heap_id);
    656 	void* last_ptr;
    657 	do {
    658 		last_ptr = atomic_load_ptr(&heap->defer_unmap);
    659 		span->next_span = (span_t*)last_ptr;
    660 	} while (!atomic_cas_ptr(&heap->defer_unmap, span, last_ptr));
    661 	return 1;
    662 }
    663 
    664 //! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
    665 static void
    666 _memory_unmap_span(heap_t* heap, span_t* span) {
    667 	size_t span_count = SPAN_COUNT(span->flags);
    668 	assert(!SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER) || !SPAN_HAS_FLAG(span->flags, SPAN_FLAG_SUBSPAN));
    669 	//A plain run of spans can be unmapped directly
    670 	if (!SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN)) {
    671 		_memory_unmap(span, span_count * _memory_span_size, span->data.list.align_offset, 1);
    672 		return;
    673 	}
    674 
    675 	uint32_t is_master = SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER);
    676 	span_t* master = is_master ? span : (span_t*)(pointer_offset(span, -(int)SPAN_DISTANCE(span->flags) * (int)_memory_span_size));
    677 
    678 	assert(is_master || SPAN_HAS_FLAG(span->flags, SPAN_FLAG_SUBSPAN));
    679 	assert(SPAN_HAS_FLAG(master->flags, SPAN_FLAG_MASTER));
    680 
    681 	//Check if we own the master span, otherwise defer (only owner of master span can modify remainder field)
    682 	int32_t master_heap_id = atomic_load32(&master->heap_id);
    683 	if (heap && (master_heap_id != heap->id)) {
    684 		if (_memory_unmap_defer(master_heap_id, span))
    685 			return;
    686 	}
    687 	if (!is_master) {
    688 		//Directly unmap subspans
    689 		assert(span->data.list.align_offset == 0);
    690 		_memory_unmap(span, span_count * _memory_span_size, 0, 0);
    691 		_memory_statistics_sub(&_reserved_spans, span_count);
    692 	}
    693 	else {
    694 		//Special double flag to denote an unmapped master
    695 		//It must be kept in memory since the span header is still needed to track the remaining subspans
    696 		span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN;
    697 	}
    698 	//We are in owner thread of the master span
    699 	uint32_t remains = SPAN_REMAINS(master->flags);
    700 	assert(remains >= span_count);
    701 	remains = ((uint32_t)span_count >= remains) ? 0 : (remains - (uint32_t)span_count);
    702 	if (!remains) {
    703 		//Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span
    704 		assert(SPAN_HAS_FLAG(master->flags, SPAN_FLAG_MASTER) && SPAN_HAS_FLAG(master->flags, SPAN_FLAG_SUBSPAN));
    705 		span_count = SPAN_COUNT(master->flags);
    706 		_memory_unmap(master, span_count * _memory_span_size, master->data.list.align_offset, 1);
    707 		_memory_statistics_sub(&_reserved_spans, span_count);
    708 	}
    709 	else {
    710 		//Set remaining spans
    711 		SPAN_SET_REMAINS(master->flags, remains);
    712 	}
    713 }
    714 
    715 //! Process pending deferred cross-thread unmaps
    716 static span_t*
    717 _memory_unmap_deferred(heap_t* heap, size_t wanted_count) {
    718 	//Grab the current list of deferred unmaps
    719 	atomic_thread_fence_acquire();
    720 	span_t* span = (span_t*)atomic_load_ptr(&heap->defer_unmap);
    721 	if (!span || !atomic_cas_ptr(&heap->defer_unmap, 0, span))
    722 		return 0;
    723 	span_t* found_span = 0;
    724 	do {
    725 		//Verify that we own the master span, otherwise re-defer to owner
    726 		void* next = span->next_span;
    727 		size_t span_count = SPAN_COUNT(span->flags);
    728 		if (!found_span && span_count == wanted_count) {
    729 			assert(!SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER) || !SPAN_HAS_FLAG(span->flags, SPAN_FLAG_SUBSPAN));
    730 			found_span = span;
    731 		}
    732 		else {
    733 			uint32_t is_master = SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER);
    734 			span_t* master = is_master ? span : (span_t*)(pointer_offset(span, -(int)SPAN_DISTANCE(span->flags) * (int)_memory_span_size));
    735 			int32_t master_heap_id = atomic_load32(&master->heap_id);
    736 			if ((atomic_load32(&span->heap_id) == master_heap_id) ||
    737 			        !_memory_unmap_defer(master_heap_id, span)) {
    738 				//We own the master span (or heap merged and abandoned)
    739 				_memory_unmap_span(heap, span);
    740 			}
    741 		}
    742 		span = (span_t*)next;
    743 	} while (span);
    744 	return found_span;
    745 }
    746 
    747 //! Unmap a single linked list of spans
    748 static void
    749 _memory_unmap_span_list(heap_t* heap, span_t* span) {
    750 	size_t list_size = span->data.list.size;
    751 	for (size_t ispan = 0; ispan < list_size; ++ispan) {
    752 		span_t* next_span = span->next_span;
    753 		_memory_unmap_span(heap, span);
    754 		span = next_span;
    755 	}
    756 	assert(!span);
    757 }
    758 
    759 #if ENABLE_THREAD_CACHE
    760 
    761 //! Split a super span in two
    762 static span_t*
    763 _memory_span_split(heap_t* heap, span_t* span, size_t use_count) {
    764 	uint16_t distance = 0;
    765 	size_t current_count = SPAN_COUNT(span->flags);
    766 	assert(current_count > use_count);
    767 	assert(!SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER) || !SPAN_HAS_FLAG(span->flags, SPAN_FLAG_SUBSPAN));
    768 	if (!SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN)) {
    769 		//Must store heap in master span before use, to avoid issues when unmapping subspans
    770 		atomic_store32(&span->heap_id, heap->id);
    771 		atomic_thread_fence_release();
    772 		span->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_MASTER, current_count, use_count);
    773 		_memory_statistics_add(&_reserved_spans, current_count);
    774 	}
    775 	else if (SPAN_HAS_FLAG(span->flags, SPAN_FLAG_MASTER)) {
    776 		//Only valid to call on master span if we own it
    777 		assert(atomic_load32(&span->heap_id) == heap->id);
    778 		uint16_t remains = SPAN_REMAINS(span->flags);
    779 		assert(remains >= current_count);
    780 		span->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_MASTER, remains, use_count);
    781 	}
    782 	else { //SPAN_FLAG_SUBSPAN
    783 		distance = SPAN_DISTANCE(span->flags);
    784 		span->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_SUBSPAN, distance, use_count);
    785 	}
    786 	//Setup remainder as a subspan
    787 	span_t* subspan = (span_t*)pointer_offset(span, use_count * _memory_span_size);
    788 	subspan->flags = SPAN_MAKE_FLAGS(SPAN_FLAG_SUBSPAN, distance + use_count, current_count - use_count);
    789 	subspan->data.list.align_offset = 0;
    790 	return subspan;
    791 }
    792 
    793 //! Add span to head of single linked span list
    794 static size_t
    795 _memory_span_list_push(span_t** head, span_t* span) {
    796 	span->next_span = *head;
    797 	if (*head)
    798 		span->data.list.size = (*head)->data.list.size + 1;
    799 	else
    800 		span->data.list.size = 1;
    801 	*head = span;
    802 	return span->data.list.size;
    803 }
    804 
    805 //! Remove span from head of single linked span list, returns the new list head
    806 static span_t*
    807 _memory_span_list_pop(span_t** head) {
    808 	span_t* span = *head;
    809 	span_t* next_span = 0;
    810 	if (span->data.list.size > 1) {
    811 		next_span = span->next_span;
    812 		assert(next_span);
    813 		next_span->data.list.size = span->data.list.size - 1;
    814 	}
    815 	*head = next_span;
    816 	return span;
    817 }
    818 
    819 //! Split a single linked span list
    820 static span_t*
    821 _memory_span_list_split(span_t* span, size_t limit) {
    822 	span_t* next = 0;
    823 	if (limit < 2)
    824 		limit = 2;
    825 	if (span->data.list.size > limit) {
    826 		count_t list_size = 1;
    827 		span_t* last = span;
    828 		next = span->next_span;
    829 		while (list_size < limit) {
    830 			last = next;
    831 			next = next->next_span;
    832 			++list_size;
    833 		}
    834 		last->next_span = 0;
    835 		assert(next);
    836 		next->data.list.size = span->data.list.size - list_size;
    837 		span->data.list.size = list_size;
    838 		span->prev_span = 0;
    839 	}
    840 	return next;
    841 }
    842 
    843 #endif
    844 
    845 //! Add a span to a double linked list
    846 static void
    847 _memory_span_list_doublelink_add(span_t** head, span_t* span) {
    848 	if (*head) {
    849 		(*head)->prev_span = span;
    850 		span->next_span = *head;
    851 	}
    852 	else {
    853 		span->next_span = 0;
    854 	}
    855 	*head = span;
    856 }
    857 
    858 //! Remove a span from a double linked list
    859 static void
    860 _memory_span_list_doublelink_remove(span_t** head, span_t* span) {
    861 	if (*head == span) {
    862 		*head = span->next_span;
    863 	}
    864 	else {
    865 		span_t* next_span = span->next_span;
    866 		span_t* prev_span = span->prev_span;
    867 		if (next_span)
    868 			next_span->prev_span = prev_span;
    869 		prev_span->next_span = next_span;
    870 	}
    871 }
    872 
    873 #if ENABLE_GLOBAL_CACHE
    874 
    875 //! Insert the given list of memory page spans in the global cache
    876 static void
    877 _memory_cache_insert(heap_t* heap, global_cache_t* cache, span_t* span, size_t cache_limit) {
    878 	assert((span->data.list.size == 1) || (span->next_span != 0));
    879 	int32_t list_size = (int32_t)span->data.list.size;
    880 	//Unmap if cache has reached the limit
    881 	if (atomic_add32(&cache->size, list_size) > (int32_t)cache_limit) {
    882 		_memory_unmap_span_list(heap, span);
    883 		atomic_add32(&cache->size, -list_size);
    884 		return;
    885 	}
    886 	void* current_cache, *new_cache;
    887 	do {
    888 		current_cache = atomic_load_ptr(&cache->cache);
    889 		span->prev_span = (span_t*)(void*)((uintptr_t)current_cache & _memory_span_mask);
    890 		new_cache = (void*)((uintptr_t)span | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
    891 	} while (!atomic_cas_ptr(&cache->cache, new_cache, current_cache));
    892 }
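        // The global cache head packs an ABA guard into the pointer itself: spans are aligned to
        // the span size, so the bits below _memory_span_mask are free and hold a value taken from
        // cache->counter. Every push/pop publishes a freshly tagged head, so a stale head read by
        // a preempted thread fails its CAS even if the same span later returns to the front.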
    893 
    894 //! Extract a number of memory page spans from the global cache
    895 static span_t*
    896 _memory_cache_extract(global_cache_t* cache) {
    897 	uintptr_t span_ptr;
    898 	do {
    899 		void* global_span = atomic_load_ptr(&cache->cache);
    900 		span_ptr = (uintptr_t)global_span & _memory_span_mask;
    901 		if (span_ptr) {
    902 			span_t* span = (span_t*)(void*)span_ptr;
    903 			//By accessing the span pointer before it is swapped out of the list we assume that a contending
    904 			//thread does not manage to traverse the list and unmap this span before we read it
    905 			void* new_cache = (void*)((uintptr_t)span->prev_span | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
    906 			if (atomic_cas_ptr(&cache->cache, new_cache, global_span)) {
    907 				atomic_add32(&cache->size, -(int32_t)span->data.list.size);
    908 				return span;
    909 			}
    910 		}
    911 	} while (span_ptr);
    912 	return 0;
    913 }
    914 
    915 //! Finalize a global cache, only valid from allocator finalization (not thread safe)
    916 static void
    917 _memory_cache_finalize(global_cache_t* cache) {
    918 	void* current_cache = atomic_load_ptr(&cache->cache);
    919 	span_t* span = (span_t*)(void*)((uintptr_t)current_cache & _memory_span_mask);
    920 	while (span) {
    921 		span_t* skip_span = (span_t*)(void*)((uintptr_t)span->prev_span & _memory_span_mask);
    922 		atomic_add32(&cache->size, -(int32_t)span->data.list.size);
    923 		_memory_unmap_span_list(0, span);
    924 		span = skip_span;
    925 	}
    926 	assert(!atomic_load32(&cache->size));
    927 	atomic_store_ptr(&cache->cache, 0);
    928 	atomic_store32(&cache->size, 0);
    929 }
    930 
    931 //! Insert the given list of memory page spans in the global cache
    932 static void
    933 _memory_global_cache_insert(heap_t* heap, span_t* span) {
    934 	//Calculate adaptive limits
    935 	size_t span_count = SPAN_COUNT(span->flags);
    936 	const size_t cache_divisor = (span_count == 1) ? MAX_SPAN_CACHE_DIVISOR : (MAX_LARGE_SPAN_CACHE_DIVISOR * span_count * 2);
    937 	const size_t cache_limit = (MAX_GLOBAL_CACHE_MULTIPLIER * _memory_max_allocation[span_count - 1]) / cache_divisor;
    938 	const size_t cache_limit_min = MAX_GLOBAL_CACHE_MULTIPLIER * (span_count == 1 ? MIN_SPAN_CACHE_SIZE : MIN_LARGE_SPAN_CACHE_SIZE);
    939 	_memory_cache_insert(heap, &_memory_span_cache[span_count - 1], span, cache_limit > cache_limit_min ? cache_limit : cache_limit_min);
    940 }
    941 
    942 //! Extract a number of memory page spans from the global cache for large blocks
    943 static span_t*
    944 _memory_global_cache_extract(size_t span_count) {
    945 	span_t* span = _memory_cache_extract(&_memory_span_cache[span_count - 1]);
    946 	assert(!span || ((size_t)SPAN_COUNT(span->flags) == span_count));
    947 	return span;
    948 }
    949 
    950 #endif
    951 
    952 //! Insert a single span into thread heap cache, releasing to global cache if overflow
    953 static void
    954 _memory_heap_cache_insert(heap_t* heap, span_t* span) {
    955 #if ENABLE_THREAD_CACHE
    956 	size_t span_count = SPAN_COUNT(span->flags);
    957 	size_t idx = span_count - 1;
    958 	if (_memory_span_list_push(&heap->span_cache[idx], span) <= heap->span_counter[idx].cache_limit)
    959 		return;
    960 	heap->span_cache[idx] = _memory_span_list_split(span, heap->span_counter[idx].cache_limit);
    961 	assert(span->data.list.size == heap->span_counter[idx].cache_limit);
    962 #if ENABLE_STATISTICS
    963 	heap->thread_to_global += (size_t)span->data.list.size * span_count * _memory_span_size;
    964 #endif
    965 #if ENABLE_GLOBAL_CACHE
    966 	_memory_global_cache_insert(heap, span);
    967 #else
    968 	_memory_unmap_span_list(heap, span);
    969 #endif
    970 #else
    971 	_memory_unmap_span(heap, span);
    972 #endif
    973 }
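        // Example of the overflow path above: with a single-span cache_limit of 80, the push that
        // makes the thread cache 81 entries deep triggers a split; a batch of 80 spans (headed by
        // the span just inserted) is handed to the global cache in one operation and the remaining
        // entry stays in the thread cache (with the global cache disabled the batch is unmapped).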
    974 
    975 //! Extract the given number of spans from the different cache levels
    976 static span_t*
    977 _memory_heap_cache_extract(heap_t* heap, size_t span_count) {
    978 #if ENABLE_THREAD_CACHE
    979 	size_t idx = span_count - 1;
    980 	//Step 1: check thread cache
    981 	if (heap->span_cache[idx])
    982 		return _memory_span_list_pop(&heap->span_cache[idx]);
    983 #endif
    984 	//Step 2: Check reserved spans
    985 	if (heap->spans_reserved >= span_count)
    986 		return _memory_map_spans(heap, span_count);
    987 	//Step 3: Try processing deferred unmappings
    988 	span_t* span = _memory_unmap_deferred(heap, span_count);
    989 	if (span)
    990 		return span;
    991 #if ENABLE_THREAD_CACHE
    992 	//Step 4: Check larger super spans and split if we find one
    993 	for (++idx; idx < LARGE_CLASS_COUNT; ++idx) {
    994 		if (heap->span_cache[idx]) {
    995 			span = _memory_span_list_pop(&heap->span_cache[idx]);
    996 			break;
    997 		}
    998 	}
    999 	if (span) {
   1000 		//Mark the span as owned by this heap before splitting
   1001 		size_t got_count = SPAN_COUNT(span->flags);
   1002 		assert(got_count > span_count);
   1003 		atomic_store32(&span->heap_id, heap->id);
   1004 		atomic_thread_fence_release();
   1005 
   1006 		//Split the span and store as reserved if no previously reserved spans, or in thread cache otherwise
   1007 		span_t* subspan = _memory_span_split(heap, span, span_count);
   1008 		assert((size_t)(SPAN_COUNT(span->flags) + SPAN_COUNT(subspan->flags)) == got_count);
   1009 		assert((size_t)SPAN_COUNT(span->flags) == span_count);
   1010 		if (!heap->spans_reserved) {
   1011 			heap->spans_reserved = got_count - span_count;
   1012 			heap->span_reserve = subspan;
   1013 			heap->span_reserve_master = (span_t*)pointer_offset(subspan, -(int32_t)SPAN_DISTANCE(subspan->flags) * (int32_t)_memory_span_size);
   1014 		}
   1015 		else {
   1016 			_memory_heap_cache_insert(heap, subspan);
   1017 		}
   1018 		return span;
   1019 	}
   1020 #if ENABLE_GLOBAL_CACHE
   1021 	//Step 5: Extract from global cache
   1022 	idx = span_count - 1;
   1023 	heap->span_cache[idx] = _memory_global_cache_extract(span_count);
   1024 	if (heap->span_cache[idx]) {
   1025 #if ENABLE_STATISTICS
   1026 		heap->global_to_thread += (size_t)heap->span_cache[idx]->data.list.size * span_count * _memory_span_size;
   1027 #endif
   1028 		return _memory_span_list_pop(&heap->span_cache[idx]);
   1029 	}
   1030 #endif
   1031 #endif
   1032 	return 0;
   1033 }
   1034 
   1035 //! Allocate a small/medium sized memory block from the given heap
   1036 static void*
   1037 _memory_allocate_from_heap(heap_t* heap, size_t size) {
   1038 	//Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes)
   1039 	const size_t base_idx = (size <= SMALL_SIZE_LIMIT) ?
   1040 	                        ((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT) :
   1041 	                        SMALL_CLASS_COUNT + ((size - SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY - 1)) >> MEDIUM_GRANULARITY_SHIFT);
   1042 	assert(!base_idx || ((base_idx - 1) < SIZE_CLASS_COUNT));
   1043 	const size_t class_idx = _memory_size_class[base_idx ? (base_idx - 1) : 0].class_idx;
   1044 
   1045 	span_block_t* active_block = heap->active_block + class_idx;
   1046 	size_class_t* size_class = _memory_size_class + class_idx;
   1047 	const count_t class_size = size_class->size;
   1048 
   1049 	//Step 1: Try to get a block from the currently active span. The span block bookkeeping
   1050 	//        data for the active span is stored in the heap for faster access
   1051 use_active:
   1052 	if (active_block->free_count) {
   1053 		//Happy path, we have a span with at least one free block
   1054 		span_t* span = heap->active_span[class_idx];
   1055 		count_t offset = class_size * active_block->free_list;
   1056 		uint32_t* block = (uint32_t*)pointer_offset(span, SPAN_HEADER_SIZE + offset);
   1057 		assert(span);
   1058 
   1059 		--active_block->free_count;
   1060 		if (!active_block->free_count) {
   1061 			//Span is now completely allocated, set the bookkeeping data in the
   1062 			//span itself and reset the active span pointer in the heap
   1063 			span->data.block.free_count = 0;
   1064 			span->data.block.first_autolink = (uint16_t)size_class->block_count;
   1065 			heap->active_span[class_idx] = 0;
   1066 		}
   1067 		else {
   1068 			//Get the next free block, either from linked list or from auto link
   1069 			if (active_block->free_list < active_block->first_autolink) {
   1070 				active_block->free_list = (uint16_t)(*block);
   1071 			}
   1072 			else {
   1073 				++active_block->free_list;
   1074 				++active_block->first_autolink;
   1075 			}
   1076 			assert(active_block->free_list < size_class->block_count);
   1077 		}
   1078 
   1079 		return block;
   1080 	}
   1081 
   1082 	//Step 2: No active span, try executing deferred deallocations and try again if there
   1083 	//        was at least one of the requested size class
   1084 	if (_memory_deallocate_deferred(heap, class_idx)) {
   1085 		if (active_block->free_count)
   1086 			goto use_active;
   1087 	}
   1088 
   1089 	//Step 3: Check if there is a semi-used span of the requested size class available
   1090 	if (heap->size_cache[class_idx]) {
   1091 		//Promote a pending semi-used span to be active, storing bookkeeping data in
   1092 		//the heap structure for faster access
   1093 		span_t* span = heap->size_cache[class_idx];
   1094 		*active_block = span->data.block;
   1095 		assert(active_block->free_count > 0);
   1096 		heap->size_cache[class_idx] = span->next_span;
   1097 		heap->active_span[class_idx] = span;
   1098 
   1099 		//Mark span as owned by this heap
   1100 		atomic_store32(&span->heap_id, heap->id);
   1101 		atomic_thread_fence_release();
   1102 
   1103 		goto use_active;
   1104 	}
   1105 
   1106 	//Step 4: Find a span in one of the cache levels
   1107 	span_t* span = _memory_heap_cache_extract(heap, 1);
   1108 	if (!span) {
   1109 		//Step 5: Map in more virtual memory
   1110 		span = _memory_map_spans(heap, 1);
   1111 	}
   1112 
   1113 	//Mark span as owned by this heap and set base data
   1114 	assert(SPAN_COUNT(span->flags) == 1);
   1115 	span->size_class = (uint16_t)class_idx;
   1116 	atomic_store32(&span->heap_id, heap->id);
   1117 	atomic_thread_fence_release();
   1118 
   1119 	//If we only have one block we will grab it, otherwise
   1120 	//set span as new span to use for next allocation
   1121 	if (size_class->block_count > 1) {
   1122 		//Reset block order to sequential auto linked order
   1123 		active_block->free_count = (uint16_t)(size_class->block_count - 1);
   1124 		active_block->free_list = 1;
   1125 		active_block->first_autolink = 1;
   1126 		heap->active_span[class_idx] = span;
   1127 	}
   1128 	else {
   1129 		span->data.block.free_count = 0;
   1130 		span->data.block.first_autolink = (uint16_t)size_class->block_count;
   1131 	}
   1132 
   1133 	//Track counters
   1134 	_memory_counter_increase(&heap->span_counter[0], &_memory_max_allocation[0], 1);
   1135 
   1136 	//Return first block if memory page span
   1137 	return pointer_offset(span, SPAN_HEADER_SIZE);
   1138 }
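        // Block handout inside the active span works in two modes: a free_list index below
        // first_autolink refers to a block that has been freed before and stores the index of the
        // next free block in its first 32 bits (an intrusive free list), while indices at or above
        // first_autolink belong to blocks that have never been handed out, so the next free block
        // is simply the next sequential one. A fresh span therefore starts with
        // free_list == first_autolink == 1 and hands out blocks 0, 1, 2, ... without reading them.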
   1139 
   1140 //! Allocate a large sized memory block from the given heap
   1141 static void*
   1142 _memory_allocate_large_from_heap(heap_t* heap, size_t size) {
   1143 	//Calculate number of needed max sized spans (including header)
   1144 	//Since this function is never called if size > LARGE_SIZE_LIMIT
   1145 	//the span_count is guaranteed to be <= LARGE_CLASS_COUNT
   1146 	size += SPAN_HEADER_SIZE;
   1147 	size_t span_count = size >> _memory_span_size_shift;
   1148 	if (size & (_memory_span_size - 1))
   1149 		++span_count;
   1150 	size_t idx = span_count - 1;
   1151 
   1152 #if ENABLE_THREAD_CACHE
   1153 	if (!heap->span_cache[idx])
   1154 		_memory_deallocate_deferred(heap, SIZE_CLASS_COUNT + idx);
   1155 #else
   1156 	_memory_deallocate_deferred(heap, SIZE_CLASS_COUNT + idx);
   1157 #endif
   1158 	//Step 1: Find span in one of the cache levels
   1159 	span_t* span = _memory_heap_cache_extract(heap, span_count);
   1160 	if (!span) {
   1161 		//Step 2: Map in more virtual memory
   1162 		span = _memory_map_spans(heap, span_count);
   1163 	}
   1164 
   1165 	//Mark span as owned by this heap and set base data
   1166 	assert((size_t)SPAN_COUNT(span->flags) == span_count);
   1167 	span->size_class = (uint16_t)(SIZE_CLASS_COUNT + idx);
   1168 	atomic_store32(&span->heap_id, heap->id);
   1169 	atomic_thread_fence_release();
   1170 
   1171 	//Increase counter
   1172 	_memory_counter_increase(&heap->span_counter[idx], &_memory_max_allocation[idx], span_count);
   1173 
   1174 	return pointer_offset(span, SPAN_HEADER_SIZE);
   1175 }
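        // Example: with a 64KiB span size, a 100000 byte request grows to 100032 bytes with the
        // header and therefore needs two spans; the caller gets the address SPAN_HEADER_SIZE bytes
        // into the first span.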
   1176 
   1177 //! Allocate a new heap
   1178 static heap_t*
   1179 _memory_allocate_heap(void) {
   1180 	void* raw_heap;
   1181 	void* next_raw_heap;
   1182 	uintptr_t orphan_counter;
   1183 	heap_t* heap;
   1184 	heap_t* next_heap;
   1185 	//Try getting an orphaned heap
   1186 	atomic_thread_fence_acquire();
   1187 	do {
   1188 		raw_heap = atomic_load_ptr(&_memory_orphan_heaps);
   1189 		heap = (heap_t*)(void*)((uintptr_t)raw_heap & _memory_page_mask);
   1190 		if (!heap)
   1191 			break;
   1192 		next_heap = heap->next_orphan;
   1193 		orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
   1194 		next_raw_heap = (void*)((uintptr_t)next_heap | (orphan_counter & ~_memory_page_mask));
   1195 	}
   1196 	while (!atomic_cas_ptr(&_memory_orphan_heaps, next_raw_heap, raw_heap));
   1197 
   1198 	if (!heap) {
   1199 		//Map in pages for a new heap
   1200 		size_t align_offset = 0;
   1201 		heap = (heap_t*)_memory_map((1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size, &align_offset);
   1202 		memset(heap, 0, sizeof(heap_t));
   1203 		heap->align_offset = align_offset;
   1204 
   1205 		//Get a new heap ID
   1206 		do {
   1207 			heap->id = atomic_incr32(&_memory_heap_id);
   1208 			if (_memory_heap_lookup(heap->id))
   1209 				heap->id = 0;
   1210 		} while (!heap->id);
   1211 
   1212 		//Link in heap in heap ID map
   1213 		size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
   1214 		do {
   1215 			next_heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
   1216 			heap->next_heap = next_heap;
   1217 		} while (!atomic_cas_ptr(&_memory_heaps[list_idx], heap, next_heap));
   1218 	}
   1219 
   1220 #if ENABLE_THREAD_CACHE
   1221 	heap->span_counter[0].cache_limit = MIN_SPAN_CACHE_RELEASE + MIN_SPAN_CACHE_SIZE;
   1222 	for (size_t idx = 1; idx < LARGE_CLASS_COUNT; ++idx)
   1223 		heap->span_counter[idx].cache_limit = MIN_LARGE_SPAN_CACHE_RELEASE + MIN_LARGE_SPAN_CACHE_SIZE;
   1224 #endif
   1225 
   1226 	//Clean up any deferred operations
   1227 	_memory_unmap_deferred(heap, 0);
   1228 	_memory_deallocate_deferred(heap, 0);
   1229 
   1230 	return heap;
   1231 }
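        // Orphaned heaps are adopted with the same tagged-pointer trick as the global span cache:
        // heaps are page aligned, so the bits below _memory_page_mask carry a value from
        // _memory_orphan_counter that protects the lock-free pop above against ABA.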
   1232 
   1233 //! Deallocate the given small/medium memory block from the given heap
   1234 static void
   1235 _memory_deallocate_to_heap(heap_t* heap, span_t* span, void* p) {
   1236 	//Check if span is the currently active span in order to operate
   1237 	//on the correct bookkeeping data
   1238 	assert(SPAN_COUNT(span->flags) == 1);
   1239 	const count_t class_idx = span->size_class;
   1240 	size_class_t* size_class = _memory_size_class + class_idx;
   1241 	int is_active = (heap->active_span[class_idx] == span);
   1242 	span_block_t* block_data = is_active ?
   1243 	                           heap->active_block + class_idx :
   1244 	                           &span->data.block;
   1245 
   1246 	//Check if the span will become completely free
   1247 	if (block_data->free_count == ((count_t)size_class->block_count - 1)) {
   1248 #if ENABLE_THREAD_CACHE
   1249 		//Track counters
   1250 		assert(heap->span_counter[0].current_allocations > 0);
   1251 		if (heap->span_counter[0].current_allocations)
   1252 			--heap->span_counter[0].current_allocations;
   1253 #endif
   1254 
   1255 		//If it was active, reset counter. Otherwise, if not active, remove from
   1256 		//partial free list if we had a previous free block (guard for classes with only 1 block)
   1257 		if (is_active)
   1258 			block_data->free_count = 0;
   1259 		else if (block_data->free_count > 0)
   1260 			_memory_span_list_doublelink_remove(&heap->size_cache[class_idx], span);
   1261 
   1262 		//Add to heap span cache
   1263 		_memory_heap_cache_insert(heap, span);
   1264 		return;
   1265 	}
   1266 
   1267 	//Check if first free block for this span (previously fully allocated)
   1268 	if (block_data->free_count == 0) {
   1269 		//add to free list and disable autolink
   1270 		_memory_span_list_doublelink_add(&heap->size_cache[class_idx], span);
   1271 		block_data->first_autolink = (uint16_t)size_class->block_count;
   1272 	}
   1273 	++block_data->free_count;
   1274 	//Span is not yet completely free, so add block to the linked list of free blocks
   1275 	void* blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
   1276 	count_t block_offset = (count_t)pointer_diff(p, blocks_start);
   1277 	count_t block_idx = block_offset / (count_t)size_class->size;
   1278 	uint32_t* block = (uint32_t*)pointer_offset(blocks_start, block_idx * size_class->size);
   1279 	*block = block_data->free_list;
   1280 	block_data->free_list = (uint16_t)block_idx;
   1281 }
   1282 
   1283 //! Deallocate the given large memory block from the given heap
   1284 static void
   1285 _memory_deallocate_large_to_heap(heap_t* heap, span_t* span) {
   1286 	//Decrease counter
   1287 	size_t idx = (size_t)span->size_class - SIZE_CLASS_COUNT;
   1288 	size_t span_count = idx + 1;
   1289 	assert((size_t)SPAN_COUNT(span->flags) == span_count);
   1290 	assert(span->size_class >= SIZE_CLASS_COUNT);
   1291 	assert(idx < LARGE_CLASS_COUNT);
   1292 #if ENABLE_THREAD_CACHE
   1293 	assert(heap->span_counter[idx].current_allocations > 0);
   1294 	if (heap->span_counter[idx].current_allocations)
   1295 		--heap->span_counter[idx].current_allocations;
   1296 #endif
   1297 	if (!heap->spans_reserved && (span_count > 1)) {
   1298 		//Split the span and store remainder as reserved spans
   1299 		//Must split to a dummy 1-span master since we cannot have master spans as reserved
   1300 		_memory_set_span_remainder_as_reserved(heap, span, 1);
   1301 		span_count = 1;
   1302 	}
   1303 
   1304 	//Insert into cache list
   1305 	_memory_heap_cache_insert(heap, span);
   1306 }
   1307 
   1308 //! Process pending deferred cross-thread deallocations
   1309 static int
   1310 _memory_deallocate_deferred(heap_t* heap, size_t size_class) {
   1311 	//Grab the current list of deferred deallocations
   1312 	atomic_thread_fence_acquire();
   1313 	void* p = atomic_load_ptr(&heap->defer_deallocate);
   1314 	if (!p || !atomic_cas_ptr(&heap->defer_deallocate, 0, p))
   1315 		return 0;
   1316 	//Keep track if we deallocate in the given size class
   1317 	int got_class = 0;
   1318 	do {
   1319 		void* next = *(void**)p;
   1320 		//Get span and check which type of block
   1321 		span_t* span = (span_t*)(void*)((uintptr_t)p & _memory_span_mask);
   1322 		if (span->size_class < SIZE_CLASS_COUNT) {
   1323 			//Small/medium block
   1324 			got_class |= (span->size_class == size_class);
   1325 			_memory_deallocate_to_heap(heap, span, p);
   1326 		}
   1327 		else {
   1328 			//Large block
   1329 			got_class |= ((span->size_class >= size_class) && (span->size_class <= (size_class + 2)));
   1330 			_memory_deallocate_large_to_heap(heap, span);
   1331 		}
   1332 		//Loop until all pending operations in list are processed
   1333 		p = next;
   1334 	} while (p);
   1335 	return got_class;
   1336 }
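// The deferred list is drained by detaching the entire chain with one compare-and-swap
// against a null head, after which the walk needs no further synchronization. The same
// pattern in isolation (generic names, not the atomic wrappers used in this file):
//
//   void* head = atomic_load(&list_head);
//   if (head && compare_and_swap(&list_head, /*new*/ NULL, /*expected*/ head)) {
//       for (void* p = head; p; p = *(void**)p)
//           release_block(p);
//   }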
   1337 
   1338 //! Defer deallocation of the given block to the given heap
   1339 static void
   1340 _memory_deallocate_defer(int32_t heap_id, void* p) {
   1341 	//Get the heap and link the pointer into its list of deferred operations
   1342 	heap_t* heap = _memory_heap_lookup(heap_id);
   1343 	if (!heap)
   1344 		return;
   1345 	void* last_ptr;
   1346 	do {
   1347 		last_ptr = atomic_load_ptr(&heap->defer_deallocate);
   1348 		*(void**)p = last_ptr; //Safe to use block, it's being deallocated
   1349 	} while (!atomic_cas_ptr(&heap->defer_deallocate, p, last_ptr));
   1350 }
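// Cross-thread frees therefore cost one lock-free stack push: the block being freed is
// reused as the list node (its first pointer-sized bytes hold the next link), so no
// auxiliary allocation is needed, and the owning thread reclaims the whole chain later
// through _memory_deallocate_deferred().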
   1351 
   1352 //! Allocate a block of the given size
   1353 static void*
   1354 _memory_allocate(size_t size) {
   1355 	if (size <= _memory_medium_size_limit)
   1356 		return _memory_allocate_from_heap(get_thread_heap(), size);
   1357 	else if (size <= LARGE_SIZE_LIMIT)
   1358 		return _memory_allocate_large_from_heap(get_thread_heap(), size);
   1359 
   1360 	//Oversized, allocate pages directly
   1361 	size += SPAN_HEADER_SIZE;
   1362 	size_t num_pages = size >> _memory_page_size_shift;
   1363 	if (size & (_memory_page_size - 1))
   1364 		++num_pages;
   1365 	size_t align_offset = 0;
   1366 	span_t* span = (span_t*)_memory_map(num_pages * _memory_page_size, &align_offset);
   1367 	atomic_store32(&span->heap_id, 0);
   1368 	//Store page count in next_span
   1369 	span->next_span = (span_t*)((uintptr_t)num_pages);
   1370 	span->data.list.align_offset = (uint16_t)align_offset;
   1371 
   1372 	return pointer_offset(span, SPAN_HEADER_SIZE);
   1373 }
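// Worked example for the oversized path (assuming 4KiB pages and the default 64KiB span
// size): a 5,000,000-byte request, which is above LARGE_SIZE_LIMIT for that configuration,
// grows by SPAN_HEADER_SIZE and rounds up to 1221 pages; the page count is stashed in
// span->next_span and heap_id is set to 0 so that _memory_deallocate() knows to unmap the
// region directly instead of consulting a size class.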
   1374 
   1375 //! Deallocate the given block
   1376 static void
   1377 _memory_deallocate(void* p) {
   1378 	if (!p)
   1379 		return;
   1380 
   1381 	//Grab the owning span using the guaranteed span alignment (64KiB by default)
   1382 	span_t* span = (span_t*)(void*)((uintptr_t)p & _memory_span_mask);
   1383 	int32_t heap_id = atomic_load32(&span->heap_id);
   1384 	heap_t* heap = get_thread_heap();
   1385 	//Check if block belongs to this heap or if deallocation should be deferred
   1386 	if (heap_id == heap->id) {
   1387 		if (span->size_class < SIZE_CLASS_COUNT)
   1388 			_memory_deallocate_to_heap(heap, span, p);
   1389 		else
   1390 			_memory_deallocate_large_to_heap(heap, span);
   1391 	}
   1392 	else if (heap_id > 0) {
   1393 		_memory_deallocate_defer(heap_id, p);
   1394 	}
   1395 	else {
   1396 		//Oversized allocation, page count is stored in next_span
   1397 		size_t num_pages = (size_t)span->next_span;
   1398 		_memory_unmap(span, num_pages * _memory_page_size, span->data.list.align_offset, 1);
   1399 	}
   1400 }
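// Deallocation dispatch, in summary: a block owned by the calling thread's heap is freed
// directly; a block owned by another live heap (heap_id > 0) is pushed onto that heap's
// deferred list; heap_id == 0 marks an oversized direct mapping, which is unmapped here
// using the page count stored in next_span.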
   1401 
   1402 //! Reallocate the given block to the given size
   1403 static void*
   1404 _memory_reallocate(void* p, size_t size, size_t oldsize, unsigned int flags) {
   1405 	if (p) {
   1406 		//Grab the span using guaranteed span alignment
   1407 		span_t* span = (span_t*)(void*)((uintptr_t)p & _memory_span_mask);
   1408 		int32_t heap_id = atomic_load32(&span->heap_id);
   1409 		if (heap_id) {
   1410 			if (span->size_class < SIZE_CLASS_COUNT) {
   1411 				//Small/medium sized block
   1412 				size_class_t* size_class = _memory_size_class + span->size_class;
   1413 				if ((size_t)size_class->size >= size)
   1414 					return p; //Still fits in block, never mind trying to save memory
   1415 				if (!oldsize)
   1416 					oldsize = size_class->size;
   1417 			}
   1418 			else {
   1419 				//Large block
   1420 				size_t total_size = size + SPAN_HEADER_SIZE;
   1421 				size_t num_spans = total_size >> _memory_span_size_shift;
   1422 				if (total_size & (_memory_span_size - 1))
   1423 					++num_spans;
   1424 				size_t current_spans = (span->size_class - SIZE_CLASS_COUNT) + 1;
   1425 				if ((current_spans >= num_spans) && (num_spans >= (current_spans / 2)))
   1426 					return p; //Still fits and less than half of memory would be freed
   1427 				if (!oldsize)
   1428 					oldsize = (current_spans * _memory_span_size) - SPAN_HEADER_SIZE;
   1429 			}
   1430 		}
   1431 		else {
   1432 			//Oversized block
   1433 			size_t total_size = size + SPAN_HEADER_SIZE;
   1434 			size_t num_pages = total_size >> _memory_page_size_shift;
   1435 			if (total_size & (_memory_page_size - 1))
   1436 				++num_pages;
   1437 			//Page count is stored in next_span
   1438 			size_t current_pages = (size_t)span->next_span;
   1439 			if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2)))
   1440 				return p; //Still fits and less than half of memory would be freed
   1441 			if (!oldsize)
   1442 				oldsize = (current_pages * _memory_page_size) - SPAN_HEADER_SIZE;
   1443 		}
   1444 	}
   1445 
   1446 	//Size is greater than block size, need to allocate a new block and deallocate the old
   1447 	//Avoid hysteresis by overallocating if the increase is small (below 37.5%)
   1448 	size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
   1449 	void* block = _memory_allocate((size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size));
   1450 	if (p) {
   1451 		if (!(flags & RPMALLOC_NO_PRESERVE))
   1452 			memcpy(block, p, oldsize < size ? oldsize : size);
   1453 		_memory_deallocate(p);
   1454 	}
   1455 
   1456 	return block;
   1457 }
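// Worked example of the growth hysteresis (illustrative sizes): growing a 1000-byte block
// to 1100 bytes allocates 1375 bytes (oldsize plus 25% plus 12.5%) instead of 1100, so a
// later grow to 1300 bytes can usually be handled in place by the size-class check above;
// a jump straight to 2000 bytes exceeds the lower bound and allocates 2000 bytes exactly.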
   1458 
   1459 //! Get the usable size of the given block
   1460 static size_t
   1461 _memory_usable_size(void* p) {
   1462 	//Grab the span using guaranteed span alignment
   1463 	span_t* span = (span_t*)(void*)((uintptr_t)p & _memory_span_mask);
   1464 	int32_t heap_id = atomic_load32(&span->heap_id);
   1465 	if (heap_id) {
   1466 		//Small/medium block
   1467 		if (span->size_class < SIZE_CLASS_COUNT)
   1468 			return _memory_size_class[span->size_class].size;
   1469 
   1470 		//Large block
   1471 		size_t current_spans = (span->size_class - SIZE_CLASS_COUNT) + 1;
   1472 		return (current_spans * _memory_span_size) - SPAN_HEADER_SIZE;
   1473 	}
   1474 
   1475 	//Oversized block, page count is stored in next_span
   1476 	size_t current_pages = (size_t)span->next_span;
   1477 	return (current_pages * _memory_page_size) - SPAN_HEADER_SIZE;
   1478 }
   1479 
   1480 //! Adjust and optimize the size class properties for the given class
   1481 static void
   1482 _memory_adjust_size_class(size_t iclass) {
   1483 	size_t block_size = _memory_size_class[iclass].size;
   1484 	size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
   1485 
   1486 	_memory_size_class[iclass].block_count = (uint16_t)block_count;
   1487 	_memory_size_class[iclass].class_idx = (uint16_t)iclass;
   1488 
   1489 	//Check if previous size classes can be merged
   1490 	size_t prevclass = iclass;
   1491 	while (prevclass > 0) {
   1492 		--prevclass;
   1493 		//A previous class can be merged into this one if both hold the same number of blocks per span
   1494 		if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count) {
   1495 			memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass]));
   1496 		}
   1497 		else {
   1498 			break;
   1499 		}
   1500 	}
   1501 }
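// Merge example (assuming the default 64KiB span size): every medium class whose block size
// lands between roughly 22KiB and 32KiB fits exactly two blocks per span, so the loop above
// overwrites all of them with the largest such class and they end up sharing one effective
// block size; requests in that range simply round up a bit further.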
   1502 
   1503 }
   1504 
   1505 #if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 )
   1506 #  include <windows.h>
   1507 #else
   1508 #  include <sys/mman.h>
   1509 #  include <sched.h>
   1510 #  ifndef MAP_UNINITIALIZED
   1511 #    define MAP_UNINITIALIZED 0
   1512 #  endif
   1513 #endif
   1514 #include <errno.h>
   1515 
   1516 namespace tracy
   1517 {
   1518 
   1519 //! Initialize the allocator and setup global data
   1520 int
   1521 rpmalloc_initialize(void) {
   1522 	memset(&_memory_config, 0, sizeof(rpmalloc_config_t));
   1523 	return rpmalloc_initialize_config(0);
   1524 }
   1525 
   1526 int
   1527 rpmalloc_initialize_config(const rpmalloc_config_t* config) {
   1528 	if (config)
   1529 		memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
   1530 
   1531 	if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
   1532 		_memory_config.memory_map = _memory_map_os;
   1533 		_memory_config.memory_unmap = _memory_unmap_os;
   1534 	}
   1535 
   1536 	_memory_page_size = _memory_config.page_size;
   1537 	if (!_memory_page_size) {
   1538 #if PLATFORM_WINDOWS
   1539 		SYSTEM_INFO system_info;
   1540 		memset(&system_info, 0, sizeof(system_info));
   1541 		GetSystemInfo(&system_info);
   1542 		_memory_page_size = system_info.dwPageSize;
   1543 		_memory_map_granularity = system_info.dwAllocationGranularity;
   1544 #else
   1545 		_memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
   1546 		_memory_map_granularity = _memory_page_size;
   1547 #endif
   1548 	}
   1549 
   1550 	if (_memory_page_size < 512)
   1551 		_memory_page_size = 512;
   1552 	if (_memory_page_size > (16 * 1024))
   1553 		_memory_page_size = (16 * 1024);
   1554 
   1555 	_memory_page_size_shift = 0;
   1556 	size_t page_size_bit = _memory_page_size;
   1557 	while (page_size_bit != 1) {
   1558 		++_memory_page_size_shift;
   1559 		page_size_bit >>= 1;
   1560 	}
   1561 	_memory_page_size = ((size_t)1 << _memory_page_size_shift);
   1562 	_memory_page_mask = ~(uintptr_t)(_memory_page_size - 1);
   1563 
   1564 	size_t span_size = _memory_config.span_size;
   1565 	if (!span_size)
   1566 		span_size = (64 * 1024);
   1567 	if (span_size > (256 * 1024))
   1568 		span_size = (256 * 1024);
   1569 	_memory_span_size = 4096;
   1570 	_memory_span_size_shift = 12;
   1571 	while ((_memory_span_size < span_size) || (_memory_span_size < _memory_page_size)) {
   1572 		_memory_span_size <<= 1;
   1573 		++_memory_span_size_shift;
   1574 	}
   1575 	_memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
   1576 
   1577 	_memory_config.page_size = _memory_page_size;
   1578 	_memory_config.span_size = _memory_span_size;
   1579 
   1580 	if (!_memory_config.span_map_count)
   1581 		_memory_config.span_map_count = DEFAULT_SPAN_MAP_COUNT;
   1582 	if (_memory_config.span_size * _memory_config.span_map_count < _memory_config.page_size)
   1583 		_memory_config.span_map_count = (_memory_config.page_size / _memory_config.span_size);
   1584 	if (_memory_config.span_map_count > 128)
   1585 		_memory_config.span_map_count = 128;
   1586 
   1587 #if defined(__APPLE__) && ENABLE_PRELOAD
   1588 	if (pthread_key_create(&_memory_thread_heap, 0))
   1589 		return -1;
   1590 #endif
   1591 
   1592 	atomic_store32(&_memory_heap_id, 0);
   1593 	atomic_store32(&_memory_orphan_counter, 0);
   1594 	atomic_store32(&_memory_active_heaps, 0);
   1595 
   1596 	//Setup all small and medium size classes
   1597 	size_t iclass;
   1598 	for (iclass = 0; iclass < SMALL_CLASS_COUNT; ++iclass) {
   1599 		size_t size = (iclass + 1) * SMALL_GRANULARITY;
   1600 		_memory_size_class[iclass].size = (uint16_t)size;
   1601 		_memory_adjust_size_class(iclass);
   1602 	}
   1603 
   1604 	_memory_medium_size_limit = _memory_span_size - SPAN_HEADER_SIZE;
   1605 	if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
   1606 		_memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
   1607 	for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
   1608 		size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
   1609 		if (size > _memory_medium_size_limit)
   1610 			size = _memory_medium_size_limit;
   1611 		_memory_size_class[SMALL_CLASS_COUNT + iclass].size = (uint16_t)size;
   1612 		_memory_adjust_size_class(SMALL_CLASS_COUNT + iclass);
   1613 	}
   1614 
   1615 	//Initialize this thread
   1616 	rpmalloc_thread_initialize();
   1617 	return 0;
   1618 }
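// A minimal embedding sketch (illustrative only; everything outside this file is up to the
// caller). Passing 0 keeps the defaults, and any field left at zero in a config is filled
// in with the values computed above:
//
//   tracy::rpmalloc_config_t cfg = {};
//   cfg.span_size = 128 * 1024;   // rounded up to a power of two, capped at 256KiB
//   cfg.span_map_count = 32;      // spans reserved per call into the mapper
//   if (tracy::rpmalloc_initialize_config(&cfg) != 0)
//       /* allocator unavailable, fall back to the system allocator */;
//   ...
//   tracy::rpmalloc_finalize();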
   1619 
   1620 //! Finalize the allocator
   1621 void
   1622 rpmalloc_finalize(void) {
   1623 	atomic_thread_fence_acquire();
   1624 
   1625 	rpmalloc_thread_finalize();
   1626 	//If you hit this assert, you still have active threads or forgot to finalize some thread(s)
   1627 	assert(atomic_load32(&_memory_active_heaps) == 0);
   1628 
   1629 	//Free all thread caches
   1630 	for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
   1631 		heap_t* heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
   1632 		while (heap) {
   1633 			_memory_deallocate_deferred(heap, 0);
   1634 
   1635 			//Free span caches (other thread might have deferred after the thread using this heap finalized)
   1636 #if ENABLE_THREAD_CACHE
   1637 			for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
   1638 				if (heap->span_cache[iclass])
   1639 					_memory_unmap_span_list(0, heap->span_cache[iclass]);
   1640 			}
   1641 #endif
   1642 			heap = heap->next_heap;
   1643 		}
   1644 	}
   1645 
   1646 #if ENABLE_GLOBAL_CACHE
   1647 	//Free global caches
   1648 	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
   1649 		_memory_cache_finalize(&_memory_span_cache[iclass]);
   1650 #endif
   1651 
   1652 	for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
   1653 		heap_t* heap = (heap_t*)atomic_load_ptr(&_memory_heaps[list_idx]);
   1654 		atomic_store_ptr(&_memory_heaps[list_idx], 0);
   1655 		while (heap) {
   1656 			if (heap->spans_reserved) {
   1657 				span_t* span = heap->span_reserve;
   1658 				span_t* master = heap->span_reserve_master;
   1659 				uint32_t remains = SPAN_REMAINS(master->flags);
   1660 
   1661 				assert(master != span);
   1662 				assert(remains >= heap->spans_reserved);
   1663 				_memory_unmap(span, heap->spans_reserved * _memory_span_size, 0, 0);
   1664 				_memory_statistics_sub(&_reserved_spans, heap->spans_reserved);
   1665 				remains = ((uint32_t)heap->spans_reserved >= remains) ? 0 : (remains - (uint32_t)heap->spans_reserved);
   1666 				if (!remains) {
   1667 					uint32_t master_span_count = SPAN_COUNT(master->flags);
   1668 					_memory_statistics_sub(&_reserved_spans, master_span_count);
   1669 					_memory_unmap(master, master_span_count * _memory_span_size, master->data.list.align_offset, 1);
   1670 				}
   1671 				else {
   1672 					SPAN_SET_REMAINS(master->flags, remains);
   1673 				}
   1674 			}
   1675 
   1676 			_memory_unmap_deferred(heap, 0);
   1677 
   1678 			heap_t* next_heap = heap->next_heap;
   1679 			_memory_unmap(heap, (1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size, heap->align_offset, 1);
   1680 			heap = next_heap;
   1681 		}
   1682 	}
   1683 	atomic_store_ptr(&_memory_orphan_heaps, 0);
   1684 	atomic_thread_fence_release();
   1685 
   1686 #if ENABLE_STATISTICS
   1687 	//If you hit these asserts you probably have memory leaks or double frees in your code
   1688 	assert(!atomic_load32(&_mapped_pages));
   1689 	assert(!atomic_load32(&_reserved_spans));
   1690 #endif
   1691 
   1692 #if defined(__APPLE__) && ENABLE_PRELOAD
   1693 	pthread_key_delete(_memory_thread_heap);
   1694 #endif
   1695 }
   1696 
   1697 //! Initialize thread, assign heap
   1698 void
   1699 rpmalloc_thread_initialize(void) {
   1700 	if (!get_thread_heap()) {
   1701 		atomic_incr32(&_memory_active_heaps);
   1702 		heap_t* heap = _memory_allocate_heap();
   1703 #if ENABLE_STATISTICS
   1704 		heap->thread_to_global = 0;
   1705 		heap->global_to_thread = 0;
   1706 #endif
   1707 		set_thread_heap(heap);
   1708 	}
   1709 }
   1710 
   1711 //! Finalize thread, orphan heap
   1712 void
   1713 rpmalloc_thread_finalize(void) {
   1714 	heap_t* heap = get_thread_heap();
   1715 	if (!heap)
   1716 		return;
   1717 
   1718 	_memory_deallocate_deferred(heap, 0);
   1719 	_memory_unmap_deferred(heap, 0);
   1720 
   1721 	//Release thread cache spans back to global cache
   1722 #if ENABLE_THREAD_CACHE
   1723 	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
   1724 		span_t* span = heap->span_cache[iclass];
   1725 #if ENABLE_GLOBAL_CACHE
   1726 		const size_t span_count = iclass + 1;
   1727 		while (span) {
   1728 			assert((size_t)SPAN_COUNT(span->flags) == span_count);
   1729 			span_t* next = _memory_span_list_split(span, !iclass ? MIN_SPAN_CACHE_RELEASE : (MIN_LARGE_SPAN_CACHE_RELEASE / span_count));
   1730 			_memory_global_cache_insert(0, span);
   1731 			span = next;
   1732 		}
   1733 #else
   1734 		if (span)
   1735 			_memory_unmap_span_list(heap, span);
   1736 #endif
   1737 		heap->span_cache[iclass] = 0;
   1738 	}
   1739 #endif
   1740 
   1741 	//Orphan the heap
   1742 	void* raw_heap;
   1743 	uintptr_t orphan_counter;
   1744 	heap_t* last_heap;
   1745 	do {
   1746 		last_heap = (heap_t*)atomic_load_ptr(&_memory_orphan_heaps);
   1747 		heap->next_orphan = (heap_t*)(void*)((uintptr_t)last_heap & _memory_page_mask);
   1748 		orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
   1749 		raw_heap = (void*)((uintptr_t)heap | (orphan_counter & ~_memory_page_mask));
   1750 	}
   1751 	while (!atomic_cas_ptr(&_memory_orphan_heaps, raw_heap, last_heap));
   1752 
   1753 	set_thread_heap(0);
   1754 	atomic_add32(&_memory_active_heaps, -1);
   1755 }
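// Orphaned heaps are kept on a lock-free list so a later thread can adopt them. Because
// heaps are page aligned, the bits below the page mask are free, and the code above packs
// a truncated orphan counter into them; a stale compare-and-swap is then detected even if
// the same heap address reappears at the head of the list (the usual ABA problem).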
   1756 
   1757 int
   1758 rpmalloc_is_thread_initialized(void) {
   1759 	return (get_thread_heap() != 0) ? 1 : 0;
   1760 }
   1761 
   1762 const rpmalloc_config_t*
   1763 rpmalloc_config(void) {
   1764 	return &_memory_config;
   1765 }
   1766 
   1767 //! Map new pages to virtual memory
   1768 static void*
   1769 _memory_map_os(size_t size, size_t* offset) {
   1770 	//The size is either a heap (a single page) or one or more spans - only spans need to be aligned
   1771 	size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0;
   1772 
   1773 #if PLATFORM_WINDOWS
   1774 	//Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
   1775 	void* ptr = VirtualAlloc(0, size + padding, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
   1776 	if (!ptr) {
   1777 		assert("Failed to map virtual memory block" && 0);
   1778 		return 0;
   1779 	}
   1780 #else
   1781 	void* ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED, -1, 0);
   1782 	if ((ptr == MAP_FAILED) || !ptr) {
   1783 		assert("Failed to map virtual memory block" && 0);
   1784 		return 0;
   1785 	}
   1786 #endif
   1787 
   1788 	if (padding) {
   1789 		size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
   1790 #if PLATFORM_POSIX
   1791 		//Unmap the unused tail pages; on Windows this is handled by the final VirtualFree with MEM_RELEASE
   1792 		size_t remains = padding - final_padding;
   1793 		if (remains)
   1794 			munmap(pointer_offset(ptr, final_padding + size), remains);
   1795 #endif
   1796 		ptr = pointer_offset(ptr, final_padding);
   1797 		assert(final_padding <= _memory_span_size);
   1798 		assert(!(final_padding & 7)); //must be a multiple of 8, offset is stored as final_padding >> 3
   1799 		assert(!((uintptr_t)ptr & ~_memory_span_mask));
   1800 		*offset = final_padding >> 3;
   1801 		assert(*offset < 65536);
   1802 	}
   1803 
   1804 	return ptr;
   1805 }
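// Alignment example (assuming the default 64KiB span size and an OS that hands back
// page-aligned mappings): if a span-sized request comes back at an address 0x4000 past a
// 64KiB boundary, final_padding is 0x10000 - 0x4000 = 0xC000, the returned pointer is
// advanced to the next boundary, the now-unused 0x4000-byte tail is munmap()ed right away
// on POSIX, and the skipped head is recorded as offset = 0xC000 >> 3 so the final release
// can unmap the original base.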
   1806 
   1807 //! Unmap pages from virtual memory
   1808 static void
   1809 _memory_unmap_os(void* address, size_t size, size_t offset, int release) {
   1810 	assert(release || (offset == 0));
   1811 	if (release && offset) {
   1812 		offset <<= 3;
   1813 #if PLATFORM_POSIX
   1814 		size += offset;
   1815 #endif
   1816 		address = pointer_offset(address, -(int32_t)offset);
   1817 	}
   1818 #if PLATFORM_WINDOWS
   1819 	if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) {
   1820 		DWORD err = GetLastError();
   1821 		(void)err;
   1822 		assert("Failed to unmap virtual memory block" && 0);
   1823 	}
   1824 #else
   1825 	MEMORY_UNUSED(release);
   1826 	if (munmap(address, size)) {
   1827 		assert("Failed to unmap virtual memory block" && 0);
   1828 	}
   1829 #endif
   1830 }
   1831 
   1832 #if ENABLE_GUARDS
   1833 static void
   1834 _memory_guard_validate(void* p) {
   1835 	if (!p)
   1836 		return;
   1837 	void* block_start;
   1838 	size_t block_size = _memory_usable_size(p);
   1839 	span_t* span = (span_t*)(void*)((uintptr_t)p & _memory_span_mask);
   1840 	int32_t heap_id = atomic_load32(&span->heap_id);
   1841 	if (heap_id) {
   1842 		if (span->size_class < SIZE_CLASS_COUNT) {
   1843 			void* span_blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
   1844 			size_class_t* size_class = _memory_size_class + span->size_class;
   1845 			count_t block_offset = (count_t)pointer_diff(p, span_blocks_start);
   1846 			count_t block_idx = block_offset / (count_t)size_class->size;
   1847 			block_start = pointer_offset(span_blocks_start, block_idx * size_class->size);
   1848 		}
   1849 		else {
   1850 			block_start = pointer_offset(span, SPAN_HEADER_SIZE);
   1851 		}
   1852 	}
   1853 	else {
   1854 		block_start = pointer_offset(span, SPAN_HEADER_SIZE);
   1855 	}
   1856 	uint32_t* deadzone = (uint32_t*)block_start;
   1857 	//If these asserts fire, you have written to memory before the block start
   1858 	for (int i = 0; i < 8; ++i) {
   1859 		if (deadzone[i] != MAGIC_GUARD) {
   1860 			if (_memory_config.memory_overwrite)
   1861 				_memory_config.memory_overwrite(p);
   1862 			else
   1863 				assert("Memory overwrite before block start" && 0);
   1864 			return;
   1865 		}
   1866 		deadzone[i] = 0;
   1867 	}
   1868 	deadzone = (uint32_t*)pointer_offset(block_start, block_size - 32);
   1869 	//If these asserts fire, you have written to memory after the block end
   1870 	for (int i = 0; i < 8; ++i) {
   1871 		if (deadzone[i] != MAGIC_GUARD) {
   1872 			if (_memory_config.memory_overwrite)
   1873 				_memory_config.memory_overwrite(p);
   1874 			else
   1875 				assert("Memory overwrite after block end" && 0);
   1876 			return;
   1877 		}
   1878 		deadzone[i] = 0;
   1879 	}
   1880 }
   1881 #else
   1882 #define _memory_guard_validate(block)
   1883 #endif
   1884 
   1885 #if ENABLE_GUARDS
   1886 static void
   1887 _memory_guard_block(void* block) {
   1888 	if (block) {
   1889 		size_t block_size = _memory_usable_size(block);
   1890 		uint32_t* deadzone = (uint32_t*)block;
   1891 		deadzone[0] = deadzone[1] = deadzone[2] = deadzone[3] =
   1892 		deadzone[4] = deadzone[5] = deadzone[6] = deadzone[7] = MAGIC_GUARD;
   1893 		deadzone = (uint32_t*)pointer_offset(block, block_size - 32);
   1894 		deadzone[0] = deadzone[1] = deadzone[2] = deadzone[3] =
   1895 		deadzone[4] = deadzone[5] = deadzone[6] = deadzone[7] = MAGIC_GUARD;
   1896 	}
   1897 }
   1898 #define _memory_guard_pre_alloc(size) size += 64
   1899 #define _memory_guard_pre_realloc(block, size) block = pointer_offset(block, -32); size += 64
   1900 #define _memory_guard_post_alloc(block, size) _memory_guard_block(block); block = pointer_offset(block, 32); size -= 64
   1901 #else
   1902 #define _memory_guard_pre_alloc(size)
   1903 #define _memory_guard_pre_realloc(block, size)
   1904 #define _memory_guard_post_alloc(block, size)
   1905 #endif
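// With ENABLE_GUARDS the effective block layout becomes (illustrative):
//
//   [ 32-byte dead zone | user data (reported size) | 32-byte dead zone ]
//
// _memory_guard_pre_alloc() grows the request by 64 bytes, _memory_guard_post_alloc()
// fills both zones with MAGIC_GUARD and returns a pointer just past the front zone, and
// _memory_guard_validate() checks and clears both zones when the block is freed or
// reallocated, reporting through the memory_overwrite callback when one is configured.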
   1906 
   1907 // Extern interface
   1908 
   1909 TRACY_API RPMALLOC_RESTRICT void*
   1910 rpmalloc(size_t size) {
   1911 #if ENABLE_VALIDATE_ARGS
   1912 	if (size >= MAX_ALLOC_SIZE) {
   1913 		errno = EINVAL;
   1914 		return 0;
   1915 	}
   1916 #endif
   1917 	_memory_guard_pre_alloc(size);
   1918 	void* block = _memory_allocate(size);
   1919 	_memory_guard_post_alloc(block, size);
   1920 	return block;
   1921 }
   1922 
   1923 TRACY_API void
   1924 rpfree(void* ptr) {
   1925 	_memory_guard_validate(ptr);
   1926 	_memory_deallocate(ptr);
   1927 }
   1928 
   1929 RPMALLOC_RESTRICT void*
   1930 rpcalloc(size_t num, size_t size) {
   1931 	size_t total;
   1932 #if ENABLE_VALIDATE_ARGS
   1933 #if PLATFORM_WINDOWS
   1934 	int err = SizeTMult(num, size, &total);
   1935 	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
   1936 		errno = EINVAL;
   1937 		return 0;
   1938 	}
   1939 #else
   1940 	int err = __builtin_umull_overflow(num, size, &total);
   1941 	if (err || (total >= MAX_ALLOC_SIZE)) {
   1942 		errno = EINVAL;
   1943 		return 0;
   1944 	}
   1945 #endif
   1946 #else
   1947 	total = num * size;
   1948 #endif
   1949 	_memory_guard_pre_alloc(total);
   1950 	void* block = _memory_allocate(total);
   1951 	_memory_guard_post_alloc(block, total);
   1952 	memset(block, 0, total);
   1953 	return block;
   1954 }
   1955 
   1956 void*
   1957 rprealloc(void* ptr, size_t size) {
   1958 #if ENABLE_VALIDATE_ARGS
   1959 	if (size >= MAX_ALLOC_SIZE) {
   1960 		errno = EINVAL;
   1961 		return ptr;
   1962 	}
   1963 #endif
   1964 	_memory_guard_validate(ptr);
   1965 	_memory_guard_pre_realloc(ptr, size);
   1966 	void* block = _memory_reallocate(ptr, size, 0, 0);
   1967 	_memory_guard_post_alloc(block, size);
   1968 	return block;
   1969 }
   1970 
   1971 void*
   1972 rpaligned_realloc(void* ptr, size_t alignment, size_t size, size_t oldsize,
   1973                   unsigned int flags) {
   1974 #if ENABLE_VALIDATE_ARGS
   1975 	if ((size + alignment < size) || (alignment > _memory_page_size)) {
   1976 		errno = EINVAL;
   1977 		return 0;
   1978 	}
   1979 #endif
   1980 	void* block;
   1981 	if (alignment > 32) {
   1982 		block = rpaligned_alloc(alignment, size);
   1983 		if (!(flags & RPMALLOC_NO_PRESERVE))
   1984 			memcpy(block, ptr, oldsize < size ? oldsize : size);
   1985 		rpfree(ptr);
   1986 	}
   1987 	else {
   1988 		_memory_guard_validate(ptr);
   1989 		_memory_guard_pre_realloc(ptr, size);
   1990 		block = _memory_reallocate(ptr, size, oldsize, flags);
   1991 		_memory_guard_post_alloc(block, size);
   1992 	}
   1993 	return block;
   1994 }
   1995 
   1996 RPMALLOC_RESTRICT void*
   1997 rpaligned_alloc(size_t alignment, size_t size) {
   1998 	if (alignment <= 32)
   1999 		return rpmalloc(size);
   2000 
   2001 #if ENABLE_VALIDATE_ARGS
   2002 	if ((size + alignment < size) || (alignment > _memory_page_size)) {
   2003 		errno = EINVAL;
   2004 		return 0;
   2005 	}
   2006 #endif
   2007 
   2008 	void* ptr = rpmalloc(size + alignment);
   2009 	if ((uintptr_t)ptr & (alignment - 1))
   2010 		ptr = (void*)(((uintptr_t)ptr & ~((uintptr_t)alignment - 1)) + alignment);
   2011 	return ptr;
   2012 }
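// Example (illustrative): rpaligned_alloc(256, 1000) allocates 1256 bytes and, if needed,
// bumps the result up to the next 256-byte boundary; since deallocation recovers the owning
// span (and block index) from the pointer itself, the bumped pointer can be handed straight
// to rpfree(). Alignments of 32 bytes or less fall through to plain rpmalloc(), and
// alignments above _memory_page_size are rejected only when ENABLE_VALIDATE_ARGS is set.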
   2013 
   2014 RPMALLOC_RESTRICT void*
   2015 rpmemalign(size_t alignment, size_t size) {
   2016 	return rpaligned_alloc(alignment, size);
   2017 }
   2018 
   2019 int
   2020 rpposix_memalign(void **memptr, size_t alignment, size_t size) {
   2021 	if (memptr)
   2022 		*memptr = rpaligned_alloc(alignment, size);
   2023 	else
   2024 		return EINVAL;
   2025 	return *memptr ? 0 : ENOMEM;
   2026 }
   2027 
   2028 size_t
   2029 rpmalloc_usable_size(void* ptr) {
   2030 	size_t size = 0;
   2031 	if (ptr) {
   2032 		size = _memory_usable_size(ptr);
   2033 #if ENABLE_GUARDS
   2034 		size -= 64;
   2035 #endif
   2036 	}
   2037 	return size;
   2038 }
   2039 
   2040 void
   2041 rpmalloc_thread_collect(void) {
   2042 	heap_t* heap = get_thread_heap();
   2043 	_memory_unmap_deferred(heap, 0);
   2044 	_memory_deallocate_deferred(heap, 0);
   2045 }
   2046 
   2047 void
   2048 rpmalloc_thread_statistics(rpmalloc_thread_statistics_t* stats) {
   2049 	memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
   2050 	heap_t* heap = get_thread_heap();
   2051 	void* p = atomic_load_ptr(&heap->defer_deallocate);
   2052 	while (p) {
   2053 		void* next = *(void**)p;
   2054 		span_t* span = (span_t*)(void*)((uintptr_t)p & _memory_span_mask);
   2055 		stats->deferred += _memory_size_class[span->size_class].size;
   2056 		p = next;
   2057 	}
   2058 
   2059 	for (size_t isize = 0; isize < SIZE_CLASS_COUNT; ++isize) {
   2060 		if (heap->active_block[isize].free_count)
   2061 			stats->active += heap->active_block[isize].free_count * _memory_size_class[heap->active_span[isize]->size_class].size;
   2062 
   2063 		span_t* cache = heap->size_cache[isize];
   2064 		while (cache) {
   2065 			stats->sizecache += cache->data.block.free_count * _memory_size_class[cache->size_class].size;
   2066 			cache = cache->next_span;
   2067 		}
   2068 	}
   2069 
   2070 #if ENABLE_THREAD_CACHE
   2071 	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
   2072 		if (heap->span_cache[iclass])
   2073 			stats->spancache += (size_t)heap->span_cache[iclass]->data.list.size * (iclass + 1) * _memory_span_size;
   2074 	}
   2075 #endif
   2076 }
   2077 
   2078 void
   2079 rpmalloc_global_statistics(rpmalloc_global_statistics_t* stats) {
   2080 	memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
   2081 #if ENABLE_STATISTICS
   2082 	stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
   2083 	stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
   2084 	stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
   2085 #endif
   2086 #if ENABLE_GLOBAL_CACHE
   2087 	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
   2088 		stats->cached += (size_t)atomic_load32(&_memory_span_cache[iclass].size) * (iclass + 1) * _memory_span_size;
   2089 	}
   2090 #endif
   2091 }
   2092 
   2093 }
   2094 
   2095 #ifdef _MSC_VER
   2096 #  pragma warning( pop )
   2097 #endif
   2098 
   2099 #endif