medfall

A super great game engine

thread.hpp (12569B)


/*  Relacy Race Detector
 *  Copyright (c) 2008-2013, Dmitry S. Vyukov
 *  All rights reserved.
 *  This software is provided AS-IS with no warranty, either express or implied.
 *  This software is distributed under a license and may not be copied,
 *  modified or distributed except as expressly authorized under the
 *  terms of the license contained in the file LICENSE in this distribution.
 */

#ifndef RL_THREAD_HPP
#define RL_THREAD_HPP
#ifdef _MSC_VER
#   pragma once
#endif

#include "base.hpp"
#include "context_base.hpp"
#include "dyn_thread_ctx.hpp"
#include "thread_base.hpp"
#include "test_suite.hpp"
#include "memory_order.hpp"
#include "foreach.hpp"

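// For orientation: thread_info below is internal machinery behind Relacy's
// user-facing test_suite API. A typical test, sketched after the canonical
// Relacy documentation example (names here are illustrative), looks like:
//
//   struct race_test : rl::test_suite<race_test, 2>
//   {
//       std::atomic<int> a;
//       rl::var<int> x;
//
//       void before() { a($) = 0; x($) = 0; }
//
//       void thread(unsigned thread_index)
//       {
//           if (0 == thread_index)
//           {
//               x($) = 1;
//               a($).store(1, rl::memory_order_relaxed);
//           }
//           else if (1 == a($).load(rl::memory_order_relaxed))
//               x($) = 2;
//       }
//   };
//
//   int main() { rl::simulate<race_test>(); }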

namespace rl
{



struct atomic_data;
struct var_data;
template<thread_id_t thread_count> struct atomic_data_impl;
template<thread_id_t thread_count> struct var_data_impl;


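// Per-thread simulation state for a model with a fixed number of threads.
// thread_info_base provides the non-template interface that the scheduler
// and atomic variables call through; this template supplies the vector-clock
// bookkeeping that tracks the happens-before relation between threads.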
template<thread_id_t thread_count>
struct thread_info : thread_info_base
{
    thread_info(thread_id_t index = 0)
        : thread_info_base(index, acq_rel_order_)
    {
    }

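    // Reset per-iteration state: clear this thread's view of every other
    // thread's timestamps and restart its own clock at 1.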
    void iteration_begin()
    {
        sync_object_.iteration_begin();
        last_yield_ = 0;
        dynamic_thread_func_ = 0;
        dynamic_thread_param_ = 0;
        for (thread_id_t j = 0; j != thread_count; ++j)
        {
            acq_rel_order_[j] = 0;
        }
        acq_rel_order_[index_] = 1;
        temp_switch_from_ = -1;
        saved_disable_preemption_ = -1;
    }

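    // Vector clocks, one timestamp slot per modeled thread: acq_rel_order_
    // is this thread's current view of every thread's progress, while
    // acquire_fence_order_ and release_fence_order_ buffer visibility that
    // standalone fences consume or publish.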
    thread_sync_object<thread_count> sync_object_;

    timestamp_t acq_rel_order_ [thread_count];
    timestamp_t acquire_fence_order_ [thread_count];
    timestamp_t release_fence_order_ [thread_count];

#ifdef RL_IMPROVED_SEQ_CST_FENCE
    timestamp_t imp_seq_cst_order_ [thread_count];
#endif

    virtual void on_start()
    {
        RL_VERIFY(temp_switch_from_ == -1);
        RL_VERIFY(saved_disable_preemption_ == -1);
        sync_object_.on_start();
    }

    virtual void on_finish()
    {
        RL_VERIFY(temp_switch_from_ == -1);
        RL_VERIFY(saved_disable_preemption_ == -1);
        sync_object_.on_finish();
    }

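    // Standalone fences operate purely on the vector clocks: an acquire
    // fence merges everything previously buffered by relaxed loads into the
    // thread's view, and a release fence snapshots the current view for
    // subsequent relaxed stores to publish.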
    void atomic_thread_fence_acquire()
    {
        foreach<thread_count>(
            acq_rel_order_,
            acquire_fence_order_,
            &assign_max);
    }

    void atomic_thread_fence_release()
    {
        foreach<thread_count>(
            release_fence_order_,
            acq_rel_order_,
            &assign);
    }

    void atomic_thread_fence_acq_rel()
    {
        atomic_thread_fence_acquire();
        atomic_thread_fence_release();
    }

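    // A seq_cst fence is an acq_rel fence that additionally merges through
    // the context-global seq_cst_fence_order, which totally orders all
    // seq_cst fences within the execution.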
    void atomic_thread_fence_seq_cst(timestamp_t* seq_cst_fence_order)
    {
#ifdef RL_IMPROVED_SEQ_CST_FENCE
        foreach<thread_count>(acq_rel_order_, imp_seq_cst_order_, assign_max);
#endif

        atomic_thread_fence_acquire();

        foreach<thread_count>(
            acq_rel_order_,
            seq_cst_fence_order,
            &assign_max);

        foreach<thread_count>(
            seq_cst_fence_order,
            acq_rel_order_,
            &assign);

        atomic_thread_fence_release();
    }

    virtual ~thread_info() {} // just to calm down gcc

private:
    thread_info(thread_info const&);
    thread_info& operator = (thread_info const&);

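    // Non-template entry points called through thread_info_base: each one
    // simply forwards to the templated implementation with the memory order
    // (and RMW-ness) baked in as compile-time parameters.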
    virtual unsigned atomic_load_relaxed(atomic_data* RL_RESTRICT data)
    {
        return atomic_load<mo_relaxed, false>(data);
    }

    virtual unsigned atomic_load_acquire(atomic_data* RL_RESTRICT data)
    {
        return atomic_load<mo_acquire, false>(data);
    }

    virtual unsigned atomic_load_seq_cst(atomic_data* RL_RESTRICT data)
    {
        return atomic_load<mo_seq_cst, false>(data);
    }

    virtual unsigned atomic_load_relaxed_rmw(atomic_data* RL_RESTRICT data)
    {
        return atomic_load<mo_relaxed, true>(data);
    }

    virtual unsigned atomic_load_acquire_rmw(atomic_data* RL_RESTRICT data)
    {
        return atomic_load<mo_acquire, true>(data);
    }

    virtual unsigned atomic_load_seq_cst_rmw(atomic_data* RL_RESTRICT data)
    {
        return atomic_load<mo_seq_cst, true>(data);
    }

    virtual unsigned atomic_store_relaxed(atomic_data* RL_RESTRICT data)
    {
        return atomic_store<mo_relaxed, false>(data);
    }

    virtual unsigned atomic_store_release(atomic_data* RL_RESTRICT data)
    {
        return atomic_store<mo_release, false>(data);
    }

    virtual unsigned atomic_store_seq_cst(atomic_data* RL_RESTRICT data)
    {
        return atomic_store<mo_seq_cst, false>(data);
    }

    virtual unsigned atomic_rmw_relaxed(atomic_data* RL_RESTRICT data, bool& aba)
    {
        return atomic_rmw<mo_relaxed>(data, aba);
    }

    virtual unsigned atomic_rmw_acquire(atomic_data* RL_RESTRICT data, bool& aba)
    {
        return atomic_rmw<mo_acquire>(data, aba);
    }

    virtual unsigned atomic_rmw_release(atomic_data* RL_RESTRICT data, bool& aba)
    {
        return atomic_rmw<mo_release>(data, aba);
    }

    virtual unsigned atomic_rmw_acq_rel(atomic_data* RL_RESTRICT data, bool& aba)
    {
        return atomic_rmw<mo_acq_rel>(data, aba);
    }

    virtual unsigned atomic_rmw_seq_cst(atomic_data* RL_RESTRICT data, bool& aba)
    {
        return atomic_rmw<mo_seq_cst>(data, aba);
    }

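    // Choose which store in the variable's history this load may observe.
    // Starting from the most recent record, walk backwards while the memory
    // model still permits an older value, stopping at any record the current
    // thread already synchronizes with; under random scheduling a coin flip
    // decides how far back to actually go.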
    template<memory_order mo, bool rmw>
    unsigned get_load_index(atomic_data_impl<thread_count>& var)
    {
        typedef typename atomic_data_impl<thread_count>::history_record history_t;

        unsigned index = var.current_index_;
        context& c = ctx();

        if (false == val(rmw))
        {
            size_t const limit = c.is_random_sched() ? atomic_history_size - 1 : 1;
            for (size_t i = 0; i != limit; ++i, --index)
            {
                history_t const& rec = var.history_[index % atomic_history_size];
                if (false == rec.busy_)
                    return (unsigned)-1; // access to uninitialized var

                history_t const& prev = var.history_[(index - 1) % atomic_history_size];
                if (prev.busy_ && prev.last_seen_order_[index_] <= last_yield_)
                    break;

                if (mo_seq_cst == val(mo) && rec.seq_cst_)
                    break;

                timestamp_t acq_rel_order =
                    acq_rel_order_[rec.thread_id_];

                if (acq_rel_order >= rec.acq_rel_timestamp_)
                    break;

                bool stop = false;
                for (thread_id_t t = 0; t != thread_count; ++t)
                {
                    timestamp_t acq_rel_order2 = acq_rel_order_[t];
                    if (acq_rel_order2 >= rec.last_seen_order_[t])
                    {
                        stop = true;
                        break;
                    }
                }
                if (stop)
                    break;

                if (0 == c.rand(2, sched_type_atomic_load))
                    break;
            }
        }

        if (false == var.history_[index % atomic_history_size].busy_)
            return (unsigned)-1;

        return index;
    }

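    // Perform a load: pick a visible history record, stamp it as seen by
    // this thread, and (for acquire and stronger) merge the store's
    // published vector clock into this thread's view; relaxed loads only
    // buffer it into acquire_fence_order_ for a later acquire fence.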
    template<memory_order mo, bool rmw>
    unsigned atomic_load(atomic_data* RL_RESTRICT data)
    {
        RL_VERIFY(mo_release != mo || rmw);
        RL_VERIFY(mo_acq_rel != mo || rmw);

        atomic_data_impl<thread_count>& var =
            *static_cast<atomic_data_impl<thread_count>*>(data);

        typedef typename atomic_data_impl<thread_count>::history_record history_t;

        unsigned index = get_load_index<mo, rmw>(var);
        if ((unsigned)-1 == index)
            return (unsigned)-1;

        index %= atomic_history_size;
        history_t& rec = var.history_[index];
        RL_VERIFY(rec.busy_);

        own_acq_rel_order_ += 1;
        rec.last_seen_order_[index_] = own_acq_rel_order_;

        bool const synch =
            (mo_acquire == mo
            || mo_acq_rel == mo
            || mo_seq_cst == mo);

        timestamp_t* acq_rel_order = (synch ? acq_rel_order_ : acquire_fence_order_);

        foreach<thread_count>(acq_rel_order, rec.acq_rel_order_, assign_max);

        return index;
    }

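    // Initialization writes a fresh history record with a zeroed vector
    // clock; it carries no synchronization of its own.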
    virtual unsigned atomic_init(atomic_data* RL_RESTRICT data)
    {
        atomic_data_impl<thread_count>& var =
            *static_cast<atomic_data_impl<thread_count>*>(data);

        typedef typename atomic_data_impl<thread_count>::history_record history_t;

        unsigned const idx = ++var.current_index_ % atomic_history_size;
        history_t& rec = var.history_[idx];

        rec.busy_ = true;
        rec.thread_id_ = index_;
        rec.seq_cst_ = false;
        rec.acq_rel_timestamp_ = 0;

        foreach<thread_count>(rec.acq_rel_order_, assign_zero);

        return idx;
    }

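    // Perform a store: allocate the next history record, stamp it with this
    // thread's own timestamp, and attach the vector clock that release-type
    // stores publish (or the release-fence snapshot for relaxed stores).
    // If the previous record must stay visible (same thread, or an RMW),
    // its clock is carried forward and merged in.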
    template<memory_order mo, bool rmw>
    unsigned atomic_store(atomic_data* RL_RESTRICT data)
    {
        RL_VERIFY(mo_consume != mo || rmw);
        RL_VERIFY(mo_acquire != mo || rmw);
        RL_VERIFY(mo_acq_rel != mo || rmw);

        atomic_data_impl<thread_count>& var =
            *static_cast<atomic_data_impl<thread_count>*>(data);

        typedef typename atomic_data_impl<thread_count>::history_record history_t;

        unsigned const idx = ++var.current_index_ % atomic_history_size;
        history_t& rec = var.history_[idx];

        rec.busy_ = true;
        rec.thread_id_ = index_;
        rec.seq_cst_ = (mo_seq_cst == mo);

        own_acq_rel_order_ += 1;
        rec.acq_rel_timestamp_ = own_acq_rel_order_;

        foreach<thread_count>(rec.last_seen_order_, assign<(timestamp_t)-1>);

        rec.last_seen_order_[index_] = own_acq_rel_order_;

        unsigned const prev_idx = (var.current_index_ - 1) % atomic_history_size;
        history_t& prev = var.history_[prev_idx];

#ifdef RL_IMPROVED_SEQ_CST_FENCE
        if (val(mo) == mo_release && val(rmw) == false)
            foreach<thread_count>(imp_seq_cst_order_, prev.acq_rel_order_, assign_max);
#endif

        bool const synch =
            (mo_release == mo
            || mo_acq_rel == mo
            || mo_seq_cst == mo);

        bool const preserve =
            prev.busy_ && (rmw || (index_ == prev.thread_id_));

        timestamp_t* acq_rel_order = (synch ? acq_rel_order_ : release_fence_order_);

        if (preserve)
        {
            foreach<thread_count>(rec.acq_rel_order_, prev.acq_rel_order_, assign);
            foreach<thread_count>(rec.acq_rel_order_, acq_rel_order, assign_max);
        }
        else
        {
            foreach<thread_count>(rec.acq_rel_order_, acq_rel_order, assign);
        }

        return idx;
    }

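    // An RMW is modeled as an atomic load immediately followed by a store.
    // The aba flag is raised when this thread has never observed the
    // variable's current record (its last_seen slot still holds the
    // (timestamp_t)-1 sentinel written by the store), signaling a potential
    // ABA situation for compare-and-swap.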
    template<memory_order mo>
    unsigned atomic_rmw(atomic_data* RL_RESTRICT data, bool& aba)
    {
        atomic_data_impl<thread_count>& var =
            *static_cast<atomic_data_impl<thread_count>*>(data);
        timestamp_t const last_seen = var.history_[var.current_index_ % atomic_history_size].last_seen_order_[index_];
        aba = (last_seen > own_acq_rel_order_);
        atomic_load<mo, true>(data);
        unsigned result = atomic_store<mo, true>(data);

#ifdef RL_IMPROVED_SEQ_CST_RMW
        atomic_thread_fence_seq_cst(ctx_->seq_cst_fence_order_);
#endif

        return result;
    }

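    // Futex-style blocking: park the current thread on the variable's wait
    // set, and on a normal wakeup acquire the synchronization released by
    // the waker in atomic_wake.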
    virtual unpark_reason atomic_wait(atomic_data* RL_RESTRICT data, bool is_timed, bool allow_spurious_wakeup, debug_info_param info)
    {
        context& c = ctx();
        atomic_data_impl<thread_count>& var =
            *static_cast<atomic_data_impl<thread_count>*>(data);
        unpark_reason const res = var.futex_ws_.park_current(c, is_timed, allow_spurious_wakeup, false, info);
        if (res == unpark_reason_normal)
            var.futex_sync_.acquire(this);
        return res;
    }

    virtual thread_id_t atomic_wake(atomic_data* RL_RESTRICT data, thread_id_t count, debug_info_param info)
    {
        context& c = ctx();
        atomic_data_impl<thread_count>& var =
            *static_cast<atomic_data_impl<thread_count>*>(data);
        thread_id_t unblocked = 0;
        for (; count != 0; count -= 1, unblocked += 1)
        {
            if (var.futex_ws_.unpark_one(c, info) == false)
                break;
        }
        if (unblocked != 0)
            var.futex_sync_.release(this);
        return unblocked;
    }
};


}

#endif