00001
00011 #ifndef TBCI_MALLOC_CACHE_H
00012 #define TBCI_MALLOC_CACHE_H
00013
00014 #define MALLOC_CACHE 1
00015
00016 #ifndef HAVE_NO_NEW_HEADERS_BUG
00017 # include <cstdio>
00018 #else
00019 # include "stdio.h"
00020 #endif
00021
00022 #ifdef TBCI_MALLOC_STATS
00023 # include <typeinfo>
00024 #endif
00025
00026 #ifdef HAVE_MALLOC_ATTR
00027 # define MALLOC __attribute__((malloc))
00028 #else
00029 # define MALLOC
00030 #endif
00031
00032 #ifdef HAVE_MALLOC_H // memalign
00033 # include <malloc.h>
00034 #endif
00035
00036 #if defined(SMP) && !defined(TBCI_MALLOC_NOLOCK)
00037 # define TBCI_MALLOC_LOCK 1
00038 # include "smp.h"
00039 #endif
00040
00041 #if defined(__GNUC__) && defined(SMP) && defined(TBCI_MALLOC_NOLOCK)
00042 # warning "SMP support but no locking in memalloc_cache. Don't do memory\
00043 de/allocations from threads; otherwise you face corruption!"
00044 #endif
00045
00046
00047
00048 #ifdef DEBUG_MALLOC
00049 # define DEBUG_PRINTF1(arg1) CSTD__ fprintf(stderr, arg1)
00050 # define DEBUG_PRINTF2(a1,a2) CSTD__ fprintf(stderr, a1, a2)
00051 # define DEBUG_PRINTF3(a1,a2,a3) CSTD__ fprintf(stderr, a1, a2, a3)
00052 # define DEBUG_PRINTF4(a1,a2,a3,a4) CSTD__ fprintf(stderr, a1, a2, a3, a4)
00053 #else
00054 # define DEBUG_PRINTF1(arg1) do {} while (0)
00055 # define DEBUG_PRINTF2(a1,a2) do {} while (0)
00056 # define DEBUG_PRINTF3(a1,a2,a3) do {} while (0)
00057 # define DEBUG_PRINTF4(a1,a2,a3,a4) do {} while (0)
00058 #endif
00059
00060 NAMESPACE_TBCI
00061 #ifndef TBCI_MALLOC_POOLSZ
00062 # define TBCI_MALLOC_POOLSZ 8
00063 #endif
00064 #ifndef TBCI_MALLOC_LIMIT
00065 # define TBCI_MALLOC_LIMIT 8192
00066 #endif
00067
00068 #if !defined(__GNUC__) && !defined(__BORLANDC__)
00069 # define DUMMY1 const T* const dummy
00070 # define DUMMY2(TYPE) const TYPE * const dummy
00071 # define DUMMY3(t) (t*)0
00072 # define FGD2
00073 #else // GCC || Borland
00074 # define DUMMY1
00075 # define DUMMY2(TYPE)
00076 # define DUMMY3(t)
00077 # define FGD2 FGDT
00078 #endif
00079 #ifndef MINLINE
00080 # define MINLINE inline
00081 #endif
00082
00083
00084 #if defined(__INTEL_COMPILER) || defined(_MSC_VER) || defined(__BORLANDC__) || (defined(_SGI_SOURCE) && defined(_COMPILER_VERSION) && !defined(__GNUC__))
00085 # define NO_SINGLETON
00086 # define NO_SINGLETON_PUBLIC public:
00087
00088 #else
00089 # define NO_SINGLETON_PUBLIC
00090 #endif
00091
00092 template <typename T> class tbci_memalloc;
00093 template <typename T> inline tbci_memalloc<T>& tbci_s_allocator (DUMMY1);
00094
00095
00100 template <typename T>
00101 class tbci_memalloc {
00102 protected:
00103
00104 NO_SINGLETON_PUBLIC
00105 tbci_memalloc() {}
00106 ~tbci_memalloc() {}
00107 public:
00108 friend tbci_memalloc<T>& tbci_s_allocator FGD2 (DUMMY1);
00109 static T* alloc(const unsigned long sz)
00110 { return new T[sz]; }
00111 static void dealloc(const T* const ptr, const unsigned long sz)
00112 { delete [] (T*)ptr; }
00113 };
00114
00115
00116 #ifndef TBCI_SIMD_ALIGN
00117 # define TBCI_SIMD_ALIGN 16
00118 #endif
00119 #ifndef TBCI_MALLOC_ALIGN_FACT
00120 # define TBCI_MALLOC_ALIGN_FACT 1
00121 #endif
00122
00123 #if !defined(__SSE2__) || defined(__x86_64__) || !defined(HAVE_MALLOC_H)
00124 # define _MC_NEW(T,SZ) new T[SZ]
00125 # define _MC_DELETE(T,PTR) delete[] (T*)PTR
00126 # ifndef __x86_64__
00127 # define SSE_VARS_MAY_BE_UNALIGNED
00128 # endif
00129 #else
00130 # define _MC_NEW(T,SZ) (T*)memalign(TBCI_SIMD_ALIGN*TBCI_MALLOC_ALIGN_FACT, SZ*sizeof(T))
00131 # define _MC_DELETE(T,PTR) free((T*)PTR)
00132 #endif
00133
00134 #if defined(__x86_64__) || defined(__i386__)
00135 # define INC(x) asm volatile ("lock incl %0" : "=m" (x): "m" (x));
00136 #else
00137 # define INC(x) ++x;
00138 #endif
00139
00168 #if defined(HAVE_TLS) && !defined(TBCI_MALLOC_FORCESPINLOCK)
00169 # define TBCI_MALLOC_NOSPINLOCK
00170 #endif
00171
00172 #if !defined(TBCI_MALLOC_NOSPINLOCK) && defined(HAVE_PTHREAD_SPINLOCK) && defined(TBCI_MALLOC_LOCK)
00173
00174 #define TBCI_MALLOC_SPINLOCK
00175 #include <errno.h>
00176
00177 #if defined(__i386__) || defined(__x86_64__)
00178 # define _cpu_relax() asm ("rep; nop")
00179 #else
00180 # define _cpu_relax()
00181 #endif
00182
00183 #ifndef TBCI_MAXLOCKTRIES
00184 # define TBCI_MAXLOCKTRIES 4
00185 #endif
00186
00187 #ifdef TBCI_MALLOC_STATS
00188 static volatile unsigned long locks_taken = 0;
00189 static volatile unsigned long locks_tried = 0;
00190 static volatile unsigned long locks_direct = 0;
00191 static volatile unsigned long locks_yield = 0;
00192 #endif
00193
00194
00195
00196
00197
00198
00199
00200 static inline void _spin_lock(pthread_spinlock_t* lock)
00201 {
00202 unsigned int tries = 0;
00203 int err;
00204 while (0 != (err = pthread_spin_trylock(lock))) {
00205 #ifdef PARANOIA
00206 if (UNLIKELY(err != EBUSY)) {
00207 fprintf(stderr, "LOCKING ERROR: %s\n", strerror(err));
00208 abort();
00209 }
00210 #endif
00211 if (0 == ++tries % TBCI_MAXLOCKTRIES) {
00212 #ifdef TBCI_MALLOC_STATS
00213 INC(locks_yield);
00214 #endif
00215 sched_yield();
00216 }
00217 _cpu_relax();
00218 }
00219 #ifdef TBCI_MALLOC_STATS
00220 INC(locks_taken);
00221 locks_tried += tries;
00222 if (!tries)
00223 INC(locks_direct);
00224 #endif
00225 }
00226
00227 static inline void _spin_unlock(pthread_spinlock_t* lock)
00228 {
00229 pthread_spin_unlock(lock);
00230 }
00231
00232 # define SMP_LOCK _spin_lock(&this->lock)
00233 # define SMP_UNLOCK _spin_unlock(&this->lock)
00234
00235 #else
00236 # define SMP_LOCK
00237 # define SMP_UNLOCK
00238 #endif
00239
00240 #if __WORDSIZE == 64
00241 template <typename T>
00242 struct tbci_memalloc_cache_tls
00243 {
00244 const T *free_pt[TBCI_MALLOC_POOLSZ];
00245 unsigned int free_sz[TBCI_MALLOC_POOLSZ];
00246 unsigned int free_idx;
00247
00248 volatile unsigned int hit, srch, all, miss;
00249 } ALIGN(64);
00250 #else
00251 template <typename T>
00252 struct tbci_memalloc_cache_tls
00253 {
00254 const T *free_pt[TBCI_MALLOC_POOLSZ];
00255 unsigned short free_sz[TBCI_MALLOC_POOLSZ];
00256 unsigned char free_idx;
00257 volatile unsigned int miss:24;
00258 volatile unsigned int hit, srch, all;
00259 } ALIGN(64);
00260 #endif
00261
00272 template <typename T>
00273 struct tbci_memalloc_cache
00274 {
00275 tbci_memalloc_cache_tls<T> *malloc_tls;
00276 #ifdef TBCI_MALLOC_SPINLOCK
00277 pthread_spinlock_t lock;
00278 #endif
00279 #ifdef SMP
00280 int nthr;
00281 #endif
00282
00283 int find_by_sz(const unsigned ln) const;
00284 void enter(const T* const ptr, const unsigned ln);
00285 void free_and_enter(const T* const ptr, const unsigned ln);
00286 void rmv(const unsigned idx);
00287 const T * find_and_rmv(const unsigned sz);
00288 T * alloc(const unsigned long) MALLOC;
00289 void dealloc(const T* const, const unsigned long);
00290 #ifdef SMP
00291 static void _init(tbci_memalloc_cache<T>* th, const int thr)
00292 {
00293 if (th->nthr >= thr)
00294 return;
00295 tbci_memalloc_cache_tls<T> *new_malloc_tls;
00296 new_malloc_tls = (tbci_memalloc_cache_tls<T>*)
00297 memalign(128, sizeof(tbci_memalloc_cache_tls<T>) *(thr+1));
00298 memcpy(new_malloc_tls, th->malloc_tls,
00299 sizeof(tbci_memalloc_cache_tls<T>) *(th->nthr+1));
00300 SWAP(new_malloc_tls, th->malloc_tls);
00301 th->nthr = thr;
00302 free(new_malloc_tls);
00303 th->smp_init();
00304 }
00305 static void _deinit(tbci_memalloc_cache<T>* th, const int thr)
00306 { th->smp_deinit(); }
00307 #endif
00308 void deinit(const int thr);
00309 void init(const int thr)
00310 {
00311 #ifdef SMP
00312 BCHK(thr > nthr, NumErr, thread malloc not allocated, thr, );
00313 #endif
00314 tbci_memalloc_cache_tls<T> * const pool = malloc_tls+thr;
00315 CSTD__ memset(pool, 0, sizeof(struct tbci_memalloc_cache_tls<T>));
00316 pool->free_idx = TBCI_MALLOC_POOLSZ-1;
00317 #ifdef TBCI_MALLOC_SPINLOCK
00318 pthread_spin_init(&lock, 0);
00319 #endif
00320
00321 }
00322 #ifdef SMP
00323 void smp_init()
00324 {
00325
00326
00327 for (int t = 1; t <= nthr; ++t)
00328 init(t);
00329 }
00330 #endif
00331 void smp_deinit()
00332 {
00333 #ifdef TBCI_MALLOC_STATS
00334 unsigned int hit, srch, all, miss;
00335 hit = srch = all = miss = 0;
00336 #endif
00337 #ifdef SMP
00338 if (nthr) {
00339
00340
00341 for (int t = nthr; t > 0; --t) {
00342 #ifdef TBCI_MALLOC_STATS
00343 hit += malloc_tls[t].hit; srch += malloc_tls[t].srch;
00344 all += malloc_tls[t].all; miss += malloc_tls[t].miss;
00345 #endif
00346 deinit(t);
00347 }
00348 nthr = 0; thrno = 0;
00349 }
00350 #endif
00351 #ifdef TBCI_MALLOC_STATS
00352 hit += malloc_tls[0].hit; srch += malloc_tls[0].srch;
00353 all += malloc_tls[0].all; miss += malloc_tls[0].miss;
00354 CSTD__ fprintf(stderr, "malloc_cache<%s>: %i hits, %i misses, %i searches, %i allocs\n",
00355 typeid(T).name(), hit, miss, srch, all);
00356 #ifdef TBCI_MALLOC_SPINLOCK
00357 CSTD__ fprintf(stderr, "malloc_cache<%s>: %lu locks, %lu tries, %lu direct, %lu yields\n",
00358 typeid(T).name(), locks_taken, locks_tried, locks_direct, locks_yield);
00359 #endif
00360 #endif
00361 }
00362 tbci_memalloc_cache()
00363 #ifdef SMP
00364 : nthr(num_threads)
00365 #endif
00366 {
00367
00368 malloc_tls = (tbci_memalloc_cache_tls<T>*)
00369 memalign(128, sizeof(tbci_memalloc_cache_tls<T>) *(num_threads+1));
00370 init(0);
00371 #ifdef SMP
00372 if (nthr) {
00373 smp_init();
00374
00375
00376 }
00377 thread_reg_callback((cbackfn*) &_init,
00378 (cbackfn*) &_deinit, (void*)this);
00379 #endif
00380 }
00381 ~tbci_memalloc_cache()
00382 {
00383 #ifdef SMP
00384 thread_dereg_callback((cbackfn*) &_init,
00385 (cbackfn*) &_deinit, (void*)this);
00386 #endif
00387 smp_deinit();
00388 deinit(0);
00389
00390 free(malloc_tls);
00391 }
00392 };
00393
00395 template <typename T>
00396 inline int tbci_memalloc_cache<T>::find_by_sz(const unsigned sz) const
00397 {
00398 DEBUG_PRINTF2("Search for free chunk of sz %i: ", sz);
00399 tbci_memalloc_cache_tls<T> * const pool = malloc_tls+thrno;
00400 #ifdef SMP
00401 BCHK(thrno > nthr, NumErr, thread malloc_cache too small, thrno, -1);
00402 #endif
00403 for (unsigned int _i = 1; _i <= TBCI_MALLOC_POOLSZ; ++_i) {
00404 const unsigned fidx = pool->free_idx;
00405 const unsigned int i = (fidx + _i) % TBCI_MALLOC_POOLSZ;
00406 DEBUG_PRINTF2("%i ", i);
00407 if (sz == pool->free_sz[i]) {
00408 DEBUG_PRINTF1("(found)\n");
00409 #ifdef TBCI_MALLOC_STATS
00410 ++pool->hit;
00411 #endif
00412 return i;
00413 }
00414 #ifdef TBCI_MALLOC_STATS
00415 ++pool->srch;
00416 #endif
00417 }
00418 DEBUG_PRINTF1("not found!\n");
00419 #ifdef TBCI_MALLOC_STATS
00420 ++pool->miss;
00421 #endif
00422 return -1;
00423 }
00424
00426 template <typename T>
00427 inline void tbci_memalloc_cache<T>::enter(const T* const ptr, const unsigned ln)
00428 {
00429 tbci_memalloc_cache_tls<T> * const pool = malloc_tls+thrno;
00430 const unsigned fidx = pool->free_idx;
00431 pool->free_pt[fidx] = ptr;
00432 pool->free_sz[fidx] = ln;
00433 pool->free_idx = (TBCI_MALLOC_POOLSZ + fidx - 1) % TBCI_MALLOC_POOLSZ;
00434 }
00435
00437 template <typename T>
00438 inline void tbci_memalloc_cache<T>::rmv(const unsigned idx)
00439 {
00440 tbci_memalloc_cache_tls<T> * const pool = malloc_tls+thrno;
00441 pool->free_sz[idx] = 0;
00442 pool->free_idx = idx;
00443 }
00444
00445 template <typename T>
00446 inline const T* tbci_memalloc_cache<T>::find_and_rmv(const unsigned sz)
00447 {
00448 SMP_LOCK;
00449 const T* ptr = 0;
00450 const int idx = find_by_sz(sz);
00451 if (LIKELY(idx >= 0)) {
00452 rmv(idx);
00453 ptr = malloc_tls[thrno].free_pt[idx];
00454 }
00455 SMP_UNLOCK;
00456 return ptr;
00457 }
00458
00459
00460 template <typename T>
00461 inline T* tbci_memalloc_cache<T>::alloc(const unsigned long sz)
00462 {
00463 BCHK(!sz, NumErr, alloc 0 bytes, sz, (T*)0);
00464 #ifdef TBCI_MALLOC_STATS
00465 ++malloc_tls[thrno].all;
00466 #endif
00467 #if defined(TBCI_MALLOC_LOCK) && !defined(TBCI_MALLOC_SPINLOCK)
00468
00469 if (0 && !ismainthread)
00470 return _MC_NEW(T, sz);
00471 #endif
00472 if (UNLIKELY (sz > TBCI_MALLOC_LIMIT/sizeof(T)))
00473 return _MC_NEW(T, sz);
00474
00475 const T* ptr = find_and_rmv(sz);
00476 if (!ptr)
00477 ptr = _MC_NEW(T, sz);
00478
00479 DEBUG_PRINTF3("Allocated %p (%li)\n", ptr, sz);
00480 return (T*) ptr;
00481 }
00482
00483 template <typename T>
00484 inline void tbci_memalloc_cache<T>::free_and_enter(const T* const ptr, const unsigned sz)
00485 {
00486 const T* tofree = 0;
00487 SMP_LOCK;
00488
00489 tbci_memalloc_cache_tls<T> * const pool = malloc_tls+thrno;
00490 const unsigned fidx = pool->free_idx;
00491 if (UNLIKELY(pool->free_sz[fidx] != 0))
00492 tofree = pool->free_pt[fidx];
00493 DEBUG_PRINTF4("\"Freed\" %p (%li) -> %i\n", ptr, sz, fidx);
00494 enter(ptr, sz);
00495 SMP_UNLOCK;
00496 if (LIKELY(!tofree))
00497 return;
00498 _MC_DELETE(T, tofree);
00499 }
00500
00501 template <typename T>
00502 inline void tbci_memalloc_cache<T>::dealloc(const T* const ptr, const unsigned long sz)
00503 {
00504 BCHK (!ptr||!sz, NumErr, dealloc null ptr or 0 bytes, (long)ptr, );
00505
00506 #ifdef MALLOC_POISON
00507 CSTD__ memset((void*)ptr, 0xa5, sz*sizeof(T));
00508 #endif
00509 #if defined(TBCI_MALLOC_LOCK) && !defined(TBCI_MALLOC_SPINLOCK)
00510 if (0 && !ismainthread) {
00511 _MC_DELETE(T, ptr); return;
00512 }
00513 #endif
00514
00515 if (UNLIKELY (sz > TBCI_MALLOC_LIMIT/sizeof(T)))
00516 _MC_DELETE(T, ptr);
00517 else
00518 free_and_enter(ptr, sz);
00519 }
00520
00521 template <typename T>
00522 inline void tbci_memalloc_cache<T>::deinit(const int thr)
00523 {
00524 DEBUG_PRINTF1("Freeing memory pool ...\n");
00525
00526 tbci_memalloc_cache_tls<T> * const pool = malloc_tls+thr;
00527
00528 for (int _i = 0; _i < TBCI_MALLOC_POOLSZ; ++_i) {
00529 if (pool->free_sz[_i]) {
00530 DEBUG_PRINTF3("Freeing mem %p (size %i)\n",
00531 pool->free_pt[_i], pool->free_sz[_i]);
00532 pool->free_sz[_i] = 0;
00533 _MC_DELETE(T, pool->free_pt[_i]);
00534 }
00535 }
00536 #ifdef TBCI_MALLOC_SPINLOCK
00537 pthread_spin_destroy(&lock);
00538 #endif
00539 }
00540
00543 #define SPECIALIZE_MEMALLOC_CLASS(TYPE) \
00544 template <> \
00545 class tbci_memalloc<TYPE > { \
00546 typedef TYPE T; \
00547 protected: \
00548 tbci_memalloc_cache<T > m_cache; \
00549 NO_SINGLETON_PUBLIC \
00550 tbci_memalloc() {} \
00551 ~tbci_memalloc() {} \
00552 public: \
00553 friend tbci_memalloc<T >& tbci_s_allocator FGD2 (DUMMY1); \
00554 T* alloc(const unsigned long sz) { return m_cache.alloc(sz); } \
00555 void dealloc(const T* const ptr, const unsigned long sz) \
00556 { m_cache.dealloc(ptr,sz); } \
00557 };
00558
00559
00560 template <typename T> \
00561 MINLINE tbci_memalloc<T>& tbci_s_allocator (DUMMY1)
00562 {
00563 static tbci_memalloc<T> _tbci_s_alloc_ANON;
00564 return _tbci_s_alloc_ANON;
00565 }
00566
00567 #if defined(__MINGW32__)
00568 #define SPECIALIZE_MEMALLOC(TYPE)
00569 #define SPECIALIZE_MEMALLOC2(TYPE,SHTP)
00570 #else
00571
00572 #define SPECIALIZE_MEMALLOC(TYPE) \
00573 SPECIALIZE_MEMALLOC_CLASS(TYPE) \
00574 template <> \
00575 MINLINE tbci_memalloc<TYPE >& tbci_s_allocator<TYPE > (DUMMY2(TYPE)) \
00576 { \
00577 static tbci_memalloc<TYPE > _tbci_s_alloc_##TYPE; \
00578 return _tbci_s_alloc_##TYPE; \
00579 }
00580
00581 #define SPECIALIZE_MEMALLOC2(TYPE,SHTP) \
00582 SPECIALIZE_MEMALLOC_CLASS(TYPE) \
00583 template <> \
00584 MINLINE tbci_memalloc<TYPE >& tbci_s_allocator<TYPE > (DUMMY2(TYPE)) \
00585 { \
00586 static tbci_memalloc<TYPE > _tbci_s_alloc_##SHTP; \
00587 return _tbci_s_alloc_##SHTP; \
00588 }
00589 #endif
00590
00591
00592
00593 #if 0 // defined(TBCI_SELECTIVE_INST) && !defined(TBCI_INSTANTIATE) && !defined(AUTO_DECL)
00594 # include "malloc_cache_gd.h"
00595 #else
00596
00597 SPECIALIZE_MEMALLOC(double)
00598 typedef double* doubleptr;
00599 SPECIALIZE_MEMALLOC(doubleptr)
00600 SPECIALIZE_MEMALLOC(float)
00601 typedef float* floatptr;
00602 SPECIALIZE_MEMALLOC(floatptr)
00603 SPECIALIZE_MEMALLOC(unsigned)
00604 typedef unsigned int* uintptr;
00605 SPECIALIZE_MEMALLOC(uintptr)
00606 SPECIALIZE_MEMALLOC(int)
00607 typedef int* intptr;
00608 SPECIALIZE_MEMALLOC(intptr)
00609 SPECIALIZE_MEMALLOC2(unsigned char, uchar)
00610 SPECIALIZE_MEMALLOC2(signed char, schar)
00611 typedef char* charptr;
00612 SPECIALIZE_MEMALLOC(charptr)
00613 typedef unsigned char* ucharptr;
00614 SPECIALIZE_MEMALLOC(ucharptr)
00615 SPECIALIZE_MEMALLOC(long)
00616 typedef long* longptr;
00617 SPECIALIZE_MEMALLOC(longptr)
00618 SPECIALIZE_MEMALLOC2(unsigned long, ulong)
00619 typedef unsigned long* ulongptr;
00620 SPECIALIZE_MEMALLOC(ulongptr)
00621 SPECIALIZE_MEMALLOC2(unsigned short, ushort)
00622 typedef void* voidptr;
00623 SPECIALIZE_MEMALLOC(voidptr)
00624
00625 #endif // TBCI_SELECTIVE_INST
00626
00627 #define NEW(t, s) tbci_s_allocator<t>(DUMMY3(t)).alloc(s)
00628 #define TBCIDELETE(t, v, sz) tbci_s_allocator<t>(DUMMY3(t)).dealloc(v, sz)
00629 #define TBCIDELETE_RO(t, v, sz) tbci_s_allocator<t>(DUMMY3(t)).dealloc(v, sz)
00630 #define REALLOC(v, os, t, s) do { \
00631 t *ptr = NEW(t, s); \
00632 if (LIKELY(v)) { \
00633 TBCICOPY(ptr, v, t, MIN(os, s));\
00634 TBCIDELETE(t, v, os); \
00635 } \
00636 v = ptr; \
00637 } while (0)
00638
00639 NAMESPACE_END
00640 #endif