diff options
Diffstat (limited to 'nedmalloc.h')
-rw-r--r-- | nedmalloc.h | 1620 |
1 files changed, 1620 insertions, 0 deletions
diff --git a/nedmalloc.h b/nedmalloc.h new file mode 100644 index 0000000..ba699be --- /dev/null +++ b/nedmalloc.h @@ -0,0 +1,1620 @@ +/* nedalloc, an alternative malloc implementation for multiple threads without +lock contention based on dlmalloc v2.8.4. (C) 2005-2010 Niall Douglas + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + +#ifndef NEDMALLOC_H +#define NEDMALLOC_H + +/*! \file nedmalloc.h +\brief Defines the functionality provided by nedalloc. +*/ + +/*! \mainpage + +<a href="../../Readme.html">Please see the Readme.html</a> +*/ + +/*! 
\def NEDMALLOC_DEBUG +\brief Defines the assertion checking performed by nedalloc + +NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc +than for the rest of the build. Remember to set NDEBUG to disable all assertion +checking too. +*/ + +/*! \def ENABLE_LARGE_PAGES +\brief Defines whether nedalloc uses large pages (>=2Mb) + +ENABLE_LARGE_PAGES enables support for requesting memory from the system in large +(typically >=2Mb) pages if the host OS supports this. These occupy just a single +TLB entry and can significantly improve performance in large working set applications. +*/ + +/*! \def ENABLE_FAST_HEAP_DETECTION +\brief Defines whether nedalloc takes platform specific shortcuts when detecting foreign blocks. + +ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated +by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc +blocks, but it assumes that the NT and glibc heaps function in a very specific +fashion which may not hold true across OS upgrades. +*/ + +/*! \def HAVE_CPP0XRVALUEREFS +\ingroup C++ +\brief Enables rvalue references + +Define to enable the usage of rvalue references which enables move semantics and +other things. Automatically defined if __cplusplus indicates a C++0x compiler, +otherwise you'll need to set it yourself. +*/ + +/*! \def HAVE_CPP0XVARIADICTEMPLATES +\ingroup C++ +\brief Enables variadic templates + +Define to enable the usage of variadic templates which enables the use of arbitrary +numbers of policies and other useful things. Automatically defined if __cplusplus +indicates a C++0x compiler, otherwise you'll need to set it yourself. +*/ + +/*! \def HAVE_CPP0XSTATICASSERT +\ingroup C++ +\brief Enables static assertions + +Define to enable the usage of static assertions. Automatically defined if __cplusplus +indicates a C++0x compiler, otherwise you'll need to set it yourself. +*/ + +/*! 
\def HAVE_CPP0XTYPETRAITS +\ingroup C++ +\brief Enables type traits + +Define to enable the usage of <type_traits>. Automatically defined if __cplusplus +indicates a C++0x compiler, otherwise you'll need to set it yourself. +*/ + +#if __cplusplus > 199711L || defined(HAVE_CPP0X) /* Do we have C++0x? */ +#undef HAVE_CPP0XRVALUEREFS +#define HAVE_CPP0XRVALUEREFS 1 +#undef HAVE_CPP0XVARIADICTEMPLATES +#define HAVE_CPP0XVARIADICTEMPLATES 1 +#undef HAVE_CPP0XSTATICASSERT +#define HAVE_CPP0XSTATICASSERT 1 +#undef HAVE_CPP0XTYPETRAITS +#define HAVE_CPP0XTYPETRAITS 1 +#endif + +#include <stddef.h> /* for size_t */ + +/*! \def NEDMALLOCEXTSPEC +\brief Defines how nedalloc's API is to be made visible. + +NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or +__attribute__ ((visibility("default"))) or whatever you like. It defaults +to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building +nedmalloc.dll. If NEDMALLOC_DLL_EXPORTS is set on a toolchain which is neither +WIN32 nor GCC compatible, plain extern is used as a fallback so that the +declarations below still compile. + */ +#ifndef NEDMALLOCEXTSPEC + #ifdef NEDMALLOC_DLL_EXPORTS + #ifdef WIN32 + #define NEDMALLOCEXTSPEC extern __declspec(dllexport) + #elif defined(__GNUC__) + #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default"))) + #else /* Unknown toolchain: no export decoration available */ + #define NEDMALLOCEXTSPEC extern + #endif + #ifndef ENABLE_TOLERANT_NEDMALLOC + #define ENABLE_TOLERANT_NEDMALLOC 1 + #endif + #else + #define NEDMALLOCEXTSPEC extern + #endif +#endif + +/*! \def NEDMALLOCDEPRECATED +\brief Defined to mark an API as deprecated */ +#ifndef NEDMALLOCDEPRECATED +#if defined(_MSC_VER) && !defined(__GCCXML__) + #define NEDMALLOCDEPRECATED __declspec(deprecated) +#elif defined(__GNUC__) && !defined(__GCCXML__) + #define NEDMALLOCDEPRECATED __attribute__ ((deprecated)) +#else +/*! Marks a function as being deprecated (expands to nothing on unknown compilers) */ + #define NEDMALLOCDEPRECATED +#endif +#endif + +/*! 
\def RESTRICT +\brief Defined to the restrict keyword or equivalent if available */ +#ifndef RESTRICT +#if __STDC_VERSION__ >= 199901L /* C99 or better */ + #define RESTRICT restrict +#else + #if defined(_MSC_VER) && _MSC_VER>=1400 + #define RESTRICT __restrict + #endif + #ifdef __GNUC__ + #define RESTRICT __restrict + #endif +#endif +#ifndef RESTRICT + #define RESTRICT +#endif +#endif + +#if defined(_MSC_VER) && _MSC_VER>=1400 + #define NEDMALLOCPTRATTR __declspec(restrict) + #define NEDMALLOCNOALIASATTR __declspec(noalias) +#endif +#ifdef __GNUC__ + #define NEDMALLOCPTRATTR __attribute__ ((malloc)) +#endif +/*! \def NEDMALLOCPTRATTR +\brief Defined to the specifier for a pointer which points to a memory block. Like NEDMALLOCNOALIASATTR, but sadly not identical. */ +#ifndef NEDMALLOCPTRATTR + #define NEDMALLOCPTRATTR +#endif +/*! \def NEDMALLOCNOALIASATTR +\brief Defined to the specifier for a pointer which does not alias any other variable. */ +#ifndef NEDMALLOCNOALIASATTR + #define NEDMALLOCNOALIASATTR +#endif + +/*! \def USE_MAGIC_HEADERS +\brief Defines whether nedalloc should use magic headers in foreign heap block detection + +USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t) +to each block. nedpfree() and nedprealloc() can then automagically know when +to free a system allocated block. Enabling this typically adds 20-50% to +application memory usage, and is mandatory if USE_ALLOCATOR is not 1. +*/ +#ifndef USE_MAGIC_HEADERS + #define USE_MAGIC_HEADERS 0 +#endif + +/*! \def USE_ALLOCATOR +\brief Defines the underlying allocator to use + +USE_ALLOCATOR can be one of these settings (it defaults to 1): + 0: System allocator (nedmalloc now simply acts as a threadcache) which is + very useful for testing with valgrind and Glowcode. + WARNING: Intended for DEBUG USE ONLY - not all functions work correctly. 
+ 1: dlmalloc +*/ +#ifndef USE_ALLOCATOR + #define USE_ALLOCATOR 1 /* dlmalloc */ +#endif + +#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS +#error If you are using the system allocator then you MUST use magic headers +#endif + +/*! \def REPLACE_SYSTEM_ALLOCATOR +\brief Defines whether to replace the system allocator (malloc(), free() et al) with nedalloc's implementation. + +REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called +malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want +this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries +to replace usage of the system allocator. + +Always turns on ENABLE_TOLERANT_NEDMALLOC. +*/ +#ifdef REPLACE_SYSTEM_ALLOCATOR + #if USE_ALLOCATOR==0 + #error Cannot combine using the system allocator with replacing the system allocator + #endif + #ifndef ENABLE_TOLERANT_NEDMALLOC + #define ENABLE_TOLERANT_NEDMALLOC 1 + #endif + #ifndef WIN32 /* We have a dedicated patcher for Windows */ + #define nedmalloc malloc + #define nedmalloc2 malloc2 + #define nedcalloc calloc + #define nedrealloc realloc + #define nedrealloc2 realloc2 + #define nedfree free + #define nedfree2 free2 + #define nedmemalign memalign + #define nedmallinfo mallinfo + #define nedmallopt mallopt + #define nedmalloc_trim malloc_trim + #define nedmalloc_stats malloc_stats + #define nedmalloc_footprint malloc_footprint + #define nedindependent_calloc independent_calloc + #define nedindependent_comalloc independent_comalloc + #ifdef __GNUC__ + #define nedmemsize malloc_usable_size + #endif + #endif +#endif + +/*! \def ENABLE_TOLERANT_NEDMALLOC +\brief Defines whether nedalloc should check for blocks from the system allocator. + +ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR +is set or the Windows DLL is being built. This causes nedmalloc to detect when a +system allocator block is passed to it and to handle it appropriately. 
Note that +without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault +on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there +is no comparable system on POSIX). +*/ + +#if defined(__cplusplus) +extern "C" { +#endif +/*! \brief Returns information about a memory pool */ +struct nedmallinfo { + size_t arena; /*!< non-mmapped space allocated from system */ + size_t ordblks; /*!< number of free chunks */ + size_t smblks; /*!< always 0 */ + size_t hblks; /*!< always 0 */ + size_t hblkhd; /*!< space in mmapped regions */ + size_t usmblks; /*!< maximum total allocated space */ + size_t fsmblks; /*!< always 0 */ + size_t uordblks; /*!< total allocated space */ + size_t fordblks; /*!< total free space */ + size_t keepcost; /*!< releasable (via malloc_trim) space */ +}; +#if defined(__cplusplus) +} +#endif + +/*! \def NO_NED_NAMESPACE +\brief Defines the use of the nedalloc namespace for the C functions. + +NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc +namespace when in C++ (uses the global C namespace instead). +*/ +/*! \def THROWSPEC +\brief Defined to throw() or noexcept(true) (as in, throws nothing) under C++, otherwise nothing. +*/ +#if defined(__cplusplus) + #if !defined(NO_NED_NAMESPACE) +namespace nedalloc { + #else +extern "C" { + #endif + #if __cplusplus > 199711L + #define THROWSPEC noexcept(true) + #else + #define THROWSPEC throw() + #endif +#else + #define THROWSPEC +#endif + +/* These are the global functions */ + +/*! 
\defgroup v2malloc The v2 malloc API + +\warning This API is being completely retired in v1.10 beta 2 and replaced with the API +being developed for inclusion into the C1X programming language standard + +For the v1.10 release which was generously sponsored by +<a href="http://www.ara.com/" target="_blank">Applied Research Associates (USA)</a>, +a new general purpose allocator API was designed which is intended to remedy many +of the long standing problems and inefficiencies introduced by the ISO C allocator +API. Internally nedalloc's implementations of nedmalloc(), nedcalloc(), nedmemalign() +and nedrealloc() call into this API: + +<ul> + <li><code>void* malloc2(size_t bytes, size_t alignment, unsigned flags)</code></li> + <li><code>void* realloc2(void* mem, size_t bytes, size_t alignment, unsigned + flags)</code></li> + <li><code>void free2(void* mem, unsigned flags)</code></li> +</ul> + +If nedmalloc.h is being included by C++ code, the alignment and flags parameters +default to zero which makes the new API identical to the old API (roll on the introduction +of default parameters to C!). The ability for realloc2() to take an alignment is +<em>particularly</em> useful for extending aligned vector arrays such as SSE/AVX +vector arrays. Hitherto SSE/AVX vector code had to jump through all sorts of unpleasant +hoops to maintain alignment :(. + +Note that using any of these flags other than M2_ZERO_MEMORY or any alignment +other than zero inhibits the threadcache. + +Currently MREMAP support is limited to Linux and Windows. Patches implementing +support for other platforms are welcome. + +On Linux the non portable mremap() kernel function is currently used, so in fact +the M2_RESERVE_* options are currently ignored. + +On Windows, there are two different MREMAP implementations which are chosen according +to whether a 32 bit or a 64 bit build is being performed. 
The 32 bit implementation +is based on Win32 file mappings where it reserves the address space within the Windows +VM system, so you can safely specify silly reservation quantities like 2Gb per block +and not exhaust local process address space. Note however that on x86 this costs +2Kb (1Kb if PAE is off) of kernel memory per Mb reserved, and as kernel memory has +a hard limit of 447Mb on x86 you will find the total address space reservable in +the system is limited. On x64, or if you define WIN32_DIRECT_USE_FILE_MAPPINGS=0 +on x86, a much faster implementation of using VirtualAlloc(MEM_RESERVE) to directly +reserve the address space is used. + +When using M2_RESERVE_* with realloc2(), the setting only takes effect when the +mmapped chunk has exceeded its reservation space and a new reservation space needs +to be created. +*/ + +#ifndef M2_FLAGS_DEFINED +#define M2_FLAGS_DEFINED + +/*! \def M2_ZERO_MEMORY +\ingroup v2malloc +\brief Sets the contents of the allocated block (or any increase in the allocated +block) to zero. + +Note that this zeroes only the increase from what dlmalloc thinks +the chunk's size is, so if you realloc2() a block which wasn't allocated using +malloc2() using this flag then you may have garbage just before the newly extended +space. + +\li <strong>Rationale:</strong> Memory returned by the system is guaranteed to +be zero on most platforms, and hence dlmalloc knows when it can skip zeroing +memory. This improves performance. +*/ +#define M2_ZERO_MEMORY (1<<0) + +/*! \def M2_PREVENT_MOVE +\ingroup v2malloc +\brief Cause realloc2() to attempt to extend a block in place, but to never move +it. + +\li <strong>Rationale:</strong> C++ makes almost no use of realloc(), even for +contiguous arrays such as std::vector<> because most C++ objects cannot be relocated +in memory without a copy or rvalue construction (though some clever STL implementations +specialise for Plain Old Data (POD) types, and use realloc() then and only then). 
+This flag allows C++ containers to speculatively try to extend in place, thus +improving performance <em>especially</em> for large allocations which will use +mmap(). +*/ +#define M2_PREVENT_MOVE (1<<1) + +/*! \def M2_ALWAYS_MMAP +\ingroup v2malloc +\brief Always allocate as though mmap_threshold were being exceeded. + +In the case of realloc2(), note that setting this bit will not necessarily mmap a chunk +which isn't already mmapped, but it will force a mmapped chunk if new memory +needs allocating. + +\li <strong>Rationale:</strong> If you know that an array you are allocating +is going to be repeatedly extended up into the hundreds of kilobytes range, then +you can avoid the constant memory copying into larger blocks by specifying this +flag at the beginning along with one of the M2_RESERVE_* flags below. This can +<strong>greatly</strong> improve performance for large arrays. +*/ +#define M2_ALWAYS_MMAP (1<<2) +#define M2_RESERVED1 (1<<3) +#define M2_RESERVED2 (1<<4) +#define M2_RESERVED3 (1<<5) +#define M2_RESERVED4 (1<<6) +#define M2_RESERVED5 (1<<7) +#define M2_RESERVE_ISMULTIPLIER (1<<15) +/* 7 bits (bits 8 to 14, see M2_RESERVE_MASK) are given to the address reservation specifier. +This lets you set a multiplier (bit 15 set) or a 1<< shift value. +*/ +#define M2_RESERVE_MASK 0x00007f00 + +/*! \def M2_RESERVE_MULT(n) +\ingroup v2malloc +\brief Reserve n times as much address space such that mmapped realloc2(size <= +n * original size) avoids memory copying and hence is much faster. + +Only the low 7 bits of n are kept, as the value is masked with M2_RESERVE_MASK. +*/ +#define M2_RESERVE_MULT(n) (M2_RESERVE_ISMULTIPLIER|(((n)<<8)&M2_RESERVE_MASK)) + +/*! \def M2_RESERVE_SHIFT(n) +\ingroup v2malloc +\brief Reserve (1<<n) bytes of address space such that mmapped realloc2(size <= +(1<<n)) avoids memory copying and hence is much faster. + +Only the low 7 bits of n are kept, as the value is masked with M2_RESERVE_MASK. +*/ +#define M2_RESERVE_SHIFT(n) (((n)<<8)&M2_RESERVE_MASK) +#define M2_FLAGS_MASK 0x0000ffff +#define M2_CUSTOM_FLAGS_BEGIN (1<<16) +#define M2_CUSTOM_FLAGS_MASK 0xffff0000 + +/*! 
\def NM_SKIP_TOLERANCE_CHECKS +\ingroup v2malloc +\brief Causes nedmalloc to not inspect the block being passed to see if it belongs +to the system allocator. Can improve speed by up to 10%. +*/ +#define NM_SKIP_TOLERANCE_CHECKS (1<<31) +#endif /* M2_FLAGS_DEFINED */ + + +#if defined(__cplusplus) +/*! \brief Gets the usable size of an allocated block. + +Note this will always be bigger than what was +asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the +system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows +systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS. +*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem, unsigned flags=0) THROWSPEC; +#else +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem, unsigned flags) THROWSPEC; +#endif +/*! \brief Identical to nedblksize() except without the isforeign */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmemsize(void *RESTRICT mem) THROWSPEC; + +/*! \brief Equivalent to nedpsetvalue((nedpool *) 0, v) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC; + +/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, size, 0, 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC; +/*! \brief Equivalent to nedpmalloc2((nedpool *) 0, no*size, 0, M2_ZERO_MEMORY) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC; +/*! \brief Equivalent to nedprealloc2((nedpool *) 0, size, mem, size, 0, M2_RESERVE_MULT(8)) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC; +/*! \brief Equivalent to nedpfree2((nedpool *) 0, mem, 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC; +/*! 
\brief Equivalent to nedpmalloc2((nedpool *) 0, size, alignment, 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC; + +#if defined(__cplusplus) +/*! \ingroup v2malloc +\brief Equivalent to nedpmalloc2((nedpool *) 0, size, alignment, flags) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc2(size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC; +/*! \ingroup v2malloc +\brief Equivalent to nedprealloc2((nedpool *) 0, mem, size, alignment, flags) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc2(void *mem, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC; +/*! \ingroup v2malloc +\brief Equivalent to nedpfree2((nedpool *) 0, mem, flags) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree2(void *mem, unsigned flags=0) THROWSPEC; +#else +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc2(size_t size, size_t alignment, unsigned flags) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc2(void *mem, size_t size, size_t alignment, unsigned flags) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree2(void *mem, unsigned flags) THROWSPEC; +#endif + +/*! \brief Equivalent to nedpmallinfo((nedpool *) 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC; +/*! \brief Equivalent to nedpmallopt((nedpool *) 0, parno, value) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC; +/*! \brief Returns the internal allocation granularity and the magic header XOR used for internal consistency checks. */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC; +/*! \brief Equivalent to nedpmalloc_trim((nedpool *) 0, pad) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC; +/*! 
\brief Equivalent to nedpmalloc_stats((nedpool *) 0) */ +NEDMALLOCEXTSPEC void nedmalloc_stats(void) THROWSPEC; +/*! \brief Equivalent to nedpmalloc_footprint((nedpool *) 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC; +/*! \brief Equivalent to nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; +/*! \brief Equivalent to nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC; + +/*! \brief Destroys the system memory pool used by the functions above. + +Useful for when you have nedmalloc in a DLL you're about to unload. +If you call ANY nedmalloc functions after calling this you will +get a fatal exception! + +Takes no arguments; declared with (void) so that C callers get a real prototype. +*/ +NEDMALLOCEXTSPEC void neddestroysyspool(void) THROWSPEC; + +/*! \brief A nedpool type */ +struct nedpool_t; +/*! \brief A nedpool type */ +typedef struct nedpool_t nedpool; + +/*! \brief Creates a memory pool for use with the nedp* functions below. + +Capacity is how much to allocate immediately (if you know you'll be allocating a lot +of memory very soon) which you can leave at zero. Threads specifies how many threads +will *normally* be accessing the pool concurrently. Setting this to zero means it +extends on demand, but be careful of this as it can rapidly consume system resources +where bursts of concurrent threads use a pool at once. +*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC; + +/*! \brief Destroys a memory pool previously created by nedcreatepool(). +*/ +NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC; + +/*! \brief Returns a zero terminated snapshot of threadpools existing at the time of call. 
+ +Call nedfree() on the returned list when you are done. Returns zero if there is only the +system pool in existence. +*/ +NEDMALLOCEXTSPEC nedpool **nedpoollist() THROWSPEC; + +/*! \brief Sets a value to be associated with a pool. + +You can retrieve this value by passing any memory block allocated from that pool. +*/ +NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC; + +/*! \brief Gets a previously set value using nedpsetvalue() or zero if memory is unknown. + +Optionally can also retrieve pool. You can detect an unknown block by the return +being zero and *p being unmodifed. +*/ +NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC; + +/*! \brief Trims the thread cache for the calling thread, returning any existing cache +data to the central pool. + +Remember to ALWAYS call with zero if you used the system pool. Setting disable to +non-zero replicates neddisablethreadcache(). +*/ +NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC; + +/*! \brief Disables the thread cache for the calling thread, returning any existing cache +data to the central pool. + +Remember to ALWAYS call with zero if you used the system pool. +*/ +NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC; + +/*! \brief Releases all memory in all threadcaches in the pool, and writes all +accumulated memory operations to the log if enabled. + +You can pass zero for filepath to use the compiled default, or else a char[MAX_PATH] +containing the path you wish to use for the log file. The log file is always +appended to if it already exists. After writing the logs, the logging ability +is disabled for that pool. + +\warning Do NOT call this if the pool is in use - this call is NOT threadsafe. +*/ +NEDMALLOCEXTSPEC size_t nedflushlogs(nedpool *p, char *filepath) THROWSPEC; + + +/*! 
\brief Equivalent to nedpmalloc2(p, size, 0, 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC; +/*! \brief Equivalent to nedpmalloc2(p, no*size, 0, M2_ZERO_MEMORY) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC; +/*! \brief Equivalent to nedprealloc2(p, mem, size, 0, M2_RESERVE_MULT(8)) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC; +/*! \brief Equivalent to nedpfree2(p, mem, 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedpfree(nedpool *p, void *mem) THROWSPEC; +/*! \brief Equivalent to nedpmalloc2(p, bytes, alignment, 0) */ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC; +#if defined(__cplusplus) +/*! \ingroup v2malloc +\brief Allocates a block of memory sized \em size from pool \em p, aligned to \em alignment and according to the flags \em flags. +*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc2(nedpool *p, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC; +/*! \ingroup v2malloc +\brief Resizes the block of memory at \em mem in pool \em p to size \em size, aligned to \em alignment and according to the flags \em flags. +*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc2(nedpool *p, void *mem, size_t size, size_t alignment=0, unsigned flags=0) THROWSPEC; +/*! \brief Frees the block \em mem from the pool \em p according to flags \em flags. 
*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedpfree2(nedpool *p, void *mem, unsigned flags=0) THROWSPEC; +#else +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedpmalloc2(nedpool *p, size_t size, size_t alignment, unsigned flags) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedprealloc2(nedpool *p, void *mem, size_t size, size_t alignment, unsigned flags) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedpfree2(nedpool *p, void *mem, unsigned flags) THROWSPEC; +#endif +/*! \brief Returns information about the memory pool */ +NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC; +/*! \brief Changes the operational parameters of the memory pool */ +NEDMALLOCEXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC; +/*! \brief Tries to release as much free memory back to the system as possible, leaving \em pad remaining per threadpool. */ +NEDMALLOCEXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC; +/*! \brief Prints some operational statistics to stdout. */ +NEDMALLOCEXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC; +/*! \brief Returns how much memory is currently in use by the memory pool */ +NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC; +/*! \brief Returns a series of guaranteed consecutive cleared memory allocations. + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). 
If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use regular calloc and assign pointers into this + space to represent elements. (In this case though, you cannot + independently free elements.) + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0)); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (int i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; +/*! \brief Returns a series of guaranteed consecutive allocations. + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. 
It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comalloc differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... }; + struct Foot { ... }; + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... 
+ } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from a series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC; + +#if defined(__cplusplus) +} /* namespace or extern "C" */ +#include <new> +#include <memory> +#ifdef HAVE_CPP0XTYPETRAITS +#include <type_traits> +#endif + +// Touch into existence for future platforms +namespace std { namespace tr1 { } } + +/*! \defgroup C++ C++ language support + +Thanks to the generous support of Applied Research Associates (USA), nedalloc has extensive +C++ language support which uses C++ metaprogramming techniques to provide a policy driven +STL container reimplementor. The metaprogramming silently overrides or replaces the STL implementation +on your system (MSVC and GCC are the two currently supported) to \b substantially improve +the performance of STL containers by making use of nedalloc's additional features. + +Sounds difficult to use? Not really. Simply do this: +\code +using namespace nedalloc; +typedef nedallocatorise<std::vector, unsigned int, + nedpolicy::typeIsPOD<true>::policy, + nedpolicy::mmap<>::policy, + nedpolicy::reserveN<26>::policy // 1<<26 = 64Mb. 10,000,000 * sizeof(unsigned int) = 38Mb. +>::value myvectortype; +myvectortype a; +for(int n=0; n<10000000; n++) + a.push_back(n); +\endcode + +The metaprogramming requires a new C++ compiler (> year 2008), and it will readily make use +of a C++0x compiler where it will use rvalue referencing, variadic templates, type traits and more. +Visual Studio 2008 or later is sufficient, as is GCC v4.4 or later. 
+ +nedalloc's metaprogramming is designed to be extensible, so the rest of this page is intended for those +wishing to customise the metaprogramming. If you simply wish to know how to use the +nedalloc::nedallocator STL allocator or the nedalloc::nedallocatorise STL reimplementor, please refer +to test.cpp which gives several examples of usage. + +<h2>Extending the metaprogramming:</h2> +A nedallocator policy looks as follows: +\code +namespace nedpolicy { + template<size_t size, size_t alignment> struct sizedalign + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + size_t policy_alignment(size_t bytes) const + { + return (bytes < size) ? alignment : 0; + } + }; + }; +} +\endcode +The policy above implements a size based alignment, so if the block being allocated is +less than \em size then it causes \em alignment to be used, otherwise it does not align. +The sizedalign struct is merely a template parameter encapsulator used to capture +additional parameters, so the real policy is in fact the class policy held within in. +If you did not need to specify any additional parameters e.g. if you were defining +policy_nedpool(), then you would directly define a policy returning your nedpool and pass +it directly to nedallocator<>. + +The primary policy functions which are intended to be overridden are listed in +nedalloc::nedallocatorI::baseimplementation in nedmalloc.h and are prefixed by "policy_". +However, there is absolutely no reason why the meatier functions such as +nedalloc::nedallocatorI::baseimplementation::allocate() cannot be overriden, and indeed +some of the policies defined in nedmalloc.h do just that. + +Policy composition is handled by a dedicated recursive variadic template called +nedalloc::nedallocatorI::policycompositor. 
If you have \em really specialised needs, you +can partially specialise this class to make it do all sorts of interesting things - hence +its separation into its own class. +*/ + +/*! \brief The nedalloc namespace */ +namespace nedalloc { + +/*! \def NEDSTATIC_ASSERT(expr, msg) +\brief Generates a static assertion if (expr)==0 at compile time. + +Make SURE your message contains no spaces or anything else which would make it an invalid +variable name. +*/ +#ifndef HAVE_CPP0XSTATICASSERT +template<bool> struct StaticAssert; +template<> struct StaticAssert<true> +{ + StaticAssert() { } +}; +#define NEDSTATIC_ASSERT(expr, msg) \ + nedalloc::StaticAssert<(expr)!=0> ERROR_##msg +#else +#define NEDSTATIC_ASSERT(expr, msg) static_assert((expr)!=0, #msg ) +#endif + +/*! \brief The policy namespace in which all nedallocator policies live. */ +namespace nedpolicy { + /*! \class empty + \ingroup C++ + \brief An empty policy which does nothing. + */ + template<class Base> class empty : public Base + { + }; +} + +/*! \brief The implementation namespace where the internals live. */ +namespace nedallocatorI +{ + using namespace std; + using namespace tr1; + + /* Roll on variadic templates is all I can say! */ +#ifdef HAVE_CPP0XVARIADICTEMPLATES + template<class Impl, template<class> class... policies> class policycompositor; + template<class Impl, template<class> class A, template<class> class... 
policies> class policycompositor<Impl, A, policies...> + { + typedef policycompositor<Impl, policies...> temp; + public: + typedef A<typename temp::value> value; + }; +#else + template<class Impl, + template<class> class A=nedpolicy::empty, + template<class> class B=nedpolicy::empty, + template<class> class C=nedpolicy::empty, + template<class> class D=nedpolicy::empty, + template<class> class E=nedpolicy::empty, + template<class> class F=nedpolicy::empty, + template<class> class G=nedpolicy::empty, + template<class> class H=nedpolicy::empty, + template<class> class I=nedpolicy::empty, + template<class> class J=nedpolicy::empty, + template<class> class K=nedpolicy::empty, + template<class> class L=nedpolicy::empty, + template<class> class M=nedpolicy::empty, + template<class> class N=nedpolicy::empty, + template<class> class O=nedpolicy::empty + > class policycompositor + { + typedef policycompositor<Impl, B, C, D, E, F, G, H, I, J, K, L, M, N, O> temp; + public: + typedef A<typename temp::value> value; + }; +#endif + template<class Impl> class policycompositor<Impl> + { + public: + typedef Impl value; + }; +} + +template<typename T, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + template<class> class... 
policies +#else + template<class> class policy1=nedpolicy::empty, + template<class> class policy2=nedpolicy::empty, + template<class> class policy3=nedpolicy::empty, + template<class> class policy4=nedpolicy::empty, + template<class> class policy5=nedpolicy::empty, + template<class> class policy6=nedpolicy::empty, + template<class> class policy7=nedpolicy::empty, + template<class> class policy8=nedpolicy::empty, + template<class> class policy9=nedpolicy::empty, + template<class> class policy10=nedpolicy::empty, + template<class> class policy11=nedpolicy::empty, + template<class> class policy12=nedpolicy::empty, + template<class> class policy13=nedpolicy::empty, + template<class> class policy14=nedpolicy::empty, + template<class> class policy15=nedpolicy::empty +#endif +> class nedallocator; + +namespace nedallocatorI +{ + /*! \brief The base implementation class */ + template<class implementation> class baseimplementation + { + //NEDSTATIC_ASSERT(false, Bad_policies_specified); + }; + /*! \brief The base implementation class */ + template<typename T, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + template<class> class... policies +#else + template<class> class policy1, + template<class> class policy2, + template<class> class policy3, + template<class> class policy4, + template<class> class policy5, + template<class> class policy6, + template<class> class policy7, + template<class> class policy8, + template<class> class policy9, + template<class> class policy10, + template<class> class policy11, + template<class> class policy12, + template<class> class policy13, + template<class> class policy14, + template<class> class policy15 +#endif + > class baseimplementation<nedallocator<T, +#ifdef HAVE_CPP0XVARIADICTEMPLATES +policies... +#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > > + { + protected: + //! 
\brief The most derived nedallocator implementation type + typedef nedallocator<T, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + policies... +#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > implementationType; + //! \brief Returns a this for the most derived nedallocator implementation type + implementationType *_this() { return static_cast<implementationType *>(this); } + //! \brief Returns a this for the most derived nedallocator implementation type + const implementationType *_this() const { return static_cast<const implementationType *>(this); } + //! \brief Specifies the nedpool to use. Defaults to zero (the system pool). + nedpool *policy_nedpool(size_t bytes) const + { + return 0; + } + //! \brief Specifies the granularity to use. Defaults to \em bytes (no granularity). + size_t policy_granularity(size_t bytes) const + { + return bytes; + } + //! \brief Specifies the alignment to use. Defaults to zero (no alignment). + size_t policy_alignment(size_t bytes) const + { + return 0; + } + //! \brief Specifies the flags to use. Defaults to zero (no flags). + unsigned policy_flags(size_t bytes) const + { + return 0; + } + //! \brief Specifies what to do when the allocation fails. Defaults to throwing std::bad_alloc. + void policy_throwbadalloc(size_t bytes) const + { + throw std::bad_alloc(); + } + //! \brief Specifies if the type is POD. Is std::is_trivially_copyable<T>::value on C++0x compilers, otherwise false. 
+ static const bool policy_typeIsPOD= +#ifdef HAVE_CPP0XTYPETRAITS +#if defined(__GNUC__) && (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40900 + is_pod<T>::value; +#else + is_trivially_copyable<T>::value; +#endif +#else + false; +#endif + public: + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + T *address(T &r) const { return &r; } + const T *address(const T &s) const { return &s; } + size_t max_size() const { return (static_cast<size_t>(0) - static_cast<size_t>(1)) / sizeof(T); } + bool operator!=(const baseimplementation &other) const { return !(*this == other); } + bool operator==(const baseimplementation &other) const { return true; } + + void construct(T *const p, const T &t) const { + void *const _p = static_cast<void *>(p); + new (_p) T(t); + } + void destroy(T *const p) const { + p->~T(); + } + baseimplementation() { } + baseimplementation(const baseimplementation &) { } +#ifdef HAVE_CPP0XRVALUEREFS + baseimplementation(baseimplementation &&) { } +#endif + template<typename U> struct rebind { + typedef nedallocator<U, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + policies... +#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > other; + }; + template<typename U> baseimplementation(const nedallocator<U, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + policies... 
+#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > &) { } + + T *allocate(const size_t n) const { + // Leave these spelled out to aid debugging + const size_t t_size = sizeof(T); + size_t size = _this()->policy_granularity(n*t_size); + nedpool *pool = _this()->policy_nedpool(size); + size_t alignment = _this()->policy_alignment(size); + unsigned flags = _this()->policy_flags(size); + void *ptr = nedpmalloc2(pool, size, alignment, flags); + if(!ptr) + _this()->policy_throwbadalloc(size); + return static_cast<T *>(ptr); + } + void deallocate(T *p, const size_t n) const { + nedpfree(0/*not needed*/, p); + } + template<typename U> T *allocate(const size_t n, const U * /* hint */) const { + return allocate(n); + } + private: + baseimplementation &operator=(const baseimplementation &); + }; + +} + +namespace nedpolicy +{ + /*! \class granulate + \ingroup C++ + \brief A policy setting the granularity of the allocated memory. + + Memory is sized according to (size+granularity-1) & ~(granularity-1). + In other words, granularity \b must be a power of two. + */ + template<size_t granularity> struct granulate + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + size_t policy_granularity(size_t bytes) const + { + return (bytes+granularity-1) & ~(granularity-1); + } + }; + }; + /*! \class align + \ingroup C++ + \brief A policy setting the alignment of the allocated memory. + */ + template<size_t alignment> struct align + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + size_t policy_alignment(size_t bytes) const + { + return alignment; + } + }; + }; + /*! \class zero + \ingroup C++ + \brief A policy causing the zeroing of the allocated memory. 
+ */ + template<bool dozero=true> struct zero + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + unsigned policy_flags(size_t bytes) const + { + return dozero ? Base::policy_flags(bytes)|M2_ZERO_MEMORY : Base::policy_flags(bytes); + } + }; + }; + /*! \class preventmove + \ingroup C++ + \brief A policy preventing the moving of the allocated memory. + */ + template<bool doprevent=true> struct preventmove + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + unsigned policy_flags(size_t bytes) const + { + return doprevent ? Base::policy_flags(bytes)|M2_PREVENT_MOVE : Base::policy_flags(bytes); + } + }; + }; + /*! \class mmap + \ingroup C++ + \brief A policy causing the mmapping of the allocated memory. + */ + template<bool dommap=true> struct mmap + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + unsigned policy_flags(size_t bytes) const + { + return dommap ? Base::policy_flags(bytes)|M2_ALWAYS_MMAP : Base::policy_flags(bytes); + } + }; + }; + /*! \class reserveX + \ingroup C++ + \brief A policy causing the address reservation of X times the allocated memory. + */ + template<size_t X> struct reserveX + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + unsigned policy_flags(size_t bytes) const + { + return Base::policy_flags(bytes)|M2_RESERVE_MULT(X); + } + }; + }; + /*! \class reserveN + \ingroup C++ + \brief A policy causing the address reservation of (1<<N) bytes of memory. 
+ */ + template<size_t N> struct reserveN + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + unsigned policy_flags(size_t bytes) const + { + return Base::policy_flags(bytes)|M2_RESERVE_SHIFT(N); + } + }; + }; + /*! \class badalloc + \ingroup C++ + \brief A policy specifying what to throw when an allocation failure occurs. + + A type specialisation exists for badalloc<void> which is equivalent to new(nothrow) + i.e. return zero and don't throw anything. + */ + template<typename T> struct badalloc + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + void policy_throwbadalloc(size_t bytes) const + { + throw T(); + } + }; + }; + template<> struct badalloc<void> + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + void policy_throwbadalloc(size_t bytes) const + { + } + }; + }; + /*! \class typeIsPOD + \ingroup C++ + \brief A policy forcing the treatment of the type as Plain Old Data (POD) + + On C++0x compilers, the <type_traits> is_trivially_copyable<type>::value is used by default. + When treated as POD, memcpy() is used instead + of copy construction and realloc() is permitted to move the memory contents when + resizing. + */ + template<bool ispod> struct typeIsPOD + { + template<class Base> class policy : public Base + { + template<class implementation> friend class nedallocatorI::baseimplementation; + protected: + static const bool policy_typeIsPOD=ispod; + }; + }; +} + +/*! 
\class nedallocator +\ingroup C++ +\brief A policy driven STL allocator which uses nedmalloc + +One of the lesser known features of STL container classes is their ability to take +an allocator implementation class, so where you had std::vector<Foo> you can now +have std::vector<Foo, nedalloc::nedallocator< std::vector<Foo> > such that +std::vector<> will now use nedalloc as the policy specifies. + +You <b>almost certainly</b> don't want to use this directly except in the naive +case. See nedalloc::nedallocatorise to see what I mean. +*/ +template<typename T, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + template<class> class... policies +#else + template<class> class policy1, + template<class> class policy2, + template<class> class policy3, + template<class> class policy4, + template<class> class policy5, + template<class> class policy6, + template<class> class policy7, + template<class> class policy8, + template<class> class policy9, + template<class> class policy10, + template<class> class policy11, + template<class> class policy12, + template<class> class policy13, + template<class> class policy14, + template<class> class policy15 +#endif +> class nedallocator : public nedallocatorI::policycompositor< +#ifdef HAVE_CPP0XVARIADICTEMPLATES + nedallocatorI::baseimplementation<nedallocator<T, policies...> >, + policies... +#else + nedallocatorI::baseimplementation<nedallocator<T, + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 + > >, + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif +>::value +{ + typedef typename nedallocatorI::policycompositor< +#ifdef HAVE_CPP0XVARIADICTEMPLATES + nedallocatorI::baseimplementation<nedallocator<T, policies...> >, + policies... 
+#else + nedallocatorI::baseimplementation<nedallocator<T, + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 + > >, + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + >::value Base; +public: + nedallocator() { } + nedallocator(const nedallocator &o) : Base(o) { } +#ifdef HAVE_CPP0XRVALUEREFS + nedallocator(nedallocator &&o) : Base(std::move(o)) { } +#endif + /* This templated constructor and rebind() are used by MSVC's secure iterator checker. + I think it's best to not copy state even though it may break policies which store data. */ + template<typename U> nedallocator(const nedallocator<U, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + policies... +#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > &o) { } +#ifdef HAVE_CPP0XRVALUEREFS + template<typename U> nedallocator(nedallocator<U, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + policies... +#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > &&o) { } +#endif + + template<typename U> struct rebind { + typedef nedallocator<U, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + policies... 
+#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > other; + }; +}; + +namespace nedallocatorI { + // Holds a static allocator instance shared by anything allocating from allocator + template<class allocator> struct StaticAllocator + { + static allocator &get() + { + static allocator a; + return a; + } + }; + // RAII holder for a Newed object + template<typename T, class allocator> struct PtrHolder + { + T *mem; + PtrHolder(T *_mem) : mem(_mem) { } + ~PtrHolder() + { + if(mem) + { + allocator &a=nedallocatorI::StaticAllocator<allocator>::get(); + a.deallocate(mem, sizeof(T)); + mem=0; + } + } + T *release() { T *ret=mem; mem=0; return ret; } + T *operator *() { return mem; } + const T *operator *() const { return mem; } + }; +} +/*! \brief Allocates the memory for an instance of object \em T and constructs it. + +If an exception is thrown during construction, the memory is freed before +rethrowing the exception. + +Usage is very simple: +\code + SSEVectorType *foo1=New<SSEVectorType>(4, 5, 6, 7); +\endcode +*/ +#ifdef HAVE_CPP0XVARIADICTEMPLATES +template<typename T, class allocator=nedallocator<T>, typename... Parameters> inline T *New(const Parameters&... 
parameters) +#else +template<typename T, class allocator> inline T *New() +#endif +{ + allocator &a=nedallocatorI::StaticAllocator<allocator>::get(); + nedallocatorI::PtrHolder<T, allocator> ret(a.allocate(sizeof(T))); + if(*ret) + { +#ifdef HAVE_CPP0XVARIADICTEMPLATES + new((void *) *ret) T(parameters...); +#else + new((void *) *ret) T; +#endif + } + return ret.release(); +} +#ifndef HAVE_CPP0XVARIADICTEMPLATES +// Extremely annoying not to have default template arguments for functions pre-C++0x +template<typename T> inline T *New() +{ + return New<T, nedallocator<T> >(); +} +// Also, it's painful to replicate function overloads :( +#define NEDMALLOC_NEWIMPL \ +template<typename T, class allocator, NEDMALLOC_NEWIMPLTYPES> inline T *New(NEDMALLOC_NEWIMPLPARSDEFS) \ +{ \ + allocator &a=nedallocatorI::StaticAllocator<allocator>::get(); \ + nedallocatorI::PtrHolder<T, allocator> ret(a.allocate(sizeof(T))); \ + if(*ret) \ + { \ + new((void *) *ret) T(NEDMALLOC_NEWIMPLPARS); \ + } \ + return ret.release(); \ +} \ +template<typename T, NEDMALLOC_NEWIMPLTYPES> inline T *New(NEDMALLOC_NEWIMPLPARSDEFS)\ +{ \ + return New<T, nedallocator<T> >(NEDMALLOC_NEWIMPLPARS); \ +} +#define NEDMALLOC_NEWIMPLTYPES typename P1 +#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1 +#define NEDMALLOC_NEWIMPLPARS p1 +NEDMALLOC_NEWIMPL +#undef NEDMALLOC_NEWIMPLTYPES +#undef NEDMALLOC_NEWIMPLPARSDEFS +#undef NEDMALLOC_NEWIMPLPARS + +#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2 +#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2 +#define NEDMALLOC_NEWIMPLPARS p1, p2 +NEDMALLOC_NEWIMPL +#undef NEDMALLOC_NEWIMPLTYPES +#undef NEDMALLOC_NEWIMPLPARSDEFS +#undef NEDMALLOC_NEWIMPLPARS + +#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2, typename P3 +#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2, const P3 &p3 +#define NEDMALLOC_NEWIMPLPARS p1, p2, p3 +NEDMALLOC_NEWIMPL +#undef NEDMALLOC_NEWIMPLTYPES +#undef NEDMALLOC_NEWIMPLPARSDEFS +#undef NEDMALLOC_NEWIMPLPARS + 
+#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2, typename P3, typename P4 +#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2, const P3 &p3, const P4 &p4 +#define NEDMALLOC_NEWIMPLPARS p1, p2, p3, p4 +NEDMALLOC_NEWIMPL +#undef NEDMALLOC_NEWIMPLTYPES +#undef NEDMALLOC_NEWIMPLPARSDEFS +#undef NEDMALLOC_NEWIMPLPARS + +#define NEDMALLOC_NEWIMPLTYPES typename P1, typename P2, typename P3, typename P4, typename P5 +#define NEDMALLOC_NEWIMPLPARSDEFS const P1 &p1, const P2 &p2, const P3 &p3, const P4 &p4, const P5 &p5 +#define NEDMALLOC_NEWIMPLPARS p1, p2, p3, p4, p5 +NEDMALLOC_NEWIMPL +#undef NEDMALLOC_NEWIMPLTYPES +#undef NEDMALLOC_NEWIMPLPARSDEFS +#undef NEDMALLOC_NEWIMPLPARS + +#undef NEDMALLOC_NEWIMPL +#endif + +/*! \brief Destructs an instance of object T, and releases the memory used to store it. +*/ +template<class allocator, typename T> inline void Delete(const T *_obj) +{ + T *obj=const_cast<T *>(_obj); + allocator &a=nedallocatorI::StaticAllocator<allocator>::get(); + obj->~T(); + a.deallocate(obj, sizeof(T)); +} +template<typename T> inline void Delete(const T *obj) { Delete<nedallocator<T> >(obj); } + +/*! \class nedallocatorise +\ingroup C++ +\brief Reimplements a given STL container to make full and efficient usage of nedalloc +\param stlcontainer The STL container you wish to reimplement +\param T The type to be contained +\param policies... Any policies you want applied to the allocator + + +This is a clever bit of C++ metaprogramming if I do say so myself! What it does +is to specialise a STL container implementation to make full use of nedalloc's +advanced facilities, so for example if you do: +\code +using namespace nedalloc; +typedef nedallocatorise<std::vector, unsigned int, + nedpolicy::typeIsPOD<true>::policy, + nedpolicy::mmap<>::policy, + nedpolicy::reserveN<26>::policy // 1<<26 = 64Mb. 10,000,000 * sizeof(unsigned int) = 38Mb. 
+>::value myvectortype; +myvectortype a; +for(int n=0; n<10000000; n++) + a.push_back(n); +\endcode +What happens here is that nedallocatorise reimplements the parts of +std::vector which extend and shrink the actual memory allocation. +Because the typeIsPOD policy is specified, it means that realloc() +rather than realloc(M2_PREVENT_MOVE) can be used. Also, because the +mmap and the reserveN policies are specified, std::vector immediately +reserves 64Mb of address space and forces the immediate use of mmap(). +This allows you to push_back() a lot of data very, very quickly indeed. +You will also find that pop_back() actually reduces the allocation now +(most implementations don't bother ever releasing memory except when +reaching empty or when resize() is called). When mmapped, reserve() +is automatically held at a minimum of <page size>/sizeof(type) though +larger values are respected. + +test.cpp has a benchmark of the speed differences you may realise, plus +an example of usage. +*/ +template<template<typename, class> class stlcontainer, + typename T, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + template<class> class... policies +#else + template<class> class policy1=nedpolicy::empty, + template<class> class policy2=nedpolicy::empty, + template<class> class policy3=nedpolicy::empty, + template<class> class policy4=nedpolicy::empty, + template<class> class policy5=nedpolicy::empty, + template<class> class policy6=nedpolicy::empty, + template<class> class policy7=nedpolicy::empty, + template<class> class policy8=nedpolicy::empty, + template<class> class policy9=nedpolicy::empty, + template<class> class policy10=nedpolicy::empty, + template<class> class policy11=nedpolicy::empty, + template<class> class policy12=nedpolicy::empty, + template<class> class policy13=nedpolicy::empty, + template<class> class policy14=nedpolicy::empty, + template<class> class policy15=nedpolicy::empty +#endif +> class nedallocatorise +{ +public: + //! 
The reimplemented STL container type + typedef stlcontainer<T, nedallocator<T, +#ifdef HAVE_CPP0XVARIADICTEMPLATES + policies... +#else + policy1, policy2, policy3, policy4, policy5, + policy6, policy7, policy8, policy9, policy10, + policy11, policy12, policy13, policy14, policy15 +#endif + > > value; +}; + +} /* namespace */ +#endif + +/* Some miscellaneous dlmalloc option documentation */ + +#ifdef DOXYGEN_IS_PARSING_ME +/* Just some false defines to keep doxygen happy */ + +#define NEDMALLOC_DEBUG DEBUG +#define ENABLE_LARGE_PAGES undef +#define ENABLE_FAST_HEAP_DETECTION undef +#define REPLACE_SYSTEM_ALLOCATOR undef +#define ENABLE_TOLERANT_NEDMALLOC undef +#define NO_NED_NAMESPACE undef + +/*! \def MALLOC_ALIGNMENT +\brief Defines what alignment normally returned blocks should use. Is 16 bytes on Mac OS X, otherwise 8 bytes. */ +#define MALLOC_ALIGNMENT 8 + +/*! \def USE_LOCKS +\brief Defines the threadsafety of nedalloc + +USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK, +ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER. +*/ +#define USE_LOCKS 1 + +/*! \def DEFAULT_GRANULARITY +\brief Defines the granularity in which to request or free system memory. +*/ +#define DEFAULT_GRANULARITY (2*1024*1024) + +/*! \def DEFAULT_TRIM_THRESHOLD +\brief Defines how much memory must be free before returning it to the system. +*/ +#define DEFAULT_TRIM_THRESHOLD (2*1024*1024) + +/*! \def DEFAULT_MMAP_THRESHOLD +\brief Defines the threshold above which mmap() is used to perform direct allocation. +*/ +#define DEFAULT_MMAP_THRESHOLD (256*1024) + +/*! \def MAX_RELEASE_CHECK_RATE +\brief Defines how many free() ops should occur before checking how much free memory there is. +*/ +#define MAX_RELEASE_CHECK_RATE 4095 + +/*! 
\def NEDMALLOC_FORCERESERVE +\brief Lets you force address space reservation in the \b standard malloc API + +Note that by default realloc() sets M2_RESERVE_MULT(8) when thunking to realloc2(), +so you probably don't need to override this +*/ +#define NEDMALLOC_FORCERESERVE(p, mem, size) 0 + +/*! \def NEDMALLOC_TESTLOGENTRY +\brief Used to determine whether a given memory operation should be logged. +*/ +#define NEDMALLOC_TESTLOGENTRY(tc, np, type, mspace, size, mem, alignment, flags, returned) ((type)&ENABLE_LOGGING) + +/*! \def NEDMALLOC_STACKBACKTRACEDEPTH +\brief Turns on stack backtracing in the logger. + +You almost certainly want to constrain what gets logged using NEDMALLOC_TESTLOGENTRY +if you turn this on as the sheer volume of data output can make execution very slow. +*/ +#define NEDMALLOC_STACKBACKTRACEDEPTH 0 + +#endif + +#endif |