summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSatish Patel <satish.patel@linaro.org>2016-04-02 19:11:36 +0530
committerSatish Patel <satish.patel@linaro.org>2016-04-02 19:11:36 +0530
commit8d36cec3933182c3e3de7fff42d12373d4769165 (patch)
tree374e5df555f044d00babc7a4b8d729d66ce57183
parent5c4465b95c8eead66400f27e37943e2f1fe7c0b7 (diff)
downloadnedmalloc-8d36cec3933182c3e3de7fff42d12373d4769165.tar.gz
Add test applications for nedmalloc
test.c & test.cpp are taken from https://github.com/ned14/nedmalloc. The generated executables are nedtest_c & nedtest_cpp respectively. Changes (test.c): added command-line argument support to choose between "default" and "nedmalloc" when running the test, e.g. to run the test with the default malloc use "nedtest_c default"; otherwise the nedmalloc implementation is exercised by running plain "nedtest_c". Note: crashes have been observed when using the nedmalloc implementation in the test app; this still needs fine-tuning. Signed-off-by: Satish Patel <satish.patel@linaro.org>
-rw-r--r--test/Android.mk79
-rw-r--r--test/test.c546
-rw-r--r--test/test.cpp264
-rw-r--r--test/test1.c27
-rw-r--r--test/test1.cpp30
5 files changed, 946 insertions, 0 deletions
diff --git a/test/Android.mk b/test/Android.mk
new file mode 100644
index 0000000..5b9f4c7
--- /dev/null
+++ b/test/Android.mk
@@ -0,0 +1,79 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+# Copyright (C) 2016 Linaro Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+LOCAL_PATH := $(call my-dir)
+#=====================================================
+# nedtest_c: executable built from test.c and linked statically against libnedmalloc
+#=====================================================
+include $(CLEAR_VARS)
+common_c_includes := \
+ external/nedmalloc \
+
+LOCAL_CLANG := true
+
+LOCAL_SRC_FILES := \
+ test.c \
+
+LOCAL_CFLAGS += \
+ -fno-stack-protector \
+ -Wstrict-overflow=5 \
+ -fvisibility=hidden \
+ -Wall -Wextra -Wno-unused-parameter -Werror \
+ -fexceptions \
+ -DNO_NED_NAMESPACE \
+ -Wno-unused-function
+
+LOCAL_CPPFLAGS += \
+
+LOCAL_C_INCLUDES += $(common_c_includes)
+LOCAL_STATIC_LIBRARIES += libnedmalloc
+
+LOCAL_MODULE := nedtest_c
+
+#include $(BUILD_STATIC_LIBRARY)
+include $(BUILD_EXECUTABLE)
+
+#=====================================================
+# nedtest_cpp: executable built from test.cpp and linked statically against libnedmalloc
+#=====================================================
+include $(CLEAR_VARS)
+common_c_includes := \
+ external/nedmalloc \
+
+LOCAL_CLANG := true
+
+LOCAL_SRC_FILES := \
+ test.cpp \
+
+LOCAL_CFLAGS += \
+ -fno-stack-protector \
+ -Wstrict-overflow=5 \
+ -fvisibility=hidden \
+ -Wall -Wextra -Wno-unused-parameter -Werror \
+ -fexceptions \
+ -DNO_NED_NAMESPACE \
+ -Wno-unused-function
+
+LOCAL_CPPFLAGS += \
+
+LOCAL_C_INCLUDES += $(common_c_includes)
+LOCAL_STATIC_LIBRARIES += libnedmalloc
+
+LOCAL_MODULE := nedtest_cpp
+
+#include $(BUILD_STATIC_LIBRARY)
+include $(BUILD_EXECUTABLE)
diff --git a/test/test.c b/test/test.c
new file mode 100644
index 0000000..169a7f5
--- /dev/null
+++ b/test/test.c
@@ -0,0 +1,546 @@
+/* test.c
+An example of how to use nedalloc in C
+(C) 2005-2010 Niall Douglas
+*/
+
+#define _CRT_SECURE_NO_WARNINGS 1 /* Don't care about MSVC warnings on POSIX functions */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <nedmalloc.h>
+
+#define USE_NEDMALLOC_DLL
+#define TEST_DEFAULT 0
+#define TEST_NEDMALLOC 1
+
+/**** TEST CONFIGURATION ****/
+#if 0 /* Test patterns typical of C++ code */
+#define THREADS 4 /* How many threads to run */
+#define TESTCPLUSPLUS 1 /* =1 to make 50% of ops have blocksize<=512. This is typical for C++ allocator usage. */
+#define BLOCKSIZE 16384 /* Test will be with blocks up to BLOCKSIZE. Try 8-16Kb for typical app usage, 1Mb if you use large arrays etc. */
+#define TESTTYPE 1 /* =1 for maximum speed test, =2 for randomised test */
+#define TOUCH 0 /* Whether to touch all pages of an allocated region. Can make a huge difference to scores. */
+#define MAXMEMORY (768*1024*1024) /* Maximum memory to use (approx) */
+#define RECORDS (100000/THREADS)
+#define MAXMEMORY2 (MAXMEMORY/THREADS)
+#endif
+
+#if 1 /* Test avrg. 2Mb block realloc() speed */
+#define THREADS 3
+#define TESTCPLUSPLUS 0
+#define BLOCKSIZE (2*1024*1024)
+#define TESTTYPE 2
+#define TOUCH 1
+#define MAXMEMORY (8*1024*1024)
+#define RECORDS (50/THREADS)
+#define MAXMEMORY2 (MAXMEMORY/THREADS)
+#endif
+
+#ifdef _MSC_VER
+/*#pragma optimize("g", off)*/ /* Useful for debugging */
+#endif
+
+#if !defined(USE_NEDMALLOC_DLL)
+#include "nedmalloc.c"
+#elif defined(WIN32)
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+#include <malloc.h>
+#else
+#include <pthread.h>
+#endif
+
+#ifndef FORCEINLINE
+ #if defined(__GNUC__)
+#define FORCEINLINE __inline __attribute__ ((always_inline))
+ #elif defined(_MSC_VER)
+ #define FORCEINLINE __forceinline
+ #endif
+#endif
+#ifndef NOINLINE
+ #if defined(__GNUC__)
+ #define NOINLINE __attribute__ ((noinline))
+ #elif defined(_MSC_VER)
+ #define NOINLINE __declspec(noinline)
+ #else
+ #define NOINLINE
+ #endif
+#endif
+
+
+static int whichmalloc; /* Index into the mallocs[]/reallocs[]/memsizes[]/frees[] tables: 0 (the initial value) selects the first entry of each table, main() sets 1 to select nedmalloc */
+//static int doRealloc; NOTE(review): still referenced by the 1==TESTTYPE code paths, which cannot compile while this declaration stays commented out
+static struct threadstuff_t
+{ /* Per-thread test state, one array element per worker thread */
+ struct
+ {
+ int mallocs; /* incremented by threadcode() after each malloc call that did not abort */
+ int reallocs; /* incremented after each realloc call that did not abort */
+ int frees; /* incremented after each free call */
+ } ops;
+ unsigned int *toalloc; /* RECORDS pre-generated request sizes, filled in by runtest() */
+ void **allocs; /* RECORDS pointer slots holding the thread's live allocations */
+ char cachesync1[128]; /* never accessed; presumably padding to keep `done` away from neighbouring data - TODO confirm */
+ int done; /* cleared when threadcode() starts and set to 1 when it finishes; polled by runtest(). NOTE(review): plain int, no atomics or locking */
+ char cachesync2[128]; /* never accessed; presumably padding as well - TODO confirm */
+} threadstuff[THREADS];
+
+static void threadcode(int);
+
+#ifdef WIN32
+static DWORD WINAPI _threadcode(LPVOID a)
+{ /* Win32 thread entry point: recovers the worker index packed into the LPVOID argument and runs the worker */
+ threadcode((int)(size_t) a);
+ return 0;
+}
+#define THREADVAR HANDLE /* handle type used for the threads[] array */
+#define THREADINIT(v, id) (*(v)=CreateThread(NULL, 0, _threadcode, (LPVOID)(size_t)(id), 0, NULL)) /* v: pointer to a THREADVAR, id: worker index; arguments parenthesised so expressions expand safely */
+#define THREADSLEEP(v) SleepEx((v), FALSE) /* v is passed straight to SleepEx */
+#define THREADWAIT(v) (WaitForSingleObject((v), INFINITE), 0) /* blocks until the thread handle is signalled; always yields 0 */
+
+typedef unsigned __int64 usCount; /* Timestamp type; despite the name, the values produced below are scaled to picoseconds */
+static FORCEINLINE usCount GetUsCount()
+{ /* Returns the performance counter reading divided by (ticks per second / 10^12), i.e. a time in picoseconds */
+ static LARGE_INTEGER ticksPerSec;
+ static double scalefactor; /* counter ticks per picosecond; 0 until the first call. NOTE(review): lazily initialised without locking although worker threads call this */
+ LARGE_INTEGER val;
+ if(!scalefactor)
+ {
+ if(QueryPerformanceFrequency(&ticksPerSec))
+ scalefactor=ticksPerSec.QuadPart/1000000000000.0;
+ else
+ scalefactor=1; /* no frequency available: counter values are then returned unscaled */
+ }
+ if(!QueryPerformanceCounter(&val))
+ return (usCount) GetTickCount() * 1000000000; /* fallback when the counter cannot be read: tick count multiplied by 10^9 */
+ return (usCount) (val.QuadPart/scalefactor);
+}
+
+static HANDLE win32heap; /* private heap used by the win32* wrappers; created and destroyed in main() */
+static void *win32malloc(size_t size)
+{ /* malloc-shaped wrapper over HeapAlloc on win32heap */
+ return HeapAlloc(win32heap, 0, size);
+}
+static void *win32realloc(void *p, size_t size)
+{ /* realloc-shaped wrapper over HeapReAlloc on win32heap */
+ return HeapReAlloc(win32heap, 0, p, size);
+}
+static size_t win32memsize(void *p)
+{ /* reports the size HeapSize gives for block p */
+ return HeapSize(win32heap, 0, p);
+}
+static void win32free(void *mem)
+{ /* free-shaped wrapper over HeapFree; the BOOL result is discarded */
+ HeapFree(win32heap, 0, mem);
+}
+
+static void *(*const mallocs[])(size_t size)={ malloc, nedmalloc, win32malloc }; /* the four tables are indexed by whichmalloc: 0 = C runtime, 1 = nedmalloc, 2 = win32 heap */
+static void *(*const reallocs[])(void *p, size_t size)={ realloc, nedrealloc, win32realloc };
+static size_t (*const memsizes[])(void *p)={ _msize, nedmemsize, win32memsize }; /* size query matching each allocator */
+static void (*const frees[])(void *mem)={ free, nedfree, win32free };
+#else
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+#if defined(__cplusplus)
+extern "C"
+#else
+extern
+#endif
+#if defined(__linux__) || defined(__FreeBSD__)
+/* Sadly we can't include <malloc.h> as it causes a redefinition error */
+size_t malloc_usable_size(const void *);
+#elif defined(__APPLE__)
+size_t malloc_size(const void *ptr);
+#else
+#error Do not know what to do here
+#endif
+static void *_threadcode(void *a)
+{ /* pthread entry point: recovers the worker index packed into the void* argument and runs the worker */
+ threadcode((int)(size_t) a);
+ return 0;
+}
+#define THREADVAR pthread_t /* handle type used for the threads[] array */
+#define THREADINIT(v, id) pthread_create((v), NULL, _threadcode, (void *)(size_t)(id)) /* v: pointer to a THREADVAR, id: worker index; arguments parenthesised so expressions such as n+1 expand correctly */
+#define THREADSLEEP(v) usleep((v)*1000) /* v is in milliseconds; parenthesised so THREADSLEEP(a+b) scales the whole sum */
+#define THREADWAIT(v) pthread_join((v), NULL) /* joins the thread and discards its return value */
+
+typedef unsigned long long usCount; /* Timestamp type; despite the name, the values produced below are in picoseconds */
+static FORCEINLINE usCount GetUsCount()
+{ /* Returns the current time as seconds*10^12 plus the sub-second part scaled to the same unit (picoseconds) */
+#ifdef CLOCK_MONOTONIC /* preferred source when the platform headers define it */
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts); /* return value is not checked */
+ return ((usCount) ts.tv_sec*1000000000000LL)+ts.tv_nsec*1000LL; /* nanoseconds * 1000 = picoseconds */
+#else /* fallback: wall-clock time */
+ struct timeval tv;
+ gettimeofday(&tv, 0);
+ return ((usCount) tv.tv_sec*1000000000000LL)+tv.tv_usec*1000000LL; /* microseconds * 10^6 = picoseconds; unsigned arithmetic may wrap for large tv_sec, callers in this file only subtract two readings */
+#endif
+}
+
+static void *(*const mallocs[])(size_t size)={ malloc, nedmalloc }; /* the four tables are indexed by whichmalloc: 0 = system allocator, 1 = nedmalloc */
+static void *(*const reallocs[])(void *p, size_t size)={ realloc, nedrealloc };
+static size_t (*const memsizes[])(const void *)={ /* size query matching each allocator */
+#if defined(__linux__) || defined(__FreeBSD__)
+//nedmemsize,
+malloc_usable_size,
+#elif defined(__APPLE__)
+malloc_size,
+#endif
+nedmemsize }; /* NOTE(review): nedmemsize's prototype is not visible here; confirm it is compatible with the const void * parameter type of this table */
+static void (*const frees[])(void *mem)={ free, nedfree };
+#endif
+static usCount times[THREADS];
+
+
+static FORCEINLINE unsigned int myrandom(unsigned int *seed)
+{ /* Advances *seed by one linear congruential step (seed*1664525+1013904223, truncated to unsigned int) and returns the new value */
+ const unsigned int next=(unsigned int)(1664525UL*(*seed)+1013904223UL);
+ return (*seed=next);
+}
+
+static void threadcode(int threadidx)
+{ /* Worker body run by each test thread: times one pass of the workload selected by TESTTYPE against the allocator selected by whichmalloc, adding the elapsed time to times[threadidx] and the operation counts to threadstuff[threadidx].ops */
+ int n;
+ void **allocptr=threadstuff[threadidx].allocs;
+ unsigned int r, seed=threadidx; /* seeded from the thread index, so a given index always replays the same pseudo-random sequence */
+ usCount start;
+ size_t allocated=0, size; /* allocated: running total of the sizes memsizes[] reports for this thread's live blocks */
+ threadstuff[threadidx].done=0; /* completion flag polled by runtest(); raised again on the last line */
+ /*neddisablethreadcache(0);*/
+ THREADSLEEP(100);
+ start=GetUsCount();
+#if 2==TESTTYPE
+ /* A randomised malloc/realloc/free test (torture test) */
+ for(n=0; n<RECORDS*100; n++)
+ {
+ static int reallocflip; /* NOTE(review): function-static, so one flag is shared and toggled by every worker thread without synchronisation */
+ unsigned int i, dorealloc=(reallocflip=!reallocflip);
+ r=myrandom(&seed);
+ i=(int)(r % RECORDS); /* slot this iteration operates on */
+#if TESTCPLUSPLUS
+ dorealloc=!(r&(15<<28));
+ if(r&(1<<31))
+ { /* Make it two power multiple of less than 512 bytes to
+ model frequent C++ new's */
+ size=4<<(r & 7);
+ dorealloc=0;
+ }
+ else
+#endif
+ size=(size_t)(r & (BLOCKSIZE-1)); /* at most BLOCKSIZE-1; a zero-byte request is possible */
+ if(allocated<MAXMEMORY2 && !allocptr[i])
+ {
+ if(!(allocptr[i]=mallocs[whichmalloc](size))) abort();
+#if TOUCH
+ {
+ volatile char *mem=(volatile char *)allocptr[i];
+ volatile char *end=mem+size;
+ for(; mem<end; mem+=4096) *mem; /* volatile read of one byte every 4096 bytes of the requested size */
+ }
+#endif
+ allocated+=memsizes[whichmalloc](allocptr[i]);
+ threadstuff[threadidx].ops.mallocs++;
+ }
+ else if(allocated<MAXMEMORY2 && dorealloc) /* If not TESTCPLUSPLUS, then how often realloc() gets called depends on how small RECORDS is. */
+ {
+ allocated-=memsizes[whichmalloc](allocptr[i]);
+ if(!(allocptr[i]=reallocs[whichmalloc](allocptr[i], size))) abort(); /* any NULL result aborts the run, including a NULL returned for size==0 */
+#if TOUCH
+ {
+ volatile char *mem=(volatile char *)allocptr[i];
+ volatile char *end=mem+size;
+ for(; mem<end; mem+=4096) *mem;
+ }
+#endif
+ allocated+=memsizes[whichmalloc](allocptr[i]);
+ threadstuff[threadidx].ops.reallocs++;
+ }
+ else if(allocptr[i])
+ {
+ allocated-=memsizes[whichmalloc](allocptr[i]);
+ frees[whichmalloc](allocptr[i]);
+ allocptr[i]=0;
+ threadstuff[threadidx].ops.frees++;
+ }
+ }
+ for(n=0; n<RECORDS; n++) /* release whatever is still live after the random phase */
+ {
+ if(allocptr[n])
+ {
+ allocated-=memsizes[whichmalloc](allocptr[n]);
+ frees[whichmalloc](allocptr[n]);
+ allocptr[n]=0;
+ threadstuff[threadidx].ops.frees++;
+ }
+ }
+ assert(!allocated); /* holds only if memsizes[] reports the same size at release time as it did after allocation */
+#elif 1==TESTTYPE
+ unsigned int *toallocptr=threadstuff[threadidx].toalloc;
+ /* A simple stack which allocates and deallocates off the top (speed test) */
+ for(n=0; n<RECORDS;)
+ {
+#if 1
+ r=myrandom(&seed);
+ if(allocptr>threadstuff[threadidx].allocs && (r & 65535)<32760) /*<32760)*/
+ { /* free */
+ --toallocptr;
+ --allocptr;
+ --n;
+ frees[whichmalloc](*allocptr);
+ *allocptr=0;
+ threadstuff[threadidx].ops.frees++;
+ }
+ else
+#endif
+ {
+ if(doRealloc && allocptr>threadstuff[threadidx].allocs && (r & 1)) /* NOTE(review): doRealloc's file-scope declaration is commented out, so this branch cannot compile as it stands */
+ {
+ if(!(allocptr[-1]=reallocs[whichmalloc](allocptr[-1], *toallocptr))) abort();
+#if TOUCH
+ {
+ volatile char *mem=(volatile char *)allocptr[-1];
+ volatile char *end=mem+*toallocptr;
+ for(; mem<end; mem+=4096) *mem;
+ }
+#endif
+ threadstuff[threadidx].ops.reallocs++;
+ }
+ else
+ {
+ if(!(allocptr[0]=mallocs[whichmalloc](*toallocptr))) abort();
+#if TOUCH
+ {
+ volatile char *mem=(volatile char *)allocptr[0];
+ volatile char *end=mem+*toallocptr;
+ for(; mem<end; mem+=4096) *mem;
+ }
+#endif
+ threadstuff[threadidx].ops.mallocs++;
+ allocptr++;
+ }
+ n++;
+ toallocptr++;
+ /*if(!(threadstuff[threadidx].ops & 0xff))
+ nedtrimthreadcache(0,0);*/
+ }
+ }
+ while(allocptr>threadstuff[threadidx].allocs) /* unwind the stack, freeing from the top */
+ {
+ frees[whichmalloc](*--allocptr);
+ threadstuff[threadidx].ops.frees++;
+ }
+#endif
+ times[threadidx]+=GetUsCount()-start; /* elapsed time in GetUsCount() units, including the final cleanup loop */
+ neddisablethreadcache(0); /* NOTE(review): called whichever allocator is under test, so the default-malloc run also calls into nedmalloc here */
+ threadstuff[threadidx].done=1;
+}
+
+static double runtest()
+{ /* Runs one complete test with THREADS worker threads against the allocator selected by whichmalloc and returns the measured operations per second. Aborts if the bookkeeping arrays cannot be allocated. */
+ unsigned int seed=1;
+ int n, i;
+ double opspersec=0;
+ THREADVAR threads[THREADS];
+ for(n=0; n<THREADS; n++)
+ {
+ unsigned int *toallocptr;
+ int m;
+ memset(&threadstuff[n].ops, 0, sizeof(threadstuff[n].ops));
+ times[n]=0; threadstuff[n].done=0; /* also clear the completion flag so a repeated runtest() call never starts from a stale 1 */
+ if(!(threadstuff[n].toalloc=toallocptr=calloc(RECORDS, sizeof(unsigned int)))) abort(); /* the array is written to just below, so a failed allocation must not go unnoticed */
+ if(!(threadstuff[n].allocs=calloc(RECORDS, sizeof(void *)))) abort(); /* zero-filled: an empty slot is a null pointer */
+ for(m=0; m<RECORDS; m++)
+ {
+ unsigned int size=myrandom(&seed);
+#if TESTCPLUSPLUS
+ if(size&(1<<31))
+ { /* Make it two power multiple of less than 512 bytes to
+ model frequent C++ new's */
+ size=4<<(size & 7);
+ }
+ else
+#endif
+ {
+ size&=BLOCKSIZE-1;
+ }
+ *toallocptr++=size;
+ }
+ }
+#if 2==TESTTYPE
+ for(n=0; n<THREADS; n++)
+ {
+ THREADINIT(&threads[n], n);
+ }
+ for(i=0; i<8; i++) /* eight times: wait for some worker to finish, join it and start it again */
+ {
+ int found=-1;
+ do
+ {
+ for(n=0; n<THREADS; n++)
+ {
+ THREADSLEEP(100);
+ if(threadstuff[n].done)
+ {
+ found=n;
+ break;
+ }
+ }
+ } while(found<0);
+ THREADWAIT(threads[found]);
+ threads[found]=0; threadstuff[found].done=0; /* clear the stale flag before relaunching so the poll above cannot pick this slot again until the new thread has really finished */
+#if DEBUG
+ {
+ usCount totaltime=0;
+ int totalops=0, totalmallocs=0, totalreallocs=0;
+ for(n=0; n<THREADS; n++)
+ {
+ totaltime+=times[n];
+ totalmallocs+=threadstuff[n].ops.mallocs;
+ totalreallocs+=threadstuff[n].ops.reallocs;
+ totalops+=threadstuff[n].ops.mallocs+threadstuff[n].ops.reallocs;
+ }
+ opspersec=1000000000000.0*totalops/totaltime*THREADS;
+ printf("This test spent %f%% of its time doing reallocs\n", 100.0*totalreallocs/totalops);
+ printf("This allocator achieves %lfops/sec under %d threads\n\n", opspersec, THREADS);
+ }
+#endif
+ THREADINIT(&threads[found], found);
+ printf("Relaunched thread %d\n", found);
+ }
+ for(n=THREADS-1; n>=0; n--) /* final join of every worker */
+ {
+ THREADWAIT(threads[n]);
+ threads[n]=0;
+ }
+#else
+#if 1
+ for(n=0; n<THREADS; n++)
+ {
+ THREADINIT(&threads[n], n);
+ }
+ for(n=THREADS-1; n>=0; n--)
+ {
+ THREADWAIT(threads[n]);
+ threads[n]=0;
+ }
+#else
+ /* Quick realloc() test */
+ doRealloc=1;
+ for(n=0; n<THREADS; n++)
+ {
+ THREADINIT(&threads[n], n);
+ }
+ for(n=THREADS-1; n>=0; n--)
+ {
+ THREADWAIT(threads[n]);
+ threads[n]=0;
+ }
+#endif
+#endif
+ { /* totals over all workers; times[] are in GetUsCount() units (picoseconds), hence the 10^12 factor */
+ usCount totaltime=0;
+ int totalops=0, totalmallocs=0, totalreallocs=0;
+ for(n=0; n<THREADS; n++)
+ {
+ totaltime+=times[n];
+ totalmallocs+=threadstuff[n].ops.mallocs;
+ totalreallocs+=threadstuff[n].ops.reallocs;
+ totalops+=threadstuff[n].ops.mallocs+threadstuff[n].ops.reallocs;
+ }
+ opspersec=1000000000000.0*totalops/totaltime*THREADS;
+ printf("This test spent %f%% of its time doing reallocs\n", 100.0*totalreallocs/totalops);
+ printf("This allocator achieves %lfops/sec under %d threads\n", opspersec, THREADS);
+ }
+ for(n=THREADS-1; n>=0; n--)
+ {
+ free(threadstuff[n].allocs); threadstuff[n].allocs=0;
+ free(threadstuff[n].toalloc); threadstuff[n].toalloc=0;
+ }
+ return opspersec;
+}
+
+int PatchInNedmallocDLL(void);
+int main(int argc, char *argv[])
+{ /* Entry point: "default" as first argument tests the system allocator; any other argument, or none, tests nedmalloc. Prints the throughput, waits for a key press, then calls nedmalloc_trim(0). Always returns 0. */
+ int test_type = -1;
+ if (argc > 1) {
+ if (!strcmp("default", argv[1]))
+ test_type = TEST_DEFAULT;
+ else
+ test_type = TEST_NEDMALLOC;
+ printf("\ntest type:%d\n", test_type);
+ }
+ double std=0, ned=0;
+#if defined(WIN32) && defined(USE_NEDMALLOC_DLL)
+ /*PatchInNedmallocDLL();*/
+#endif
+
+#if 0
+ {
+ usCount start, end;
+ start=GetUsCount();
+ THREADSLEEP(5000);
+ end=GetUsCount();
+ printf("Wait was %lf\n", (end-start)/1000000000000.0);
+ }
+#endif
+#ifdef WIN32
+#pragma comment(lib, "user32.lib")
+ { /* Force load of user32.dll so we can debug */
+ BOOL v;
+ SystemParametersInfo(SPI_GETBEEP, 0, &v, 0);
+ }
+#endif
+#if 2==TESTTYPE
+ printf("Running torture test\n"
+ "-=-=-=-=-=-=-=-=-=-=\n");
+#elif 1==TESTTYPE
+ printf("Running speed test\n"
+ "-=-=-=-=-=-=-=-=-=\n");
+#endif
+ printf("Block size <= %u, C++ test mode is %s\n", (unsigned) BLOCKSIZE, TESTCPLUSPLUS ? "on" : "off"); /* BLOCKSIZE is an int expression, so convert it to match %u */
+ if(test_type == TEST_DEFAULT)
+ {
+ printf("\nTesting standard allocator with %d threads ...\n", THREADS);
+ whichmalloc=TEST_DEFAULT; std=runtest();
+ } else {
+ printf("\nTesting nedmalloc with %d threads ...\n", THREADS);
+ whichmalloc=TEST_NEDMALLOC;
+ ned=runtest();
+ }
+#ifdef WIN32
+ if(0)
+ {
+ ULONG data=2;
+ win32heap=HeapCreate(0, 0, 0);
+ HeapSetInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data));
+ HeapQueryInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data), NULL);
+ if(2!=data)
+ {
+ printf("The win32 low frag allocator won't work under a debugger!\n");
+ }
+ else
+ {
+ printf("Testing win32 low frag allocator with %d threads ...\n\n", THREADS);
+ whichmalloc=2;
+ runtest();
+ }
+ HeapDestroy(win32heap);
+ }
+#endif
+ if(std && ned) /* only one of the two runs happens per invocation, so this comparison is not reached with both values set */
+ { // ned should have more ops/sec
+ printf("\n\nnedmalloc allocator is %lf times faster than standard\n", ned/std);
+ }
+ printf("\nPress a key to trim\n");
+ getchar();
+ nedmalloc_trim(0);
+#ifdef _MSC_VER
+ printf("\nPress a key to end\n");
+ getchar();
+#endif
+ return 0;
+}
diff --git a/test/test.cpp b/test/test.cpp
new file mode 100644
index 0000000..ffe0acd
--- /dev/null
+++ b/test/test.cpp
@@ -0,0 +1,264 @@
+/* test.cpp
+An example of how to use nedalloc in C++
+(C) 2010 Niall Douglas
+*/
+
+#define _CRT_SECURE_NO_WARNINGS 1 /* Don't care about MSVC warnings on POSIX functions */
+#include <stdio.h>
+#include <stdlib.h>
+#include "../nedmalloc.h"
+#include <vector>
+#if defined(_M_X64) || defined(__x86_64__) || (defined(_M_IX86) && _M_IX86_FP>=2) || (defined(__i386__) && defined(__SSE2__))
+#include <emmintrin.h>
+#endif
+
+#ifdef _MSC_VER
+/*#pragma optimize("g", off)*/ /* Useful for debugging */
+#endif
+
+#if !defined(USE_NEDMALLOC_DLL)
+#include "../nedmalloc.c"
+#elif defined(WIN32)
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+#endif
+
+#ifdef _MSC_VER
+#define MEMALIGNED(v) __declspec(align(v))
+#elif defined(__GNUC__)
+#define MEMALIGNED(v) __attribute__ ((aligned(v)))
+#else
+#define MEMALIGNED(v)
+#endif
+
+#ifdef WIN32
+typedef unsigned __int64 usCount; /* Timestamp type; despite the name, the values produced below are scaled to picoseconds */
+static usCount GetUsCount()
+{ /* Returns the performance counter reading divided by (ticks per second / 10^12), i.e. a time in picoseconds */
+ static LARGE_INTEGER ticksPerSec;
+ static double scalefactor; /* counter ticks per picosecond; 0 until the first call */
+ LARGE_INTEGER val;
+ if(!scalefactor)
+ {
+ if(QueryPerformanceFrequency(&ticksPerSec))
+ scalefactor=ticksPerSec.QuadPart/1000000000000.0;
+ else
+ scalefactor=1; /* no frequency available: counter values are then returned unscaled */
+ }
+ if(!QueryPerformanceCounter(&val))
+ return (usCount) GetTickCount() * 1000000000; /* fallback when the counter cannot be read: tick count multiplied by 10^9 */
+ return (usCount) (val.QuadPart/scalefactor);
+}
+#else
+#include <sys/time.h>
+#include <time.h>
+typedef unsigned long long usCount; /* Timestamp type; despite the name, the values produced below are in picoseconds */
+static usCount GetUsCount()
+{ /* Returns the current time as seconds*10^12 plus the sub-second part scaled to the same unit (picoseconds) */
+#ifdef CLOCK_MONOTONIC
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts); /* return value is not checked */
+ return ((usCount) ts.tv_sec*1000000000000LL)+ts.tv_nsec*1000LL; /* nanoseconds * 1000 = picoseconds */
+#else
+ struct timeval tv;
+ gettimeofday(&tv, 0);
+ return ((usCount) tv.tv_sec*1000000000000LL)+tv.tv_usec*1000000LL; /* microseconds * 10^6 = picoseconds; callers in this file only subtract two readings */
+#endif
+}
+#endif
+
+using namespace nedalloc;
+using namespace std;
+
+// Move constructors are utterly required to store aligned values in a STL collection
+// In fact, alignment can't be guaranteed without them, so pre-C++0x you couldn't do this
+struct
+#ifdef HAVE_CPP0XRVALUEREFS
+ MEMALIGNED(16)
+#endif
+SSEVectorType
+{ // Four ints, overlaid with an __m128i on SSE2-capable x86 builds; used to demonstrate 16 byte aligned allocation
+ union {
+#if defined(_M_X64) || defined(__x86_64__) || (defined(_M_IX86) && _M_IX86_FP>=2) || (defined(__i386__) && defined(__SSE2__))
+ __m128i vec;
+#endif
+ struct {
+ int i[4];
+ } ints;
+ } data;
+ SSEVectorType() { } // deliberately leaves data uninitialised
+ SSEVectorType(int a, int b, int c, int d) { data.ints.i[0]=a; data.ints.i[1]=b; data.ints.i[2]=c; data.ints.i[3]=d; }
+ SSEVectorType(const SSEVectorType &) { /* do nothing */}
+#ifdef HAVE_CPP0XRVALUEREFS
+private:
+ SSEVectorType &operator=(const SSEVectorType &); // declared private: copy assignment is not available to users of the type
+public:
+ SSEVectorType(SSEVectorType &&o)
+ {
+#if defined(_M_X64) || defined(__x86_64__) || (defined(_M_IX86) && _M_IX86_FP>=2) || (defined(__i386__) && defined(__SSE2__))
+ data.vec=std::move(o.data.vec);
+#else
+ data.ints=std::move(o.data.ints);
+#endif
+ }
+ SSEVectorType &operator=(SSEVectorType &&o)
+ {
+#if defined(_M_X64) || defined(__x86_64__) || (defined(_M_IX86) && _M_IX86_FP>=2) || (defined(__i386__) && defined(__SSE2__))
+ data.vec=std::move(o.data.vec);
+#else
+ data.ints=std::move(o.data.ints);
+#endif
+ return *this;
+ }
+#endif
+ void checkaddr() const
+ { // Prints this object's address and contents, and aborts the process if the address is not a multiple of 16
+ size_t myaddr=(size_t) this;
+ printf("SSEVectorType lives at %p and contains %u,%u,%u,%u\n", (const void *) this, (unsigned) data.ints.i[0], (unsigned) data.ints.i[1], (unsigned) data.ints.i[2], (unsigned) data.ints.i[3]); // arguments converted to the exact types %p and %u require
+ if(myaddr & 15)
+ {
+ printf("SSEVectorType is not 16 byte aligned!\n");
+ abort();
+ }
+ }
+};
+
+// This is an unsigned integer-ish type with a constructor
+// to inhibit POD vector optimisations
+struct UIntish
+{ // Wraps a single unsigned int but supplies user-written constructors and assignment operators
+ unsigned int value;
+ UIntish() : value(5) { } // default value is 5
+ UIntish(const UIntish &o) : value(o.value) { }
+ UIntish &operator=(const UIntish &o) { value=o.value; return *this; }
+#ifdef HAVE_CPP0XRVALUEREFS
+ UIntish(UIntish &&o) : value(std::move(o.value)) { } // move operations simply copy the value across
+ UIntish &operator=(UIntish &&o) { value=std::move(o.value); return *this; }
+#endif
+};
+
+template<class vectype> usCount test(const char *desc)
+{ // Times a fixed sequence of operations on two containers of type vectype, prints the per-phase times in milliseconds under the heading desc, and returns the total elapsed time in GetUsCount() units
+ usCount time1=GetUsCount();
+ vectype vec1(5000000), vec2(5000000); // NOTE(review): both start with 5,000,000 elements, which the element counts quoted in the messages below do not include - confirm which is intended
+ typename vectype::value_type v=typename vectype::value_type(); // value-initialised so that push_back never reads an indeterminate value (zero for built-in types)
+ for(int n=0; n<5000000; n++)
+ {
+ vec1.push_back(v);
+ vec2.push_back(v);
+ }
+ usCount time2=GetUsCount();
+ vec1.clear();
+ usCount time3=GetUsCount();
+ vec1=vec2;
+ usCount time4=GetUsCount();
+ vec2.insert(vec2.end(), vec1.begin(), vec1.end());
+ usCount time5=GetUsCount();
+ while(vec1.size()>1)
+ vec1.pop_back();
+ usCount time6=GetUsCount();
+ size_t capacity=vec1.capacity()+vec2.capacity(), size=vec1.size()+vec2.size();
+ printf("%s:\n"
+ " Appending each of 10,000,000 elements: %fms\n"
+ " Clearing 5,000,000 elements: %fms\n"
+ " Assigning 5,000,000 elements: %fms\n"
+ " Appending block of 5,000,000 elements: %fms\n"
+ " Popping 4,999,999 elements: %fms\n"
+ " Overallocation wastage: %f%%\n"
+ "Total time: %fms\n\n", desc,
+ (time2-time1)/1000000000.0, (time3-time2)/1000000000.0, (time4-time3)/1000000000.0, (time5-time4)/1000000000.0, (time6-time5)/1000000000.0,
+ 100.0*(capacity-size)/size,
+ (time6-time1)/1000000000.0);
+ return time6-time1;
+}
+
+int PatchInNedmallocDLL(void);
+int main(void)
+{ // Walks through the nedallocator examples below, runs the three vector speed tests, waits for a key press and then trims; always returns 0
+#if defined(WIN32) && defined(USE_NEDMALLOC_DLL)
+ PatchInNedmallocDLL();
+#endif
+
+#ifdef WIN32
+#pragma comment(lib, "user32.lib")
+ { /* Force load of user32.dll so we can debug */
+ BOOL v;
+ SystemParametersInfo(SPI_GETBEEP, 0, &v, 0);
+ }
+#endif
+
+ { // inner scope: every container declared below is destroyed before neddestroysyspool() is called
+ /* This is the classic usage scenario: simply give the
+ STL collection class a nedallocator of the same type */
+ vector<int, nedallocator<int> > anyvector1;
+
+
+ /* What if we are allocating SSE/AVX vectors and we
+ need the array always allocated on a 16 byte boundary? */
+ printf("\nUninitialised (may contain random garbage):\n");
+ vector<SSEVectorType, nedallocator<SSEVectorType, nedpolicy::align<16>::policy > > SSEvector1(5);
+ for(vector<SSEVectorType, nedallocator<SSEVectorType, nedpolicy::align<16>::policy > >::const_iterator it=SSEvector1.begin(); it!=SSEvector1.end(); ++it)
+ it->checkaddr(); // aborts the process if an element is not 16 byte aligned
+
+ /* You can combine an arbitrary number of policies, so
+ the following works as expected. Remember that policies
+ are weakest to the right and strongest to the left, so
+ leftmost policies always override rightmost policies.
+
+ SSEVectorType doesn't initialise nor copy construct its
+ contents in order to make it fast to copy and move around,
+ but this means that on first instantiation it contains
+ random data. Setting the nedpolicy::zero::policy fixes this. */
+ printf("\nInitialised to zero:\n");
+ typedef vector<SSEVectorType, nedallocator<SSEVectorType,
+ nedpolicy::align<16>::policy,
+ nedpolicy::zero<>::policy,
+ nedpolicy::typeIsPOD<true>::policy
+ > > SSEvector2Type;
+ SSEvector2Type SSEvector2(5);
+ for(SSEvector2Type::const_iterator it=SSEvector2.begin(); it!=SSEvector2.end(); ++it)
+ it->checkaddr();
+
+
+ /* What if you just want to allocate one of or a fixed sized
+ array of some type? Sadly we can't use operator new because
+ the C++ spec only allows one global operator delete, so
+ instead we have New<type>(args...).
+
+ <rant mode>THIS IS HOW operator new SHOULD HAVE BEEN
+ IMPLEMENTED IN THE FIRST GOD DAMN PLACE!!!</rant mode>
+ */
+ SSEVectorType *foo1=New<SSEVectorType>(4, 5, 6, 7);
+ Delete(foo1);
+
+ /* You needn't use nedallocator<> if you don't want, you
+ can use ANY STL allocator implementation */
+ SSEVectorType *foo2=New<SSEVectorType, std::allocator<SSEVectorType> >(4, 5, 6, 7);
+ Delete<std::allocator<SSEVectorType> >(foo2);
+
+
+
+ /* Here comes the real magic! Let us try comparing the
+ speeds of various std::vector<> implementations using
+ the UIntish type, an unsigned integer which pretends
+ not to be POD */
+ printf("\nSpeed test:\n");
+ test<vector<unsigned int> >("vector<unsigned int>");
+ test<vector<UIntish> >("vector<UIntish>");
+ test<nedallocatorise<vector, UIntish,
+ nedpolicy::typeIsPOD<true>::policy,
+ nedpolicy::mmap<>::policy,
+ nedpolicy::reserveN<26>::policy // 1<<26 = 64Mb. 10,000,000 * sizeof(unsigned int) = 38Mb.
+ >::value>("nedallocatorise<vector, UIntish, nedpolicy::typeIsPOD<true>>");
+
+ printf("\nPress a key to trim\n");
+ getchar(); // blocks until input arrives on stdin
+ nedmalloc_trim(0);
+#ifdef _MSC_VER
+ printf("\nPress a key to end\n");
+ getchar();
+#endif
+ }
+ neddestroysyspool(); // presumably releases nedmalloc's system pool - TODO confirm against nedmalloc.h
+ return 0;
+}
diff --git a/test/test1.c b/test/test1.c
new file mode 100644
index 0000000..e37f5a3
--- /dev/null
+++ b/test/test1.c
@@ -0,0 +1,27 @@
+/* test.c
+An example of how to use nedalloc in C
+(C) 2005-2010 Niall Douglas
+*/
+
+#include <stdio.h>
+//#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <nedmalloc.h>
+
+int main(void)
+{ /* Smoke test: makes one 2Mb nedmalloc() and one small nedcalloc() request, reports the outcome and releases both. Returns 0 if both allocations succeeded, 1 otherwise. */
+ int rc = 0;
+ printf("\n Test nedmalloc");
+ int *n = nedmalloc(2*1024*1024);
+ void *p = nedcalloc(2, 8);
+ if (!n || !p) {
+ printf("\n ned malloc failed: n=%p p=%p\n", (void *) n, p);
+ rc = 1;
+ } else {
+ printf("\n ned malloc succeed: n=%p p=%p\n", (void *) n, p);
+ }
+ if (n) nedfree(n); /* only hand back what was actually obtained */
+ if (p) nedfree(p);
+ return rc;
+}
diff --git a/test/test1.cpp b/test/test1.cpp
new file mode 100644
index 0000000..76dba77
--- /dev/null
+++ b/test/test1.cpp
@@ -0,0 +1,30 @@
+/* test.c
+An example of how to use nedalloc in C
+(C) 2005-2010 Niall Douglas
+*/
+
+#include <stdio.h>
+//#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <nedmalloc.h>
+//extern "C" void * nedcalloc(size_t no, size_t size);
+
+int main(void)
+{ // Minimal smoke test: makes one nedcalloc() and one ned_dlvalloc() call and prints the returned pointers; always returns 0
+ printf("\n Test standard malloc"); // NOTE(review): the banner says "standard malloc" but only ned* allocation calls are active below
+// int *p = malloc(2*1024*1024);
+ // if (!p) abort();
+ // printf("\n standard malloc succeed:%p", p);
+ // void *n = nedmalloc(2*1024*1024);
+ void *p = nedcalloc(2, 8);
+ void *p1 = ned_dlvalloc(8);
+// if (!n) abort();
+ printf("\n ned malloc succeed:%p", p); // printed whether or not the call returned a null pointer
+ printf("\n ned malloc succeed:%p", p1);
+
+ // free(p);
+ // nedfree(n);
+ // nedfree(p); NOTE(review): with the frees disabled, p and p1 are never released before exit
+ return 0;
+}